OSDN Git Service

intel-vaapi-driver 1.8.1.pre1
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_vp9_encoder.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao, Yakui <yakui.zhao@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
48
49 #define MAX_VP9_ENCODER_SURFACES        64
50
51 #define MAX_URB_SIZE                    4096 /* In register */
52 #define NUM_KERNELS_PER_GPE_CONTEXT     1
53
54 #define VP9_BRC_KBPS                    1000
55
56 #define BRC_KERNEL_CBR                  0x0010
57 #define BRC_KERNEL_VBR                  0x0020
58 #define BRC_KERNEL_AVBR                 0x0040
59 #define BRC_KERNEL_CQL                  0x0080
60
61 #define VP9_PIC_STATE_BUFFER_SIZE 192
62
/*
 * One entry of the kernel header table found at the start of the VP9
 * kernel binary.  kernel_start_pointer is the start offset of a kernel
 * inside the binary expressed in 64-byte units (users shift it left by 6
 * to obtain the byte offset).
 */
typedef struct _intel_kernel_header_ {
    uint32_t reserved : 6;
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;
68
69 typedef struct _intel_vp9_kernel_header {
70     int nKernelCount;
71     intel_kernel_header PLY_DSCALE;
72     intel_kernel_header VP9_ME_P;
73     intel_kernel_header VP9_Enc_I_32x32;
74     intel_kernel_header VP9_Enc_I_16x16;
75     intel_kernel_header VP9_Enc_P;
76     intel_kernel_header VP9_Enc_TX;
77     intel_kernel_header VP9_DYS;
78
79     intel_kernel_header VP9BRC_Intra_Distortion;
80     intel_kernel_header VP9BRC_Init;
81     intel_kernel_header VP9BRC_Reset;
82     intel_kernel_header VP9BRC_Update;
83 } intel_vp9_kernel_header;
84
85 #define DYS_1X_FLAG    0x01
86 #define DYS_4X_FLAG    0x02
87 #define DYS_16X_FLAG   0x04
88
/*
 * Frame dimensions passed to the surface check helpers; the scaled and
 * DYS surfaces are sized from these two values.
 */
struct vp9_surface_param {
    uint32_t frame_width;   /* expected frame width */
    uint32_t frame_height;  /* expected frame height */
};
93
/*
 * Convert a signed integer into sign-magnitude form.
 *
 * The result has the sign flag at bit (sign_bit_pos - 1) and the
 * magnitude of val, truncated to (sign_bit_pos - 1) bits, below it.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    const int sign_flag = 1 << (sign_bit_pos - 1);
    const int magnitude_mask = sign_flag - 1;

    if (val >= 0)
        return val & magnitude_mask;

    return sign_flag | (-val & magnitude_mask);
}
108
109 static bool
110 intel_vp9_get_kernel_header_and_size(
111     void                             *pvbinary,
112     int                              binary_size,
113     INTEL_VP9_ENC_OPERATION          operation,
114     int                              krnstate_idx,
115     struct i965_kernel               *ret_kernel)
116 {
117     typedef uint32_t BIN_PTR[4];
118
119     char *bin_start;
120     intel_vp9_kernel_header      *pkh_table;
121     intel_kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
122     int next_krnoffset;
123
124     if (!pvbinary || !ret_kernel)
125         return false;
126
127     bin_start = (char *)pvbinary;
128     pkh_table = (intel_vp9_kernel_header *)pvbinary;
129     pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
130     next_krnoffset = binary_size;
131
132     if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X))
133     {
134         pcurr_header = &pkh_table->PLY_DSCALE;
135     }
136     else if (operation == INTEL_VP9_ENC_ME)
137     {
138         pcurr_header = &pkh_table->VP9_ME_P;
139     }
140     else if (operation == INTEL_VP9_ENC_MBENC)
141     {
142         pcurr_header = &pkh_table->VP9_Enc_I_32x32;
143     }
144     else if (operation == INTEL_VP9_ENC_DYS)
145     {
146         pcurr_header = &pkh_table->VP9_DYS;
147     }
148     else if (operation == INTEL_VP9_ENC_BRC)
149     {
150         pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
151     }
152     else
153     {
154         return false;
155     }
156
157     pcurr_header += krnstate_idx;
158     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
159
160     pnext_header = (pcurr_header + 1);
161     if (pnext_header < pinvalid_entry)
162     {
163         next_krnoffset = pnext_header->kernel_start_pointer << 6;
164     }
165     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
166
167     return true;
168 }
169
170
171 static void
172 gen9_free_surfaces_vp9(void **data)
173 {
174     struct gen9_surface_vp9 *vp9_surface;
175
176     if (!data || !*data)
177         return;
178
179     vp9_surface = *data;
180
181     if (vp9_surface->scaled_4x_surface_obj) {
182         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
183         vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
184         vp9_surface->scaled_4x_surface_obj = NULL;
185     }
186
187     if (vp9_surface->scaled_16x_surface_obj) {
188         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
189         vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
190         vp9_surface->scaled_16x_surface_obj = NULL;
191     }
192
193     if (vp9_surface->dys_4x_surface_obj) {
194         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
195         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
196         vp9_surface->dys_4x_surface_obj = NULL;
197     }
198
199     if (vp9_surface->dys_16x_surface_obj) {
200         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
201         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
202         vp9_surface->dys_16x_surface_obj = NULL;
203     }
204
205     if (vp9_surface->dys_surface_obj) {
206         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
207         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
208         vp9_surface->dys_surface_obj = NULL;
209     }
210
211     free(vp9_surface);
212
213     *data = NULL;
214
215     return;
216 }
217
218 static VAStatus
219 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
220                              struct object_surface *obj_surface,
221                              struct vp9_surface_param *surface_param)
222 {
223     struct i965_driver_data *i965 = i965_driver_data(ctx);
224     struct gen9_surface_vp9 *vp9_surface;
225     int downscaled_width_4x, downscaled_height_4x;
226     int downscaled_width_16x, downscaled_height_16x;
227
228     if (!obj_surface || !obj_surface->bo)
229         return VA_STATUS_ERROR_INVALID_SURFACE;
230
231     if (obj_surface->private_data &&
232         obj_surface->free_private_data != gen9_free_surfaces_vp9) {
233         obj_surface->free_private_data(&obj_surface->private_data);
234         obj_surface->private_data = NULL;
235     }
236
237     if (obj_surface->private_data) {
238         /* if the frame width/height is already the same as the expected,
239          * it is unncessary to reallocate it.
240          */
241         vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
242         if (vp9_surface->frame_width >= surface_param->frame_width ||
243             vp9_surface->frame_height >= surface_param->frame_height)
244             return VA_STATUS_SUCCESS;
245
246         obj_surface->free_private_data(&obj_surface->private_data);
247         obj_surface->private_data = NULL;
248         vp9_surface = NULL;
249     }
250
251     vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
252
253     if (!vp9_surface)
254         return VA_STATUS_ERROR_ALLOCATION_FAILED;
255
256     vp9_surface->ctx = ctx;
257     obj_surface->private_data = vp9_surface;
258     obj_surface->free_private_data = gen9_free_surfaces_vp9;
259
260     vp9_surface->frame_width = surface_param->frame_width;
261     vp9_surface->frame_height = surface_param->frame_height;
262
263     downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
264     downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
265
266     i965_CreateSurfaces(ctx,
267                         downscaled_width_4x,
268                         downscaled_height_4x,
269                         VA_RT_FORMAT_YUV420,
270                         1,
271                         &vp9_surface->scaled_4x_surface_id);
272
273     vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
274
275     if (!vp9_surface->scaled_4x_surface_obj) {
276         return VA_STATUS_ERROR_ALLOCATION_FAILED;
277     }
278
279     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
280                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
281
282     downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
283     downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
284     i965_CreateSurfaces(ctx,
285                         downscaled_width_16x,
286                         downscaled_height_16x,
287                         VA_RT_FORMAT_YUV420,
288                         1,
289                         &vp9_surface->scaled_16x_surface_id);
290     vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
291
292     if (!vp9_surface->scaled_16x_surface_obj) {
293         return VA_STATUS_ERROR_ALLOCATION_FAILED;
294     }
295
296     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
297                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
298
299     return VA_STATUS_SUCCESS;
300 }
301
302 static VAStatus
303 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
304                             struct object_surface *obj_surface,
305                             struct vp9_surface_param *surface_param)
306 {
307     struct i965_driver_data *i965 = i965_driver_data(ctx);
308     struct gen9_surface_vp9 *vp9_surface;
309     int dys_width_4x, dys_height_4x;
310     int dys_width_16x, dys_height_16x;
311
312     /* As this is handled after the surface checking, it is unnecessary
313      * to check the surface bo and vp9_priv_surface again
314      */
315
316     vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
317
318     if (!vp9_surface)
319         return VA_STATUS_ERROR_INVALID_SURFACE;
320
321     /* if the frame_width/height of dys_surface is the same as
322      * the expected, it is unnecessary to allocate it again
323      */
324     if (vp9_surface->dys_frame_width == surface_param->frame_width &&
325         vp9_surface->dys_frame_width == surface_param->frame_width)
326         return VA_STATUS_SUCCESS;
327
328     if (vp9_surface->dys_4x_surface_obj) {
329         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
330         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
331         vp9_surface->dys_4x_surface_obj = NULL;
332     }
333
334     if (vp9_surface->dys_16x_surface_obj) {
335         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
336         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
337         vp9_surface->dys_16x_surface_obj = NULL;
338     }
339
340     if (vp9_surface->dys_surface_obj) {
341         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
342         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
343         vp9_surface->dys_surface_obj = NULL;
344     }
345
346     vp9_surface->dys_frame_width = surface_param->frame_width;
347     vp9_surface->dys_frame_height = surface_param->frame_height;
348
349     i965_CreateSurfaces(ctx,
350                         surface_param->frame_width,
351                         surface_param->frame_height,
352                         VA_RT_FORMAT_YUV420,
353                         1,
354                         &vp9_surface->dys_surface_id);
355     vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
356
357     if (!vp9_surface->dys_surface_obj) {
358         return VA_STATUS_ERROR_ALLOCATION_FAILED;
359     }
360
361     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
362                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
363
364     dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
365     dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
366
367     i965_CreateSurfaces(ctx,
368                         dys_width_4x,
369                         dys_height_4x,
370                         VA_RT_FORMAT_YUV420,
371                         1,
372                         &vp9_surface->dys_4x_surface_id);
373
374     vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
375
376     if (!vp9_surface->dys_4x_surface_obj) {
377         return VA_STATUS_ERROR_ALLOCATION_FAILED;
378     }
379
380     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
381                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
382
383     dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
384     dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
385     i965_CreateSurfaces(ctx,
386                         dys_width_16x,
387                         dys_height_16x,
388                         VA_RT_FORMAT_YUV420,
389                         1,
390                         &vp9_surface->dys_16x_surface_id);
391     vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
392
393     if (!vp9_surface->dys_16x_surface_obj) {
394         return VA_STATUS_ERROR_ALLOCATION_FAILED;
395     }
396
397     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
398                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
399
400     return VA_STATUS_SUCCESS;
401 }
402
403 static VAStatus
404 gen9_vp9_allocate_resources(VADriverContextP ctx,
405                             struct encode_state *encode_state,
406                             struct intel_encoder_context *encoder_context,
407                             int allocate)
408 {
409     struct i965_driver_data *i965 = i965_driver_data(ctx);
410     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
411     struct gen9_vp9_state *vp9_state;
412     int allocate_flag, i;
413     int res_size;
414     uint32_t        frame_width_in_sb, frame_height_in_sb, frame_sb_num;
415     unsigned int width, height;
416
417     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
418
419     if (!vp9_state || !vp9_state->pic_param)
420         return VA_STATUS_ERROR_INVALID_PARAMETER;
421
422     /* the buffer related with BRC is not changed. So it is allocated
423      * based on the input parameter
424      */
425     if (allocate) {
426         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
427         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
428         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
429         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
430         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
431         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
432         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
433         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
434         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
435         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
436         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
437
438         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
439                                  &vme_context->res_brc_history_buffer,
440                                  VP9_BRC_HISTORY_BUFFER_SIZE,
441                                  "Brc History buffer");
442         if (!allocate_flag)
443             goto failed_allocation;
444         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
445                                  &vme_context->res_brc_const_data_buffer,
446                                  VP9_BRC_CONSTANTSURFACE_SIZE,
447                                  "Brc Constant buffer");
448         if (!allocate_flag)
449             goto failed_allocation;
450
451         res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
452            ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
453         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
454                                  &vme_context->res_brc_mbenc_curbe_write_buffer,
455                                  res_size,
456                                  "Brc Curbe write");
457         if (!allocate_flag)
458             goto failed_allocation;
459
460         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
461         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
462                                  &vme_context->res_pic_state_brc_read_buffer,
463                                  res_size,
464                                  "Pic State Brc_read");
465         if (!allocate_flag)
466             goto failed_allocation;
467
468         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
469         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
470                                  &vme_context->res_pic_state_brc_write_hfw_read_buffer,
471                                  res_size,
472                                  "Pic State Brc_write Hfw_Read");
473         if (!allocate_flag)
474             goto failed_allocation;
475
476         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
477         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
478                                  &vme_context->res_pic_state_hfw_write_buffer,
479                                  res_size,
480                                  "Pic State Hfw Write");
481         if (!allocate_flag)
482             goto failed_allocation;
483
484         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
485         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
486                                  &vme_context->res_seg_state_brc_read_buffer,
487                                  res_size,
488                                  "Segment state brc_read");
489         if (!allocate_flag)
490             goto failed_allocation;
491
492         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
493         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
494                                  &vme_context->res_seg_state_brc_write_buffer,
495                                  res_size,
496                                  "Segment state brc_write");
497         if (!allocate_flag)
498             goto failed_allocation;
499
500         res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
501         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
502                                  &vme_context->res_brc_bitstream_size_buffer,
503                                  res_size,
504                                  "Brc bitstream buffer");
505         if (!allocate_flag)
506             goto failed_allocation;
507
508         res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
509         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
510                                  &vme_context->res_brc_hfw_data_buffer,
511                                  res_size,
512                                  "mfw Brc data");
513         if (!allocate_flag)
514             goto failed_allocation;
515
516         res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
517         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
518                                  &vme_context->res_brc_mmdk_pak_buffer,
519                                  res_size,
520                                  "Brc mmdk_pak");
521         if (!allocate_flag)
522             goto failed_allocation;
523     }
524
525     /* If the width/height of allocated buffer is greater than the expected,
526      * it is unnecessary to allocate it again
527      */
528     if (vp9_state->res_width >= vp9_state->frame_width &&
529         vp9_state->res_height >= vp9_state->frame_height) {
530
531         return VA_STATUS_SUCCESS;
532     }
533     frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
534     frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
535     frame_sb_num  = frame_width_in_sb * frame_height_in_sb;
536
537     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
538     res_size = frame_width_in_sb * 64;
539     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
540                                  &vme_context->res_hvd_line_buffer,
541                                  res_size,
542                                  "VP9 hvd line line");
543     if (!allocate_flag)
544         goto failed_allocation;
545
546     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
547     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
548                                  &vme_context->res_hvd_tile_line_buffer,
549                                  res_size,
550                                  "VP9 hvd tile_line line");
551     if (!allocate_flag)
552         goto failed_allocation;
553
554     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
555     res_size = frame_width_in_sb * 18 * 64;
556     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
557                                  &vme_context->res_deblocking_filter_line_buffer,
558                                  res_size,
559                                  "VP9 deblocking filter line");
560     if (!allocate_flag)
561         goto failed_allocation;
562
563     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
564     res_size = frame_width_in_sb * 18 * 64;
565     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
566                                  &vme_context->res_deblocking_filter_tile_line_buffer,
567                                  res_size,
568                                  "VP9 deblocking tile line");
569     if (!allocate_flag)
570         goto failed_allocation;
571
572     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
573     res_size = frame_height_in_sb * 17 * 64;
574     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
575                                  &vme_context->res_deblocking_filter_tile_col_buffer,
576                                  res_size,
577                                  "VP9 deblocking tile col");
578     if (!allocate_flag)
579         goto failed_allocation;
580
581     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
582     res_size = frame_width_in_sb * 5 * 64;
583     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
584                                  &vme_context->res_metadata_line_buffer,
585                                  res_size,
586                                  "VP9 metadata line");
587     if (!allocate_flag)
588         goto failed_allocation;
589
590     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
591     res_size = frame_width_in_sb * 5 * 64;
592     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
593                                  &vme_context->res_metadata_tile_line_buffer,
594                                  res_size,
595                                  "VP9 metadata tile line");
596     if (!allocate_flag)
597         goto failed_allocation;
598
599     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
600     res_size = frame_height_in_sb * 5 * 64;
601     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
602                                  &vme_context->res_metadata_tile_col_buffer,
603                                  res_size,
604                                  "VP9 metadata tile col");
605     if (!allocate_flag)
606         goto failed_allocation;
607
608     i965_free_gpe_resource(&vme_context->res_prob_buffer);
609     res_size = 2048;
610     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
611                                  &vme_context->res_prob_buffer,
612                                  res_size,
613                                  "VP9 prob");
614     if (!allocate_flag)
615         goto failed_allocation;
616
617     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
618     res_size = frame_sb_num * 64;
619     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
620                                  &vme_context->res_segmentid_buffer,
621                                  res_size,
622                                  "VP9 segment id");
623     if (!allocate_flag)
624         goto failed_allocation;
625
626     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
627
628     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
629     res_size = 29 * 64;
630     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
631                                  &vme_context->res_prob_delta_buffer,
632                                  res_size,
633                                  "VP9 prob delta");
634     if (!allocate_flag)
635         goto failed_allocation;
636
637     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
638
639     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
640     res_size = 29 * 64;
641     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
642                                  &vme_context->res_prob_delta_buffer,
643                                  res_size,
644                                  "VP9 prob delta");
645     if (!allocate_flag)
646         goto failed_allocation;
647
648     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
649     res_size = 32 * 64;
650     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
651                                  &vme_context->res_compressed_input_buffer,
652                                  res_size,
653                                  "VP9 compressed_input buffer");
654     if (!allocate_flag)
655         goto failed_allocation;
656
657     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
658     res_size = 193 * 64;
659     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
660                                  &vme_context->res_prob_counter_buffer,
661                                  res_size,
662                                  "VP9 prob counter");
663     if (!allocate_flag)
664         goto failed_allocation;
665
666     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
667     res_size = frame_sb_num * 64;
668     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
669                                  &vme_context->res_tile_record_streamout_buffer,
670                                  res_size,
671                                  "VP9 tile record stream_out");
672     if (!allocate_flag)
673         goto failed_allocation;
674
675     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
676     res_size = frame_sb_num * 64;
677     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
678                                  &vme_context->res_cu_stat_streamout_buffer,
679                                  res_size,
680                                  "VP9 CU stat stream_out");
681     if (!allocate_flag)
682         goto failed_allocation;
683
684     width = vp9_state->downscaled_width_4x_in_mb * 32;
685     height = vp9_state->downscaled_height_4x_in_mb * 16;
686     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
687     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
688                                  &vme_context->s4x_memv_data_buffer,
689                                  width, height,
690                                  ALIGN(width, 64),
691                                  "VP9 4x MEMV data");
692     if (!allocate_flag)
693         goto failed_allocation;
694
695     width = vp9_state->downscaled_width_4x_in_mb * 8;
696     height = vp9_state->downscaled_height_4x_in_mb * 16;
697     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
698     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
699                                  &vme_context->s4x_memv_distortion_buffer,
700                                  width, height,
701                                  ALIGN(width, 64),
702                                  "VP9 4x MEMV distorion");
703     if (!allocate_flag)
704         goto failed_allocation;
705
706     width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
707     height = vp9_state->downscaled_height_16x_in_mb * 16;
708     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
709     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
710                                  &vme_context->s16x_memv_data_buffer,
711                                  width, height,
712                                  width,
713                                  "VP9 16x MEMV data");
714     if (!allocate_flag)
715         goto failed_allocation;
716
717     width = vp9_state->frame_width_in_mb * 16;
718     height = vp9_state->frame_height_in_mb * 8;
719     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
720     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
721                                  &vme_context->res_output_16x16_inter_modes,
722                                  width, height,
723                                  ALIGN(width, 64),
724                                  "VP9 output inter_mode");
725     if (!allocate_flag)
726         goto failed_allocation;
727
728     res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
729                16 * 4;
730     for (i = 0; i < 2; i++) {
731         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
732         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
733                                    &vme_context->res_mode_decision[i],
734                                    res_size,
735                                    "VP9 mode decision");
736         if (!allocate_flag)
737             goto failed_allocation;
738
739     }
740
741     res_size = frame_sb_num * 9 * 64;
742     for (i = 0; i < 2; i++) {
743         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
744         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
745                                    &vme_context->res_mv_temporal_buffer[i],
746                                    res_size,
747                                    "VP9 temporal mv");
748         if (!allocate_flag)
749             goto failed_allocation;
750     }
751
752     vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
753     res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
754     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
755     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
756                                  &vme_context->res_mb_code_surface,
757                                  ALIGN(res_size, 4096),
758                                  "VP9 mb_code surface");
759     if (!allocate_flag)
760         goto failed_allocation;
761
762     res_size = 128;
763     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
764     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
765                                  &vme_context->res_pak_uncompressed_input_buffer,
766                                  ALIGN(res_size, 4096),
767                                  "VP9 pak_uncompressed_input");
768     if (!allocate_flag)
769         goto failed_allocation;
770
771     if (!vme_context->frame_header_data) {
772         /* allocate 512 bytes for generating the uncompressed header */
773         vme_context->frame_header_data = calloc(1, 512);
774     }
775
776     vp9_state->res_width = vp9_state->frame_width;
777     vp9_state->res_height = vp9_state->frame_height;
778
779     return VA_STATUS_SUCCESS;
780
781 failed_allocation:
782     return VA_STATUS_ERROR_ALLOCATION_FAILED;
783 }
784
785 static void
786 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
787 {
788     int i;
789     struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
790
791     if (vp9_state->brc_enabled) {
792         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
793         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
794         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
795         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
796         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
797         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
798         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
799         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
800         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
801         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
802         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
803     }
804
805     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
806     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
807     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
808     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
809     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
810     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
811     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
812     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
813     i965_free_gpe_resource(&vme_context->res_prob_buffer);
814     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
815     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
816     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
817     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
818     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
819     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
820     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
821     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
822     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
823     for (i = 0; i < 2; i++) {
824         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
825     }
826
827     for (i = 0; i < 2; i++) {
828         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
829     }
830
831     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
832     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
833     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
834
835     if (vme_context->frame_header_data) {
836         free(vme_context->frame_header_data);
837         vme_context->frame_header_data = NULL;
838     }
839     return;
840 }
841
842 static void
843 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
844                                         struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
845                                         struct gpe_media_object_walker_parameter *walker_param)
846 {
847     memset(walker_param, 0, sizeof(*walker_param));
848
849     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
850
851     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
852     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
853
854     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
855     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
856
857     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
858     walker_param->global_outer_loop_stride.y = 0;
859
860     walker_param->global_inner_loop_unit.x = 0;
861     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
862
863     walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
864     walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
865
866     if (kernel_walker_param->no_dependency) {
867         walker_param->scoreboard_mask = 0;
868         walker_param->use_scoreboard = 0;
869         // Raster scan walking pattern
870         walker_param->local_outer_loop_stride.x = 0;
871         walker_param->local_outer_loop_stride.y = 1;
872         walker_param->local_inner_loop_unit.x = 1;
873         walker_param->local_inner_loop_unit.y = 0;
874         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
875         walker_param->local_end.y = 0;
876     } else {
877         walker_param->local_end.x = 0;
878         walker_param->local_end.y = 0;
879
880         if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
881             // 45z degree
882             walker_param->scoreboard_mask = 0x0F;
883
884             walker_param->global_loop_exec_count = 0x3FF;
885             walker_param->local_loop_exec_count = 0x3FF;
886
887             walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
888             walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
889
890             walker_param->global_start.x = 0;
891             walker_param->global_start.y = 0;
892
893             walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
894             walker_param->global_outer_loop_stride.y = 0;
895
896             walker_param->global_inner_loop_unit.x = 0;
897             walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
898
899             walker_param->block_resolution.x = walker_param->global_resolution.x;
900             walker_param->block_resolution.y = walker_param->global_resolution.y;
901
902             walker_param->local_start.x = 0;
903             walker_param->local_start.y = 0;
904
905             walker_param->local_outer_loop_stride.x = 1;
906             walker_param->local_outer_loop_stride.y = 0;
907
908             walker_param->local_inner_loop_unit.x = -1;
909             walker_param->local_inner_loop_unit.y = 4;
910
911             walker_param->middle_loop_extra_steps = 3;
912             walker_param->mid_loop_unit_x = 0;
913             walker_param->mid_loop_unit_y = 1;
914         } else {
915             // 26 degree
916             walker_param->scoreboard_mask = 0x0F;
917             walker_param->local_outer_loop_stride.x = 1;
918             walker_param->local_outer_loop_stride.y = 0;
919             walker_param->local_inner_loop_unit.x = -2;
920             walker_param->local_inner_loop_unit.y = 1;
921         }
922     }
923 }
924
925 static void
926 gen9_run_kernel_media_object(VADriverContextP ctx,
927                              struct intel_encoder_context *encoder_context,
928                              struct i965_gpe_context *gpe_context,
929                              int media_function,
930                              struct gpe_media_object_parameter *param)
931 {
932     struct intel_batchbuffer *batch = encoder_context->base.batch;
933     struct vp9_encode_status_buffer_internal *status_buffer;
934     struct gen9_vp9_state *vp9_state;
935     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
936
937     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
938     if (!vp9_state || !batch)
939         return;
940
941     intel_batchbuffer_start_atomic(batch, 0x1000);
942
943     status_buffer = &(vp9_state->status_buffer);
944     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
945     mi_store_data_imm.bo = status_buffer->bo;
946     mi_store_data_imm.offset = status_buffer->media_index_offset;
947     mi_store_data_imm.dw0 = media_function;
948     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
949
950     intel_batchbuffer_emit_mi_flush(batch);
951     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
952     gen8_gpe_media_object(ctx, gpe_context, batch, param);
953     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
954
955     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
956
957     intel_batchbuffer_end_atomic(batch);
958
959     intel_batchbuffer_flush(batch);
960 }
961
962 static void
963 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
964                                     struct intel_encoder_context *encoder_context,
965                                     struct i965_gpe_context *gpe_context,
966                                     int media_function,
967                                     struct gpe_media_object_walker_parameter *param)
968 {
969     struct intel_batchbuffer *batch = encoder_context->base.batch;
970     struct vp9_encode_status_buffer_internal *status_buffer;
971     struct gen9_vp9_state *vp9_state;
972     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
973
974     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
975     if (!vp9_state || !batch)
976         return;
977
978     intel_batchbuffer_start_atomic(batch, 0x1000);
979
980     intel_batchbuffer_emit_mi_flush(batch);
981
982     status_buffer = &(vp9_state->status_buffer);
983     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
984     mi_store_data_imm.bo = status_buffer->bo;
985     mi_store_data_imm.offset = status_buffer->media_index_offset;
986     mi_store_data_imm.dw0 = media_function;
987     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
988
989     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
990     gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
991     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
992
993     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
994
995     intel_batchbuffer_end_atomic(batch);
996
997     intel_batchbuffer_flush(batch);
998 }
999
1000 static
1001 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
1002                             struct encode_state *encode_state,
1003                             struct i965_gpe_context *gpe_context,
1004                             struct intel_encoder_context *encoder_context,
1005                             struct gen9_vp9_brc_curbe_param *param)
1006 {
1007     VAEncSequenceParameterBufferVP9 *seq_param;
1008     VAEncPictureParameterBufferVP9  *pic_param;
1009     VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
1010     vp9_brc_curbe_data      *cmd;
1011     double                  dbps_ratio, dInputBitsPerFrame;
1012     struct gen9_vp9_state *vp9_state;
1013
1014     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1015
1016     pic_param      = param->ppic_param;
1017     seq_param      = param->pseq_param;
1018     segment_param  = param->psegment_param;
1019
1020     cmd = i965_gpe_context_map_curbe(gpe_context);
1021
1022     if (!cmd)
1023         return;
1024
1025     memset(cmd, 0, sizeof(vp9_brc_curbe_data));
1026
1027     if (!vp9_state->dys_enabled)
1028     {
1029         cmd->dw0.frame_width  = pic_param->frame_width_src;
1030         cmd->dw0.frame_height = pic_param->frame_height_src;
1031     }
1032     else
1033     {
1034         cmd->dw0.frame_width  = pic_param->frame_width_dst;
1035         cmd->dw0.frame_height = pic_param->frame_height_dst;
1036     }
1037
1038     cmd->dw1.frame_type           = vp9_state->picture_coding_type;
1039     cmd->dw1.segmentation_enable  = 0;
1040     cmd->dw1.ref_frame_flags      = vp9_state->ref_frame_flag;
1041     cmd->dw1.num_tlevels          = 1;
1042
1043     switch(param->media_state_type)
1044     {
1045         case VP9_MEDIA_STATE_BRC_INIT_RESET:
1046         {
1047             cmd->dw3.max_level_ratiot0 = 0;
1048             cmd->dw3.max_level_ratiot1 = 0;
1049             cmd->dw3.max_level_ratiot2 = 0;
1050             cmd->dw3.max_level_ratiot3 = 0;
1051
1052             cmd->dw4.profile_level_max_frame    = seq_param->max_frame_width *
1053                                seq_param->max_frame_height;
1054             cmd->dw5.init_buf_fullness         = vp9_state->init_vbv_buffer_fullness_in_bit;
1055             cmd->dw6.buf_size                  = vp9_state->vbv_buffer_size_in_bit;
1056             cmd->dw7.target_bit_rate           = (vp9_state->target_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1057                                                   VP9_BRC_KBPS;
1058             cmd->dw8.max_bit_rate           = (vp9_state->max_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1059                                                   VP9_BRC_KBPS;
1060             cmd->dw9.min_bit_rate           = (vp9_state->min_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1061                                                   VP9_BRC_KBPS;
1062             cmd->dw10.frame_ratem           = vp9_state->framerate.num;
1063             cmd->dw11.frame_rated           = vp9_state->framerate.den;
1064
1065             cmd->dw14.avbr_accuracy         = 30;
1066             cmd->dw14.avbr_convergence      = 150;
1067
1068             if (encoder_context->rate_control_mode == VA_RC_CBR)
1069             {
1070                 cmd->dw12.brc_flag    = BRC_KERNEL_CBR;
1071                 cmd->dw8.max_bit_rate  = cmd->dw7.target_bit_rate;
1072                 cmd->dw9.min_bit_rate  = 0;
1073             }
1074             else if (encoder_context->rate_control_mode == VA_RC_VBR)
1075             {
1076                 cmd->dw12.brc_flag    = BRC_KERNEL_VBR;
1077             }
1078             else
1079             {
1080                 cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1081                 cmd->dw16.cq_level = 30;
1082             }
1083             cmd->dw12.gopp = seq_param->intra_period - 1;
1084
1085             cmd->dw13.init_frame_width   = pic_param->frame_width_src;
1086             cmd->dw13.init_frame_height   = pic_param->frame_height_src;
1087
1088             cmd->dw15.min_qp          = 0;
1089             cmd->dw15.max_qp          = 255;
1090
1091             cmd->dw16.cq_level            = 30;
1092
1093             cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1094             cmd->dw17.brc_overshoot_cbr_pct = 150;
1095
1096             dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1097             dbps_ratio         = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1098             if (dbps_ratio < 0.1)
1099                 dbps_ratio = 0.1;
1100             if (dbps_ratio > 3.5)
1101                 dbps_ratio = 3.5;
1102
1103             *param->pbrc_init_reset_buf_size_in_bits  = cmd->dw6.buf_size;
1104             *param->pbrc_init_reset_input_bits_per_frame  = dInputBitsPerFrame;
1105
1106             cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1107             cmd->dw18.pframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1108             cmd->dw18.pframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1109             cmd->dw18.pframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1110             cmd->dw19.pframe_deviation_threshold4  = (uint32_t)(50 * pow(0.3, dbps_ratio));
1111             cmd->dw19.pframe_deviation_threshold5  = (uint32_t)(50 * pow(0.46, dbps_ratio));
1112             cmd->dw19.pframe_deviation_threshold6  = (uint32_t)(50 * pow(0.7, dbps_ratio));
1113             cmd->dw19.pframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1114
1115             cmd->dw20.vbr_deviation_threshold0     = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1116             cmd->dw20.vbr_deviation_threshold1     = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1117             cmd->dw20.vbr_deviation_threshold2     = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1118             cmd->dw20.vbr_deviation_threshold3     = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1119             cmd->dw21.vbr_deviation_threshold4     = (uint32_t)(100 * pow(0.4, dbps_ratio));
1120             cmd->dw21.vbr_deviation_threshold5     = (uint32_t)(100 * pow(0.5, dbps_ratio));
1121             cmd->dw21.vbr_deviation_threshold6     = (uint32_t)(100 * pow(0.75, dbps_ratio));
1122             cmd->dw21.vbr_deviation_threshold7     = (uint32_t)(100 * pow(0.9, dbps_ratio));
1123
1124             cmd->dw22.kframe_deviation_threshold0  = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1125             cmd->dw22.kframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1126             cmd->dw22.kframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1127             cmd->dw22.kframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1128             cmd->dw23.kframe_deviation_threshold4  = (uint32_t)(50 * pow(0.2, dbps_ratio));
1129             cmd->dw23.kframe_deviation_threshold5  = (uint32_t)(50 * pow(0.4, dbps_ratio));
1130             cmd->dw23.kframe_deviation_threshold6  = (uint32_t)(50 * pow(0.66, dbps_ratio));
1131             cmd->dw23.kframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1132
1133             break;
1134         }
1135         case VP9_MEDIA_STATE_BRC_UPDATE:
1136         {
1137             cmd->dw15.min_qp          = 0;
1138             cmd->dw15.max_qp          = 255;
1139
1140             cmd->dw25.frame_number    = param->frame_number;
1141
1142             // Used in dynamic scaling. set to zero for now
1143             cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1144             cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
1145
1146             if (pic_param->pic_flags.bits.segmentation_enabled) {
1147                 cmd->dw32.seg_delta_qp0              = segment_param->seg_data[0].segment_qindex_delta;
1148                 cmd->dw32.seg_delta_qp1              = segment_param->seg_data[1].segment_qindex_delta;
1149                 cmd->dw32.seg_delta_qp2              = segment_param->seg_data[2].segment_qindex_delta;
1150                 cmd->dw32.seg_delta_qp3              = segment_param->seg_data[3].segment_qindex_delta;
1151
1152                 cmd->dw33.seg_delta_qp4              = segment_param->seg_data[4].segment_qindex_delta;
1153                 cmd->dw33.seg_delta_qp5              = segment_param->seg_data[5].segment_qindex_delta;
1154                 cmd->dw33.seg_delta_qp6              = segment_param->seg_data[6].segment_qindex_delta;
1155                 cmd->dw33.seg_delta_qp7              = segment_param->seg_data[7].segment_qindex_delta;
1156             }
1157
1158             //cmd->dw34.temporal_id                = pPicParams->temporal_idi;
1159             cmd->dw34.temporal_id                = 0;
1160             cmd->dw34.multi_ref_qp_check         = param->multi_ref_qp_check;
1161
1162             cmd->dw35.max_num_pak_passes         = param->brc_num_pak_passes;
1163             cmd->dw35.sync_async                 = 0;
1164             cmd->dw35.mbrc                       = param->mbbrc_enabled;
1165             if (*param->pbrc_init_current_target_buf_full_in_bits >
1166                  ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1167                 *param->pbrc_init_current_target_buf_full_in_bits -=
1168                      (double)(*param->pbrc_init_reset_buf_size_in_bits);
1169                 cmd->dw35.overflow = 1;
1170             }
1171             else
1172                 cmd->dw35.overflow = 0;
1173
1174             cmd->dw24.target_size                 = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1175
1176             cmd->dw36.segmentation               = pic_param->pic_flags.bits.segmentation_enabled;
1177
1178             *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1179
1180             cmd->dw38.qdelta_ydc  = pic_param->luma_dc_qindex_delta;
1181             cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1182             cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1183
1184             break;
1185         }
1186         case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1187             cmd->dw2.intra_mode_disable        = 0;
1188             break;
1189         default:
1190             break;
1191     }
1192
1193     cmd->dw48.brc_y4x_input_bti                = VP9_BTI_BRC_SRCY4X_G9;
1194     cmd->dw49.brc_vme_coarse_intra_input_bti   = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1195     cmd->dw50.brc_history_buffer_bti           = VP9_BTI_BRC_HISTORY_G9;
1196     cmd->dw51.brc_const_data_input_bti         = VP9_BTI_BRC_CONSTANT_DATA_G9;
1197     cmd->dw52.brc_distortion_bti               = VP9_BTI_BRC_DISTORTION_G9;
1198     cmd->dw53.brc_mmdk_pak_output_bti          = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1199     cmd->dw54.brc_enccurbe_input_bti           = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1200     cmd->dw55.brc_enccurbe_output_bti          = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1201     cmd->dw56.brc_pic_state_input_bti          = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1202     cmd->dw57.brc_pic_state_output_bti         = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1203     cmd->dw58.brc_seg_state_input_bti          = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1204     cmd->dw59.brc_seg_state_output_bti         = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1205     cmd->dw60.brc_bitstream_size_data_bti      = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1206     cmd->dw61.brc_hfw_data_output_bti          = VP9_BTI_BRC_HFW_DATA_G9;
1207
1208     i965_gpe_context_unmap_curbe(gpe_context);
1209     return;
1210 }
1211
1212 static void
1213 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1214                                      struct encode_state *encode_state,
1215                                      struct intel_encoder_context *encoder_context,
1216                                      struct i965_gpe_context *gpe_context)
1217 {
1218     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1219
1220     gen9_add_buffer_gpe_surface(ctx,
1221                                 gpe_context,
1222                                 &vme_context->res_brc_history_buffer,
1223                                 0,
1224                                 vme_context->res_brc_history_buffer.size,
1225                                 0,
1226                                 VP9_BTI_BRC_HISTORY_G9);
1227
1228     gen9_add_buffer_2d_gpe_surface(ctx,
1229                                    gpe_context,
1230                                    &vme_context->s4x_memv_distortion_buffer,
1231                                    1,
1232                                    I965_SURFACEFORMAT_R8_UNORM,
1233                                    VP9_BTI_BRC_DISTORTION_G9);
1234 }
1235
1236 /* The function related with BRC */
1237 static VAStatus
1238 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1239                                struct encode_state *encode_state,
1240                                struct intel_encoder_context *encoder_context)
1241 {
1242     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1243     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1244     struct gpe_media_object_parameter media_object_param;
1245     struct i965_gpe_context *gpe_context;
1246     int gpe_index = VP9_BRC_INIT;
1247     int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1248     struct gen9_vp9_brc_curbe_param                brc_initreset_curbe;
1249     VAEncPictureParameterBufferVP9 *pic_param;
1250     struct gen9_vp9_state *vp9_state;
1251
1252     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1253
1254     if (!vp9_state || !vp9_state->pic_param)
1255         return VA_STATUS_ERROR_INVALID_PARAMETER;
1256
1257     pic_param = vp9_state->pic_param;
1258
1259     if (vp9_state->brc_inited)
1260         gpe_index = VP9_BRC_RESET;
1261
1262     gpe_context = &brc_context->gpe_contexts[gpe_index];
1263
1264     gen8_gpe_context_init(ctx, gpe_context);
1265     gen9_gpe_reset_binding_table(ctx, gpe_context);
1266
1267     brc_initreset_curbe.media_state_type    = media_function;
1268     brc_initreset_curbe.curr_frame          = pic_param->reconstructed_frame;
1269     brc_initreset_curbe.ppic_param          = vp9_state->pic_param;
1270     brc_initreset_curbe.pseq_param          = vp9_state->seq_param;
1271     brc_initreset_curbe.psegment_param      = vp9_state->segment_param;
1272     brc_initreset_curbe.frame_width         = vp9_state->frame_width;
1273     brc_initreset_curbe.frame_height        = vp9_state->frame_height;
1274     brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1275                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1276     brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1277                           &vp9_state->brc_init_reset_buf_size_in_bits;
1278     brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1279                           &vp9_state->brc_init_reset_input_bits_per_frame;
1280     brc_initreset_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1281     brc_initreset_curbe.initbrc            = !vp9_state->brc_inited;
1282     brc_initreset_curbe.mbbrc_enabled      = 0;
1283     brc_initreset_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1284
1285     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1286                                    gpe_context,
1287                                    encoder_context,
1288                                    &brc_initreset_curbe);
1289
1290     gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1291     gen8_gpe_setup_interface_data(ctx, gpe_context);
1292
1293     memset(&media_object_param, 0, sizeof(media_object_param));
1294     gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1295
1296     return VA_STATUS_SUCCESS;
1297 }
1298
1299 static void
1300 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1301                                      struct encode_state *encode_state,
1302                                      struct intel_encoder_context *encoder_context,
1303                                      struct i965_gpe_context *gpe_context)
1304 {
1305     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1306
1307     struct object_surface *obj_surface;
1308     struct gen9_surface_vp9 *vp9_priv_surface;
1309
1310     /* sScaled4xSurface surface */
1311     obj_surface = encode_state->reconstructed_object;
1312
1313     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1314
1315     obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1316     gen9_add_2d_gpe_surface(ctx, gpe_context,
1317                             obj_surface,
1318                             0, 1,
1319                             I965_SURFACEFORMAT_R8_UNORM,
1320                             VP9_BTI_BRC_SRCY4X_G9
1321                             );
1322
1323     gen9_add_adv_gpe_surface(ctx, gpe_context,
1324                              obj_surface,
1325                              VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1326
1327     gen9_add_buffer_2d_gpe_surface(ctx,
1328                                    gpe_context,
1329                                    &vme_context->s4x_memv_distortion_buffer,
1330                                    1,
1331                                    I965_SURFACEFORMAT_R8_UNORM,
1332                                    VP9_BTI_BRC_DISTORTION_G9);
1333
1334      return;
1335 }
1336
1337 /* The function related with BRC */
1338 static VAStatus
1339 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1340                                struct encode_state *encode_state,
1341                                struct intel_encoder_context *encoder_context)
1342 {
1343     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1344     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1345     struct i965_gpe_context *gpe_context;
1346     int gpe_index = VP9_BRC_INTRA_DIST;
1347     int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1348     struct gen9_vp9_brc_curbe_param                brc_intra_dist_curbe;
1349     VAEncPictureParameterBufferVP9 *pic_param;
1350     struct gen9_vp9_state *vp9_state;
1351     struct gpe_media_object_walker_parameter media_object_walker_param;
1352     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1353
1354     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1355
1356     if (!vp9_state || !vp9_state->pic_param)
1357         return VA_STATUS_ERROR_INVALID_PARAMETER;
1358
1359     pic_param = vp9_state->pic_param;
1360
1361     gpe_context = &brc_context->gpe_contexts[gpe_index];
1362
1363     gen8_gpe_context_init(ctx, gpe_context);
1364     gen9_gpe_reset_binding_table(ctx, gpe_context);
1365
1366     brc_intra_dist_curbe.media_state_type    = media_function;
1367     brc_intra_dist_curbe.curr_frame          = pic_param->reconstructed_frame;
1368     brc_intra_dist_curbe.ppic_param          = vp9_state->pic_param;
1369     brc_intra_dist_curbe.pseq_param          = vp9_state->seq_param;
1370     brc_intra_dist_curbe.psegment_param      = vp9_state->segment_param;
1371     brc_intra_dist_curbe.frame_width         = vp9_state->frame_width;
1372     brc_intra_dist_curbe.frame_height        = vp9_state->frame_height;
1373     brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1374                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1375     brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1376                           &vp9_state->brc_init_reset_buf_size_in_bits;
1377     brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1378                           &vp9_state->brc_init_reset_input_bits_per_frame;
1379     brc_intra_dist_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1380     brc_intra_dist_curbe.initbrc            = !vp9_state->brc_inited;
1381     brc_intra_dist_curbe.mbbrc_enabled      = 0;
1382     brc_intra_dist_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1383
1384     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1385                                    gpe_context,
1386                                    encoder_context,
1387                                    &brc_intra_dist_curbe);
1388
1389     /* zero distortion buffer */
1390     i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1391
1392     gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1393     gen8_gpe_setup_interface_data(ctx, gpe_context);
1394
1395     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1396     kernel_walker_param.resolution_x = vme_context->downscaled_width_in_mb4x;
1397     kernel_walker_param.resolution_y = vme_context->downscaled_height_in_mb4x;
1398     kernel_walker_param.no_dependency = 1;
1399
1400     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1401
1402     gen9_run_kernel_media_object_walker(ctx, encoder_context,
1403                                         gpe_context,
1404                                         media_function,
1405                                         &media_object_walker_param);
1406
1407     return VA_STATUS_SUCCESS;
1408 }
1409
1410 static void
1411 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1412                                             struct encode_state *encode_state,
1413                                             struct intel_encoder_context *encoder_context,
1414                                             struct i965_gpe_resource *gpe_resource)
1415 {
1416     struct gen9_vp9_state *vp9_state;
1417     VAEncPictureParameterBufferVP9 *pic_param;
1418     int frame_width_minus1, frame_height_minus1;
1419     int is_lossless = 0;
1420     int is_intra_only = 0;
1421     unsigned int last_frame_type;
1422     unsigned int ref_flags;
1423     unsigned int use_prev_frame_mvs, adapt_flag;
1424     struct gen9_surface_vp9 *vp9_surface = NULL;
1425     struct object_surface *obj_surface = NULL;
1426     uint32_t scale_h = 0;
1427     uint32_t scale_w = 0;
1428
1429     char *pdata;
1430     int i, j;
1431     unsigned int *cmd_ptr, cmd_value, tmp;
1432
1433     pdata = i965_map_gpe_resource(gpe_resource);
1434     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1435
1436     if (!vp9_state || !vp9_state->pic_param || !pdata)
1437         return;
1438
1439     pic_param = vp9_state->pic_param;
1440     frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1441     frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
1442     if ((pic_param->luma_ac_qindex == 0) &&
1443         (pic_param->luma_dc_qindex_delta == 0) &&
1444         (pic_param->chroma_ac_qindex_delta == 0) &&
1445         (pic_param->chroma_dc_qindex_delta == 0))
1446         is_lossless = 1;
1447
1448     if (pic_param->pic_flags.bits.frame_type)
1449         is_intra_only = pic_param->pic_flags.bits.intra_only;
1450
1451     last_frame_type = vp9_state->vp9_last_frame.frame_type;
1452
1453     use_prev_frame_mvs = 0;
1454     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1455         last_frame_type = 0;
1456         ref_flags = 0;
1457     } else {
1458         ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1459                      (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1460                      (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
1461                     );
1462         if (!pic_param->pic_flags.bits.error_resilient_mode &&
1463             (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1464             (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1465             !pic_param->pic_flags.bits.intra_only &&
1466             vp9_state->vp9_last_frame.show_frame &&
1467             ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1468              !vp9_state->vp9_last_frame.intra_only)
1469            )
1470             use_prev_frame_mvs = 1;
1471     }
1472     adapt_flag = 0;
1473     if (!pic_param->pic_flags.bits.error_resilient_mode &&
1474         !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
1475         adapt_flag = 1;
1476
1477     for (i = 0; i < 4; i++) {
1478         uint32_t non_first_pass;
1479         non_first_pass = 1;
1480         if (i == 0)
1481             non_first_pass = 0;
1482
1483         cmd_ptr =(unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1484
1485         *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1486         *cmd_ptr++ = (frame_height_minus1 << 16 |
1487                       frame_width_minus1);
1488         /* dw2 */
1489         *cmd_ptr++ = ( 0 << 31 | /* disable segment_in */
1490                        0 << 30 | /* disable segment_out */
1491                        is_lossless << 29 | /* loseless */
1492                        (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1493                        (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1494                        (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1495                        (pic_param->sharpness_level << 23) |
1496                        (pic_param->filter_level << 17) |
1497                        (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1498                        (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1499                        (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1500                        (last_frame_type << 13) |
1501                        (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1502                        (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1503                        (use_prev_frame_mvs) << 10 |
1504                        ref_flags |
1505                        (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1506                        (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1507                        (is_intra_only << 2) |
1508                        (adapt_flag << 1) |
1509                        (pic_param->pic_flags.bits.frame_type) << 0);
1510
1511         *cmd_ptr++ =((0 << 28) | /* VP9Profile0 */
1512                      (0 << 24) | /* 8-bit depth */
1513                      (0 << 22) | /* only 420 format */
1514                      (0 << 0)  | /* sse statistics */
1515                      (pic_param->log2_tile_rows << 8) |
1516                      (pic_param->log2_tile_columns << 0));
1517
1518         /* dw4..6 */
1519         if (pic_param->pic_flags.bits.frame_type &&
1520             !pic_param->pic_flags.bits.intra_only) {
1521             for (j = 0; j < 3; j++) {
1522                 obj_surface = encode_state->reference_objects[j];
1523                 scale_w = 0;
1524                 scale_h = 0;
1525                 if (obj_surface && obj_surface->private_data) {
1526                     vp9_surface = obj_surface->private_data;
1527                     scale_w = (vp9_surface->frame_width  << 14) / pic_param->frame_width_dst;
1528                     scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1529                     *cmd_ptr++ = (scale_w << 16 |
1530                                   scale_h);
1531                 } else
1532                     *cmd_ptr++ = 0;
1533             }
1534         } else {
1535             *cmd_ptr++ = 0;
1536             *cmd_ptr++ = 0;
1537             *cmd_ptr++ = 0;
1538         }
1539         /* dw7..9 */
1540         for(j = 0; j < 3; j++) {
1541             obj_surface = encode_state->reference_objects[j];
1542             vp9_surface = NULL;
1543
1544             if (obj_surface && obj_surface->private_data) {
1545                 vp9_surface = obj_surface->private_data;
1546                 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1547                              (vp9_surface->frame_width - 1);
1548             } else
1549                 *cmd_ptr++ = 0;
1550         }
1551         /* dw10 */
1552         *cmd_ptr++ = 0;
1553         /* dw11 */
1554         *cmd_ptr++ = (1 << 1);
1555         *cmd_ptr++ = 0;
1556
1557         /* dw13 */
1558         *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1559                       (0 << 24) | /* tail insertation */
1560                       (pic_param->luma_ac_qindex << 16) |
1561                       0 /* compressed header bin count */);
1562
1563         /* dw14 */
1564         tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1565         cmd_value = (tmp << 16);
1566         tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1567         cmd_value |= (tmp << 8);
1568         tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1569         cmd_value |= tmp;
1570         *cmd_ptr++ = cmd_value;
1571
1572         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1573         cmd_value = tmp;
1574         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1575         cmd_value |= (tmp << 8);
1576         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1577         cmd_value |= (tmp << 16);
1578         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1579         cmd_value |= (tmp << 24);
1580         *cmd_ptr++ = cmd_value;
1581
1582         /* dw16 */
1583         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1584         cmd_value = tmp;
1585         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1586         cmd_value |= (tmp << 8);
1587         *cmd_ptr++ = cmd_value;
1588
1589         /* dw17 */
1590         *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1591                       (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1592         *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1593                       (vp9_state->frame_header.bit_offset_lf_level << 16);
1594
1595         /* dw19 */
1596         *cmd_ptr++ = (1 << 26 | (1 << 25) |
1597                       non_first_pass << 16);
1598         /* dw20 */
1599         *cmd_ptr++ = (1 << 31) | (256);
1600
1601         /* dw21 */
1602         *cmd_ptr++ = (0 << 31) | 1;
1603
1604         /* dw22-dw24. Frame_delta_qindex_range */
1605         *cmd_ptr++ = 0;
1606         *cmd_ptr++ = 0;
1607         *cmd_ptr++ = 0;
1608
1609         /* dw25-26. frame_delta_lf_range */
1610         *cmd_ptr++ = 0;
1611         *cmd_ptr++ = 0;
1612
1613         /* dw27. frame_delta_lf_min */
1614         *cmd_ptr++ = 0;
1615
1616         /* dw28..30 */
1617         *cmd_ptr++ = 0;
1618         *cmd_ptr++ = 0;
1619         *cmd_ptr++ = 0;
1620
1621         /* dw31 */
1622         *cmd_ptr++ = (0 << 30) | 1;
1623         /* dw32 */
1624         *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1625
1626         *cmd_ptr++ = 0;
1627         *cmd_ptr++ = MI_BATCH_BUFFER_END;
1628     }
1629
1630     i965_unmap_gpe_resource(gpe_resource);
1631 }
1632
1633 static void
1634 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1635                                      struct encode_state *encode_state,
1636                                      struct intel_encoder_context *encoder_context,
1637                                      struct i965_gpe_context *brc_gpe_context,
1638                                      struct i965_gpe_context *mbenc_gpe_context)
1639 {
1640     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1641
1642     /* 0. BRC history buffer */
1643     gen9_add_buffer_gpe_surface(ctx,
1644                                 brc_gpe_context,
1645                                 &vme_context->res_brc_history_buffer,
1646                                 0,
1647                                 vme_context->res_brc_history_buffer.size,
1648                                 0,
1649                                 VP9_BTI_BRC_HISTORY_G9);
1650
1651     /* 1. Constant data buffer */
1652     gen9_add_buffer_gpe_surface(ctx,
1653                                 brc_gpe_context,
1654                                 &vme_context->res_brc_const_data_buffer,
1655                                 0,
1656                                 vme_context->res_brc_const_data_buffer.size,
1657                                 0,
1658                                 VP9_BTI_BRC_CONSTANT_DATA_G9);
1659
1660     /* 2. Distortion 2D surface buffer */
1661     gen9_add_buffer_2d_gpe_surface(ctx,
1662                                    brc_gpe_context,
1663                                    &vme_context->s4x_memv_distortion_buffer,
1664                                    1,
1665                                    I965_SURFACEFORMAT_R8_UNORM,
1666                                    VP9_BTI_BRC_DISTORTION_G9);
1667
1668     /* 3. pak buffer */
1669     gen9_add_buffer_gpe_surface(ctx,
1670                                 brc_gpe_context,
1671                                 &vme_context->res_brc_mmdk_pak_buffer,
1672                                 0,
1673                                 vme_context->res_brc_mmdk_pak_buffer.size,
1674                                 0,
1675                                 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1676     /* 4. Mbenc curbe input buffer */
1677     gen9_add_dri_buffer_gpe_surface(ctx,
1678                                     brc_gpe_context,
1679                                     mbenc_gpe_context->curbe.bo,
1680                                     0,
1681                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1682                                     mbenc_gpe_context->curbe.offset,
1683                                     VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1684     /* 5. Mbenc curbe output buffer */
1685     gen9_add_dri_buffer_gpe_surface(ctx,
1686                                     brc_gpe_context,
1687                                     mbenc_gpe_context->curbe.bo,
1688                                     0,
1689                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1690                                     mbenc_gpe_context->curbe.offset,
1691                                     VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1692
1693     /* 6. BRC_PIC_STATE read buffer */
1694     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1695                                 &vme_context->res_pic_state_brc_read_buffer,
1696                                 0,
1697                                 vme_context->res_pic_state_brc_read_buffer.size,
1698                                 0,
1699                                 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1700
1701     /* 7. BRC_PIC_STATE write buffer */
1702     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1703                                 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1704                                 0,
1705                                 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1706                                 0,
1707                                 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1708
1709     /* 8. SEGMENT_STATE read buffer */
1710     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1711                                 &vme_context->res_seg_state_brc_read_buffer,
1712                                 0,
1713                                 vme_context->res_seg_state_brc_read_buffer.size,
1714                                 0,
1715                                 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1716
1717     /* 9. SEGMENT_STATE write buffer */
1718     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1719                                 &vme_context->res_seg_state_brc_write_buffer,
1720                                 0,
1721                                 vme_context->res_seg_state_brc_write_buffer.size,
1722                                 0,
1723                                 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1724
1725     /* 10. Bitstream size buffer */
1726     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1727                                 &vme_context->res_brc_bitstream_size_buffer,
1728                                 0,
1729                                 vme_context->res_brc_bitstream_size_buffer.size,
1730                                 0,
1731                                 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
1732
1733     gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1734                                 &vme_context->res_brc_hfw_data_buffer,
1735                                 0,
1736                                 vme_context->res_brc_hfw_data_buffer.size,
1737                                 0,
1738                                 VP9_BTI_BRC_HFW_DATA_G9);
1739
1740      return;
1741 }
1742
1743 static VAStatus
1744 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1745                                struct encode_state *encode_state,
1746                                struct intel_encoder_context *encoder_context)
1747 {
1748     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1749     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1750     struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1751     int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1752     int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1753     int mbenc_function;
1754     struct gen9_vp9_brc_curbe_param        brc_update_curbe_param;
1755     VAEncPictureParameterBufferVP9 *pic_param;
1756     struct gen9_vp9_state *vp9_state;
1757     struct gen9_vp9_mbenc_curbe_param    mbenc_curbe_param;
1758     struct gpe_media_object_parameter media_object_param;
1759
1760     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1761     if (!vp9_state || !vp9_state->pic_param)
1762         return VA_STATUS_ERROR_INVALID_PARAMETER;
1763
1764     pic_param = vp9_state->pic_param;
1765     // Setup VP9 MbEnc Curbe
1766     if (vp9_state->picture_coding_type) {
1767         mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1768         mbenc_index = VP9_MBENC_IDX_INTER;
1769     } else {
1770         mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1771         mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1772     }
1773
1774     mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1775
1776     memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1777
1778     mbenc_curbe_param.ppic_param             = vp9_state->pic_param;
1779     mbenc_curbe_param.pseq_param             = vp9_state->seq_param;
1780     mbenc_curbe_param.psegment_param         = vp9_state->segment_param;
1781     //mbenc_curbe_param.ppRefList              = &(vp9_state->pRefList[0]);
1782     mbenc_curbe_param.last_ref_obj           = vp9_state->last_ref_obj;
1783     mbenc_curbe_param.golden_ref_obj         = vp9_state->golden_ref_obj;
1784     mbenc_curbe_param.alt_ref_obj            = vp9_state->alt_ref_obj;
1785     mbenc_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1786     mbenc_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1787     mbenc_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1788     mbenc_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1789     mbenc_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1790     mbenc_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1791     mbenc_curbe_param.media_state_type       = mbenc_function;
1792
1793     vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1794                                 mbenc_gpe_context,
1795                                 encoder_context,
1796                                 &mbenc_curbe_param);
1797
1798     vp9_state->mbenc_curbe_set_in_brc_update = true;
1799
1800     brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1801
1802     gen8_gpe_context_init(ctx, brc_gpe_context);
1803     gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1804
1805     memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1806
1807     // Setup BRC Update Curbe
1808     brc_update_curbe_param.media_state_type       = media_function;
1809     brc_update_curbe_param.curr_frame               = pic_param->reconstructed_frame;
1810     brc_update_curbe_param.ppic_param             = vp9_state->pic_param;
1811     brc_update_curbe_param.pseq_param             = vp9_state->seq_param;
1812     brc_update_curbe_param.psegment_param         = vp9_state->segment_param;
1813     brc_update_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1814     brc_update_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1815     brc_update_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1816     brc_update_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1817     brc_update_curbe_param.b_used_ref             = 1;
1818     brc_update_curbe_param.frame_number           = vp9_state->frame_number;
1819     brc_update_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1820     brc_update_curbe_param.mbbrc_enabled          = 0;
1821     brc_update_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1822     brc_update_curbe_param.brc_num_pak_passes     = vp9_state->num_pak_passes;
1823
1824     brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1825                           &vp9_state->brc_init_current_target_buf_full_in_bits;
1826     brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1827                           &vp9_state->brc_init_reset_buf_size_in_bits;
1828     brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1829                           &vp9_state->brc_init_reset_input_bits_per_frame;
1830
1831     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1832                                    brc_gpe_context,
1833                                    encoder_context,
1834                                    &brc_update_curbe_param);
1835
1836
1837     // Check if the constant data surface is present
1838     if (vp9_state->brc_constant_buffer_supported)
1839     {
1840         char *brc_const_buffer;
1841         brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1842
1843         if (!brc_const_buffer)
1844             return VA_STATUS_ERROR_OPERATION_FAILED;
1845
1846         if (vp9_state->picture_coding_type)
1847             memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1848                    sizeof(vp9_brc_const_data_p_g9));
1849         else
1850             memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1851                    sizeof(vp9_brc_const_data_i_g9));
1852
1853         i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1854     }
1855
1856     if (pic_param->pic_flags.bits.segmentation_enabled)
1857     {
1858           //reallocate the vme_state->mb_segment_map_surface
1859           /* this will be added later */
1860     }
1861
1862     {
1863         pic_param->filter_level = 0;
1864         // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
1865         intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
1866                  encoder_context, &vme_context->res_pic_state_brc_read_buffer);
1867     }
1868
1869     gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
1870                                      encoder_context,
1871                                      brc_gpe_context,
1872                                      mbenc_gpe_context);
1873
1874     gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
1875     memset(&media_object_param, 0, sizeof(media_object_param));
1876     gen9_run_kernel_media_object(ctx, encoder_context,
1877                                  brc_gpe_context,
1878                                  media_function,
1879                                  &media_object_param);
1880     return VA_STATUS_SUCCESS;
1881 }
1882
1883 static
1884 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
1885                             struct encode_state *encode_state,
1886                             struct i965_gpe_context *gpe_context,
1887                             struct intel_encoder_context *encoder_context,
1888                             struct gen9_vp9_me_curbe_param *param)
1889 {
1890     vp9_me_curbe_data        *me_cmd;
1891     int enc_media_state;
1892     int                                       me_mode;
1893     unsigned int                                       width, height;
1894     uint32_t                                  l0_ref_frames;
1895     uint32_t                                  scale_factor;
1896
1897     if (param->b16xme_enabled) {
1898         if (param->use_16x_me)
1899             me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
1900         else
1901             me_mode = VP9_ENC_ME4X_AFTER_ME16X;
1902     } else {
1903         me_mode = VP9_ENC_ME4X_ONLY;
1904     }
1905
1906     if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
1907         scale_factor = 16;
1908     else
1909         scale_factor = 4;
1910
1911     if (param->use_16x_me)
1912         enc_media_state = VP9_MEDIA_STATE_16X_ME;
1913     else
1914         enc_media_state = VP9_MEDIA_STATE_4X_ME;
1915
1916     me_cmd = i965_gpe_context_map_curbe(gpe_context);
1917
1918     if (!me_cmd)
1919         return;
1920
1921     memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
1922
1923     me_cmd->dw1.max_num_mvs           = 0x10;
1924     me_cmd->dw1.bi_weight             = 0x00;
1925
1926     me_cmd->dw2.max_num_su            = 0x39;
1927     me_cmd->dw2.max_len_sp            = 0x39;
1928
1929     me_cmd->dw3.sub_mb_part_mask       = 0x77;
1930     me_cmd->dw3.inter_sad             = 0x00;
1931     me_cmd->dw3.intra_sad            = 0x00;
1932     me_cmd->dw3.bme_disable_fbr      = 0x01;
1933     me_cmd->dw3.sub_pel_mode         = 0x03;
1934
1935     width = param->frame_width / scale_factor;
1936     height = param->frame_height / scale_factor;
1937
1938     me_cmd->dw4.picture_width        = ALIGN(width, 16) / 16;
1939     me_cmd->dw4.picture_height_minus1       = ALIGN(height, 16) / 16 - 1;
1940
1941     me_cmd->dw5.ref_width            = 0x30;
1942     me_cmd->dw5.ref_height           = 0x28;
1943
1944     if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
1945         me_cmd->dw6.write_distortions = 0x01;
1946
1947     me_cmd->dw6.use_mv_from_prev_step   = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
1948     me_cmd->dw6.super_combine_dist    = 0x5;
1949     me_cmd->dw6.max_vmvr              = 0x7fc;
1950
1951     l0_ref_frames = (param->ref_frame_flag & 0x01) +
1952                     !!(param->ref_frame_flag & 0x02) +
1953                     !!(param->ref_frame_flag & 0x04);
1954     me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
1955     me_cmd->dw13.num_ref_idx_l1_minus1 =  0;
1956
1957     me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
1958     me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
1959
1960     me_cmd->dw15.mv_shift_factor        = 0x02;
1961
1962     {
1963         memcpy((void *)((char *)me_cmd + 64),
1964                vp9_diamond_ime_search_path_delta,
1965                sizeof(vp9_diamond_ime_search_path_delta));
1966     }
1967
1968
1969     me_cmd->dw32._4x_memv_output_data_surf_index     = VP9_BTI_ME_MV_DATA_SURFACE;
1970     me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
1971     me_cmd->dw34._4x_me_output_dist_surf_index       = VP9_BTI_ME_DISTORTION_SURFACE;
1972     me_cmd->dw35._4x_me_output_brc_dist_surf_index   = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
1973     me_cmd->dw36.vme_fwd_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L0;
1974     me_cmd->dw37.vme_bdw_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L1;
1975
1976     i965_gpe_context_unmap_curbe(gpe_context);
1977 }
1978
1979 static void
1980 gen9_vp9_send_me_surface(VADriverContextP ctx,
1981                          struct encode_state *encode_state,
1982                          struct i965_gpe_context *gpe_context,
1983                          struct intel_encoder_context *encoder_context,
1984                          struct gen9_vp9_me_surface_param *param)
1985 {
1986     struct i965_driver_data *i965 = i965_driver_data(ctx);
1987     struct object_surface *obj_surface;
1988     struct gen9_surface_vp9 *vp9_priv_surface;
1989     struct object_surface *input_surface;
1990     struct i965_gpe_resource *gpe_resource;
1991     int ref_bti;
1992
1993     obj_surface = SURFACE(param->curr_pic);
1994
1995     if (!obj_surface || !obj_surface->private_data)
1996         return;
1997
1998     vp9_priv_surface = obj_surface->private_data;
1999     if (param->use_16x_me)
2000     {
2001         gpe_resource = param->pres_16x_memv_data_buffer;
2002     }
2003     else
2004     {
2005         gpe_resource = param->pres_4x_memv_data_buffer;
2006     }
2007
2008     gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2009                                    gpe_resource,
2010                                    1,
2011                                    I965_SURFACEFORMAT_R8_UNORM,
2012                                    VP9_BTI_ME_MV_DATA_SURFACE);
2013
2014     if (param->b16xme_enabled) {
2015         gpe_resource = param->pres_16x_memv_data_buffer;
2016         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2017                                        gpe_resource,
2018                                        1,
2019                                        I965_SURFACEFORMAT_R8_UNORM,
2020                                        VP9_BTI_16XME_MV_DATA_SURFACE);
2021     }
2022
2023     if (!param->use_16x_me) {
2024         gpe_resource = param->pres_me_brc_distortion_buffer;
2025
2026         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2027                                        gpe_resource,
2028                                        1,
2029                                        I965_SURFACEFORMAT_R8_UNORM,
2030                                        VP9_BTI_ME_BRC_DISTORTION_SURFACE);
2031
2032         gpe_resource = param->pres_me_distortion_buffer;
2033
2034         gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2035                                        gpe_resource,
2036                                        1,
2037                                        I965_SURFACEFORMAT_R8_UNORM,
2038                                        VP9_BTI_ME_DISTORTION_SURFACE);
2039     }
2040
2041     if (param->use_16x_me)
2042         input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2043     else
2044         input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2045
2046     gen9_add_adv_gpe_surface(ctx, gpe_context,
2047                              input_surface,
2048                              VP9_BTI_ME_CURR_PIC_L0);
2049
2050     ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2051
2052
2053     if (param->last_ref_pic) {
2054         obj_surface = param->last_ref_pic;
2055         vp9_priv_surface = obj_surface->private_data;
2056
2057         if (param->use_16x_me)
2058             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2059         else
2060             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2061
2062         if (param->dys_enabled &&
2063             ((vp9_priv_surface->frame_width != param->frame_width) ||
2064              (vp9_priv_surface->frame_height != param->frame_height))) {
2065             if (param->use_16x_me)
2066                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2067             else
2068                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2069         }
2070         gen9_add_adv_gpe_surface(ctx, gpe_context,
2071                                  input_surface,
2072                                  ref_bti);
2073         gen9_add_adv_gpe_surface(ctx, gpe_context,
2074                                  input_surface,
2075                                  ref_bti + 1);
2076         ref_bti += 2;
2077     }
2078
2079     if (param->golden_ref_pic) {
2080         obj_surface = param->golden_ref_pic;
2081         vp9_priv_surface = obj_surface->private_data;
2082
2083         if (param->use_16x_me)
2084             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2085         else
2086             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2087
2088         if (param->dys_enabled &&
2089             ((vp9_priv_surface->frame_width != param->frame_width) ||
2090              (vp9_priv_surface->frame_height != param->frame_height))) {
2091             if (param->use_16x_me)
2092                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2093             else
2094                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2095         }
2096
2097         gen9_add_adv_gpe_surface(ctx, gpe_context,
2098                                  input_surface,
2099                                  ref_bti);
2100         gen9_add_adv_gpe_surface(ctx, gpe_context,
2101                                  input_surface,
2102                                  ref_bti + 1);
2103         ref_bti += 2;
2104     }
2105
2106     if (param->alt_ref_pic) {
2107         obj_surface = param->alt_ref_pic;
2108         vp9_priv_surface = obj_surface->private_data;
2109
2110         if (param->use_16x_me)
2111             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2112         else
2113             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2114
2115         if (param->dys_enabled &&
2116             ((vp9_priv_surface->frame_width != param->frame_width) ||
2117              (vp9_priv_surface->frame_height != param->frame_height))) {
2118             if (param->use_16x_me)
2119                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2120             else
2121                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2122         }
2123         gen9_add_adv_gpe_surface(ctx, gpe_context,
2124                                  input_surface,
2125                                  ref_bti);
2126         gen9_add_adv_gpe_surface(ctx, gpe_context,
2127                                  input_surface,
2128                                  ref_bti + 1);
2129         ref_bti += 2;
2130     }
2131
2132     return;
2133 }
2134
2135 static
2136 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2137                               struct encode_state *encode_state,
2138                               struct intel_encoder_context *encoder_context,
2139                               struct i965_gpe_context *gpe_context,
2140                               int use_16x_me)
2141 {
2142     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2143     struct gen9_vp9_me_surface_param  me_surface_param;
2144     struct gen9_vp9_state *vp9_state;
2145
2146     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2147
2148     /* sScaled4xSurface surface */
2149     memset(&me_surface_param, 0, sizeof(me_surface_param));
2150     me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2151     me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2152     me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2153     me_surface_param.curr_pic = vp9_state->curr_frame;
2154     me_surface_param.pres_4x_memv_data_buffer  = &vme_context->s4x_memv_data_buffer;
2155     me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
2156     me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2157     me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2158
2159     if (use_16x_me) {
2160         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2161         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2162     } else {
2163         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2164         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2165     }
2166     me_surface_param.frame_width  = vp9_state->frame_width;
2167     me_surface_param.frame_height  = vp9_state->frame_height;
2168
2169     me_surface_param.use_16x_me = use_16x_me;
2170     me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2171     me_surface_param.dys_enabled = vp9_state->dys_in_use;
2172
2173     vme_context->pfn_send_me_surface(ctx, encode_state,
2174                                      gpe_context,
2175                                      encoder_context,
2176                                      &me_surface_param);
2177     return;
2178 }
2179
2180 static VAStatus
2181 gen9_vp9_me_kernel(VADriverContextP ctx,
2182                    struct encode_state *encode_state,
2183                    struct intel_encoder_context *encoder_context,
2184                    int use_16x_me)
2185 {
2186     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2187     struct i965_gpe_context *gpe_context;
2188     int media_function;
2189     struct gen9_vp9_me_curbe_param me_curbe_param;
2190     struct gen9_vp9_state *vp9_state;
2191     struct gpe_media_object_walker_parameter media_object_walker_param;
2192     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2193
2194     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2195     if (!vp9_state || !vp9_state->pic_param)
2196         return VA_STATUS_ERROR_INVALID_PARAMETER;
2197
2198     if (use_16x_me)
2199         media_function = VP9_MEDIA_STATE_16X_ME;
2200     else
2201         media_function = VP9_MEDIA_STATE_4X_ME;
2202
2203     gpe_context = &(vme_context->me_context.gpe_context);
2204
2205     gen8_gpe_context_init(ctx, gpe_context);
2206     gen9_gpe_reset_binding_table(ctx, gpe_context);
2207
2208     memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2209     me_curbe_param.ppic_param = vp9_state->pic_param;
2210     me_curbe_param.pseq_param = vp9_state->seq_param;
2211     me_curbe_param.frame_width = vp9_state->frame_width;
2212     me_curbe_param.frame_height = vp9_state->frame_height;
2213     me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2214     me_curbe_param.use_16x_me = use_16x_me;
2215     me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2216     vme_context->pfn_set_curbe_me(ctx, encode_state,
2217                                   gpe_context,
2218                                   encoder_context,
2219                                   &me_curbe_param);
2220
2221     gen9_me_add_surfaces_vp9(ctx, encode_state,
2222                              encoder_context,
2223                              gpe_context,
2224                              use_16x_me);
2225
2226     gen8_gpe_setup_interface_data(ctx, gpe_context);
2227
2228     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2229     if (use_16x_me) {
2230         kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2231         kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2232     } else {
2233         kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2234         kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2235     }
2236     kernel_walker_param.no_dependency = 1;
2237
2238     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2239
2240     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2241                                         gpe_context,
2242                                         media_function,
2243                                         &media_object_walker_param);
2244
2245     return VA_STATUS_SUCCESS;
2246 }
2247
2248 static void
2249 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2250                             struct encode_state *encode_state,
2251                             struct i965_gpe_context *gpe_context,
2252                             struct intel_encoder_context *encoder_context,
2253                             struct gen9_vp9_scaling_curbe_param *curbe_param)
2254 {
2255     vp9_scaling4x_curbe_data_cm *curbe_cmd;
2256
2257     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2258
2259     if (!curbe_cmd)
2260         return;
2261
2262     memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2263
2264     curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2265     curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2266
2267     curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2268     curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
2269
2270
2271     curbe_cmd->dw6.enable_mb_variance_output = 0;
2272     curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2273     curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2274
2275     if (curbe_param->mb_variance_output_enabled ||
2276         curbe_param->mb_pixel_average_output_enabled)
2277     {
2278         curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2279     }
2280
2281     i965_gpe_context_unmap_curbe(gpe_context);
2282     return;
2283 }
2284
2285 static void
2286 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2287                               struct encode_state *encode_state,
2288                               struct i965_gpe_context *gpe_context,
2289                               struct intel_encoder_context *encoder_context,
2290                               struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2291 {
2292     vp9_bti_scaling_offset *scaling_bti;
2293     unsigned int surface_format;
2294
2295     scaling_bti = scaling_surface_param->p_scaling_bti;
2296
2297     if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2298         surface_format = I965_SURFACEFORMAT_R32_UNORM;
2299     else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2300         surface_format = I965_SURFACEFORMAT_R16_UNORM;
2301     else
2302         surface_format = I965_SURFACEFORMAT_R8_UNORM;
2303
2304     gen9_add_2d_gpe_surface(ctx, gpe_context,
2305                             scaling_surface_param->input_surface,
2306                             0, 1, surface_format,
2307                             scaling_bti->scaling_frame_src_y);
2308
2309     gen9_add_2d_gpe_surface(ctx, gpe_context,
2310                             scaling_surface_param->output_surface,
2311                             0, 1, surface_format,
2312                             scaling_bti->scaling_frame_dst_y);
2313
2314
2315     return;
2316 }
2317
2318 static VAStatus
2319 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2320                         struct encode_state *encode_state,
2321                         struct intel_encoder_context *encoder_context,
2322                         int use_16x_scaling)
2323 {
2324     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2325     struct i965_gpe_context *gpe_context;
2326     int media_function;
2327     struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2328     struct gen9_vp9_scaling_surface_param scaling_surface_param;
2329     struct gen9_vp9_state *vp9_state;
2330     VAEncPictureParameterBufferVP9  *pic_param;
2331     struct gpe_media_object_walker_parameter media_object_walker_param;
2332     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2333     struct object_surface *obj_surface;
2334     struct object_surface *input_surface, *output_surface;
2335     struct gen9_surface_vp9 *vp9_priv_surface;
2336     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2337     unsigned int input_frame_width, input_frame_height;
2338     unsigned int output_frame_width, output_frame_height;
2339
2340     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2341     if (!vp9_state || !vp9_state->pic_param)
2342         return VA_STATUS_ERROR_INVALID_PARAMETER;
2343
2344     pic_param = vp9_state->pic_param;
2345
2346     if (use_16x_scaling)
2347         media_function = VP9_MEDIA_STATE_16X_SCALING;
2348     else
2349         media_function = VP9_MEDIA_STATE_4X_SCALING;
2350
2351     gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2352
2353     gen8_gpe_context_init(ctx, gpe_context);
2354     gen9_gpe_reset_binding_table(ctx, gpe_context);
2355
2356     obj_surface = encode_state->reconstructed_object;
2357     vp9_priv_surface = obj_surface->private_data;
2358
2359     if (use_16x_scaling)
2360     {
2361         downscaled_width_in_mb      = vp9_state->downscaled_width_16x_in_mb;
2362         downscaled_height_in_mb      = vp9_state->downscaled_height_16x_in_mb;
2363
2364         input_surface               = vp9_priv_surface->scaled_4x_surface_obj;
2365         input_frame_width           = vp9_state->frame_width_4x;
2366         input_frame_height          = vp9_state->frame_height_4x;
2367
2368         output_surface              = vp9_priv_surface->scaled_16x_surface_obj;
2369         output_frame_width          = vp9_state->frame_width_16x;
2370         output_frame_height         = vp9_state->frame_height_16x;
2371     } else {
2372         downscaled_width_in_mb      = vp9_state->downscaled_width_4x_in_mb;
2373         downscaled_height_in_mb      = vp9_state->downscaled_height_4x_in_mb;
2374
2375         if (vp9_state->dys_in_use &&
2376                ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2377                 (pic_param->frame_height_src != pic_param->frame_height_dst)))
2378             input_surface               = vp9_priv_surface->dys_surface_obj;
2379         else
2380             input_surface               = encode_state->input_yuv_object;
2381
2382         input_frame_width           = vp9_state->frame_width;
2383         input_frame_height          = vp9_state->frame_height;
2384
2385         output_surface              = vp9_priv_surface->scaled_4x_surface_obj;
2386         output_frame_width          = vp9_state->frame_width_4x;
2387         output_frame_height         = vp9_state->frame_height_4x;
2388     }
2389
2390     memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2391
2392     scaling_curbe_param.input_picture_width  = input_frame_width;
2393     scaling_curbe_param.input_picture_height = input_frame_height;
2394
2395     scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2396     scaling_curbe_param.use_32x_scaling = 0;
2397
2398     if (use_16x_scaling)
2399         scaling_curbe_param.mb_variance_output_enabled = 0;
2400     else
2401         scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2402
2403     scaling_curbe_param.blk8x8_stat_enabled = 0;
2404
2405     vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2406                                   gpe_context,
2407                                   encoder_context,
2408                                   &scaling_curbe_param);
2409
2410     memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2411     scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2412     scaling_surface_param.input_surface                      = input_surface;
2413     scaling_surface_param.input_frame_width                  = input_frame_width;
2414     scaling_surface_param.input_frame_height                 = input_frame_height;
2415
2416     scaling_surface_param.output_surface                     = output_surface;
2417     scaling_surface_param.output_frame_width                 = output_frame_width;
2418     scaling_surface_param.output_frame_height                = output_frame_height;
2419     scaling_surface_param.scaling_out_use_16unorm_surf_fmt   = 0;
2420     scaling_surface_param.scaling_out_use_32unorm_surf_fmt   = 1;
2421
2422     vme_context->pfn_send_scaling_surface(ctx, encode_state,
2423                                           gpe_context,
2424                                           encoder_context,
2425                                           &scaling_surface_param);
2426
2427     gen8_gpe_setup_interface_data(ctx, gpe_context);
2428
2429     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2430     /* the scaling is based on 8x8 blk level */
2431     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2432     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2433     kernel_walker_param.no_dependency = 1;
2434
2435     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2436
2437     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2438                                         gpe_context,
2439                                         media_function,
2440                                         &media_object_walker_param);
2441
2442     return VA_STATUS_SUCCESS;
2443 }
2444
2445 static void
2446 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2447 {
2448     struct gen9_sampler_8x8_avs                *sampler_cmd;
2449
2450     if (!gpe_context)
2451         return;
2452
2453     dri_bo_map(gpe_context->sampler.bo, 1);
2454
2455     if (!gpe_context->sampler.bo->virtual)
2456         return;
2457
2458     sampler_cmd = (struct gen9_sampler_8x8_avs *)
2459        (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
2460
2461     memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
2462
2463     sampler_cmd->dw0.r3c_coefficient                      = 15;
2464     sampler_cmd->dw0.r3x_coefficient                      = 6;
2465     sampler_cmd->dw0.strong_edge_threshold                = 8;
2466     sampler_cmd->dw0.weak_edge_threshold                  = 1;
2467     sampler_cmd->dw0.gain_factor                          = 32;
2468
2469     sampler_cmd->dw2.r5c_coefficient                     = 3;
2470     sampler_cmd->dw2.r5cx_coefficient                    = 8;
2471     sampler_cmd->dw2.r5x_coefficient                     = 9;
2472     sampler_cmd->dw2.strong_edge_weight                  = 6;
2473     sampler_cmd->dw2.regular_weight                      = 3;
2474     sampler_cmd->dw2.non_edge_weight                     = 2;
2475     sampler_cmd->dw2.global_noise_estimation             = 255;
2476
2477     sampler_cmd->dw3.enable_8tap_adaptive_filter         = 0;
2478     sampler_cmd->dw3.cos_alpha                           = 79;
2479     sampler_cmd->dw3.sin_alpha                           = 101;
2480
2481     sampler_cmd->dw5.diamond_du                           = 0;
2482     sampler_cmd->dw5.hs_margin                            = 3;
2483     sampler_cmd->dw5.diamond_alpha                        = 100;
2484
2485     sampler_cmd->dw7.inv_margin_vyl                       = 3300;
2486
2487     sampler_cmd->dw8.inv_margin_vyu                       = 1600;
2488
2489     sampler_cmd->dw10.y_slope2                            = 24;
2490     sampler_cmd->dw10.s0l                                 = 1792;
2491
2492     sampler_cmd->dw12.y_slope1                            = 24;
2493
2494     sampler_cmd->dw14.s0u                                = 256;
2495
2496     sampler_cmd->dw15.s2u                                = 1792;
2497     sampler_cmd->dw15.s1u                                = 0;
2498
2499     memcpy(sampler_cmd->coefficients,
2500            &gen9_vp9_avs_coeffs[0],
2501            17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2502
2503     sampler_cmd->dw152.default_sharpness_level     = 255;
2504     sampler_cmd->dw152.max_derivative_4_pixels     = 7;
2505     sampler_cmd->dw152.max_derivative_8_pixels     = 20;
2506     sampler_cmd->dw152.transition_area_with_4_pixels    = 4;
2507     sampler_cmd->dw152.transition_area_with_8_pixels    = 5;
2508
2509     sampler_cmd->dw153.bypass_x_adaptive_filtering  = 1;
2510     sampler_cmd->dw153.bypass_y_adaptive_filtering  = 1;
2511     sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
2512
2513     memcpy(sampler_cmd->extra_coefficients,
2514            &gen9_vp9_avs_coeffs[17 * 8],
2515            15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2516
2517     dri_bo_unmap(gpe_context->sampler.bo);
2518 }
2519
2520 static void
2521 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2522                        struct encode_state *encode_state,
2523                        struct i965_gpe_context *gpe_context,
2524                        struct intel_encoder_context *encoder_context,
2525                        struct gen9_vp9_dys_curbe_param *curbe_param)
2526 {
2527     vp9_dys_curbe_data  *curbe_cmd;
2528
2529     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2530
2531     if (!curbe_cmd)
2532         return;
2533
2534     memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2535
2536     curbe_cmd->dw0.input_frame_width    = curbe_param->input_width;
2537     curbe_cmd->dw0.input_frame_height   = curbe_param->input_height;
2538
2539     curbe_cmd->dw1.output_frame_width   = curbe_param->output_width;
2540     curbe_cmd->dw1.output_frame_height  = curbe_param->output_height;
2541
2542     curbe_cmd->dw2.delta_u                 = 1.0f / curbe_param->output_width;
2543     curbe_cmd->dw3.delta_v                 = 1.0f / curbe_param->output_height;
2544
2545     curbe_cmd->dw16.input_frame_nv12_bti  = VP9_BTI_DYS_INPUT_NV12;
2546     curbe_cmd->dw17.output_frame_y_bti    = VP9_BTI_DYS_OUTPUT_Y;
2547     curbe_cmd->dw18.avs_sample_idx            = 0;
2548
2549     i965_gpe_context_unmap_curbe(gpe_context);
2550 }
2551
2552 static void
2553 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2554                        struct encode_state *encode_state,
2555                        struct i965_gpe_context *gpe_context,
2556                        struct intel_encoder_context *encoder_context,
2557                        struct gen9_vp9_dys_surface_param *surface_param)
2558 {
2559
2560     if (surface_param->input_frame)
2561         gen9_add_adv_gpe_surface(ctx,
2562                                  gpe_context,
2563                                  surface_param->input_frame,
2564                                  VP9_BTI_DYS_INPUT_NV12);
2565
2566     if (surface_param->output_frame) {
2567         gen9_add_2d_gpe_surface(ctx,
2568                                 gpe_context,
2569                                 surface_param->output_frame,
2570                                 0,
2571                                 1,
2572                                 I965_SURFACEFORMAT_R8_UNORM,
2573                                 VP9_BTI_DYS_OUTPUT_Y);
2574
2575         gen9_add_2d_gpe_surface(ctx,
2576                                 gpe_context,
2577                                 surface_param->output_frame,
2578                                 1,
2579                                 1,
2580                                 I965_SURFACEFORMAT_R16_UINT,
2581                                 VP9_BTI_DYS_OUTPUT_UV);
2582     }
2583
2584     return;
2585 }
2586
2587 static VAStatus
2588 gen9_vp9_dys_kernel(VADriverContextP ctx,
2589                         struct encode_state *encode_state,
2590                         struct intel_encoder_context *encoder_context,
2591                         gen9_vp9_dys_kernel_param *dys_kernel_param)
2592 {
2593     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2594     struct i965_gpe_context *gpe_context;
2595     int media_function;
2596     struct gen9_vp9_dys_curbe_param                 curbe_param;
2597     struct gen9_vp9_dys_surface_param               surface_param;
2598     struct gpe_media_object_walker_parameter        media_object_walker_param;
2599     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
2600     unsigned int                                    resolution_x, resolution_y;
2601
2602     media_function = VP9_MEDIA_STATE_DYS;
2603     gpe_context = &vme_context->dys_context.gpe_context;
2604
2605     //gen8_gpe_context_init(ctx, gpe_context);
2606     gen9_gpe_reset_binding_table(ctx, gpe_context);
2607
2608     /* sampler state is configured only when initializing the GPE context */
2609
2610     memset(&curbe_param, 0, sizeof(curbe_param));
2611     curbe_param.input_width   = dys_kernel_param->input_width;
2612     curbe_param.input_height  = dys_kernel_param->input_height;
2613     curbe_param.output_width = dys_kernel_param->output_width;
2614     curbe_param.output_height = dys_kernel_param->output_height;
2615     vme_context->pfn_set_curbe_dys(ctx, encode_state,
2616                                   gpe_context,
2617                                   encoder_context,
2618                                   &curbe_param);
2619
2620     // Add surface states
2621     memset(&surface_param, 0, sizeof(surface_param));
2622     surface_param.input_frame = dys_kernel_param->input_surface;
2623     surface_param.output_frame = dys_kernel_param->output_surface;
2624     surface_param.vert_line_stride = 0;
2625     surface_param.vert_line_stride_offset = 0;
2626
2627     vme_context->pfn_send_dys_surface(ctx,
2628                                       encode_state,
2629                                       gpe_context,
2630                                       encoder_context,
2631                                       &surface_param);
2632
2633     resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2634     resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2635
2636     gen8_gpe_setup_interface_data(ctx, gpe_context);
2637
2638     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2639     kernel_walker_param.resolution_x = resolution_x;
2640     kernel_walker_param.resolution_y = resolution_y;
2641     kernel_walker_param.no_dependency = 1;
2642
2643     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2644
2645     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2646                                         gpe_context,
2647                                         media_function,
2648                                         &media_object_walker_param);
2649
2650     return VA_STATUS_SUCCESS;
2651 }
2652
2653 static VAStatus
2654 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2655                           struct encode_state *encode_state,
2656                           struct intel_encoder_context *encoder_context)
2657 {
2658     struct gen9_vp9_state *vp9_state;
2659     VAEncPictureParameterBufferVP9  *pic_param;
2660     gen9_vp9_dys_kernel_param dys_kernel_param;
2661     struct object_surface *obj_surface;
2662     struct object_surface *input_surface, *output_surface;
2663     struct gen9_surface_vp9 *vp9_priv_surface;
2664
2665     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2666
2667     if (!vp9_state || !vp9_state->pic_param)
2668         return VA_STATUS_ERROR_INVALID_PARAMETER;
2669
2670     pic_param = vp9_state->pic_param;
2671
2672     if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2673         (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2674         input_surface = encode_state->input_yuv_object;
2675         obj_surface = encode_state->reconstructed_object;
2676         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2677         output_surface = vp9_priv_surface->dys_surface_obj;
2678
2679         memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2680         dys_kernel_param.input_width = pic_param->frame_width_src;
2681         dys_kernel_param.input_height = pic_param->frame_height_src;
2682         dys_kernel_param.input_surface = input_surface;
2683         dys_kernel_param.output_width = pic_param->frame_width_dst;
2684         dys_kernel_param.output_height = pic_param->frame_height_dst;
2685         dys_kernel_param.output_surface = output_surface;
2686         gen9_vp9_dys_kernel(ctx, encode_state,
2687                             encoder_context,
2688                             &dys_kernel_param);
2689     }
2690
2691     if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2692          vp9_state->last_ref_obj) {
2693         obj_surface = vp9_state->last_ref_obj;
2694         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2695
2696         input_surface = obj_surface;
2697         output_surface = vp9_priv_surface->dys_surface_obj;
2698
2699         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2700         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2701         dys_kernel_param.input_surface = input_surface;
2702
2703         dys_kernel_param.output_width = pic_param->frame_width_dst;
2704         dys_kernel_param.output_height = pic_param->frame_height_dst;
2705         dys_kernel_param.output_surface = output_surface;
2706
2707         gen9_vp9_dys_kernel(ctx, encode_state,
2708                             encoder_context,
2709                             &dys_kernel_param);
2710
2711         if (vp9_state->hme_enabled) {
2712             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2713             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2714             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2715
2716             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2717             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2718             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2719
2720             gen9_vp9_dys_kernel(ctx, encode_state,
2721                                 encoder_context,
2722                                 &dys_kernel_param);
2723
2724             /* Does it really need to do the 16x HME if the
2725              * resolution is different?
2726              * Maybe it should be restricted
2727              */
2728             if (vp9_state->b16xme_enabled) {
2729                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2730                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2731                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2732
2733                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2734                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2735                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2736
2737                 gen9_vp9_dys_kernel(ctx, encode_state,
2738                                     encoder_context,
2739                                     &dys_kernel_param);
2740             }
2741         }
2742     }
2743
2744     if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2745          vp9_state->golden_ref_obj) {
2746         obj_surface = vp9_state->golden_ref_obj;
2747         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2748
2749         input_surface = obj_surface;
2750         output_surface = vp9_priv_surface->dys_surface_obj;
2751
2752         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2753         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2754         dys_kernel_param.input_surface = input_surface;
2755
2756         dys_kernel_param.output_width = pic_param->frame_width_dst;
2757         dys_kernel_param.output_height = pic_param->frame_height_dst;
2758         dys_kernel_param.output_surface = output_surface;
2759
2760         gen9_vp9_dys_kernel(ctx, encode_state,
2761                             encoder_context,
2762                             &dys_kernel_param);
2763
2764         if (vp9_state->hme_enabled) {
2765             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2766             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2767             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2768
2769             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2770             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2771             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2772
2773             gen9_vp9_dys_kernel(ctx, encode_state,
2774                                 encoder_context,
2775                                 &dys_kernel_param);
2776
2777             /* Does it really need to do the 16x HME if the
2778              * resolution is different?
2779              * Maybe it should be restricted
2780              */
2781             if (vp9_state->b16xme_enabled) {
2782                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2783                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2784                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2785
2786                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2787                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2788                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2789
2790                 gen9_vp9_dys_kernel(ctx, encode_state,
2791                                     encoder_context,
2792                                     &dys_kernel_param);
2793             }
2794         }
2795     }
2796
2797     if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2798          vp9_state->alt_ref_obj) {
2799         obj_surface = vp9_state->alt_ref_obj;
2800         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2801
2802         input_surface = obj_surface;
2803         output_surface = vp9_priv_surface->dys_surface_obj;
2804
2805         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2806         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2807         dys_kernel_param.input_surface = input_surface;
2808
2809         dys_kernel_param.output_width = pic_param->frame_width_dst;
2810         dys_kernel_param.output_height = pic_param->frame_height_dst;
2811         dys_kernel_param.output_surface = output_surface;
2812
2813         gen9_vp9_dys_kernel(ctx, encode_state,
2814                             encoder_context,
2815                             &dys_kernel_param);
2816
2817         if (vp9_state->hme_enabled) {
2818             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2819             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2820             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2821
2822             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2823             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2824             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2825
2826             gen9_vp9_dys_kernel(ctx, encode_state,
2827                                 encoder_context,
2828                                 &dys_kernel_param);
2829
2830             /* Does it really need to do the 16x HME if the
2831              * resolution is different?
2832              * Maybe it should be restricted
2833              */
2834             if (vp9_state->b16xme_enabled) {
2835                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2836                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2837                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2838
2839                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2840                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2841                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2842
2843                 gen9_vp9_dys_kernel(ctx, encode_state,
2844                                     encoder_context,
2845                                     &dys_kernel_param);
2846             }
2847         }
2848     }
2849
2850     return VA_STATUS_SUCCESS;
2851 }
2852
2853 static void
2854 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2855                          struct encode_state *encode_state,
2856                          struct i965_gpe_context *gpe_context,
2857                          struct intel_encoder_context *encoder_context,
2858                          struct gen9_vp9_mbenc_curbe_param *curbe_param)
2859 {
2860     struct gen9_vp9_state *vp9_state;
2861     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2862     vp9_mbenc_curbe_data  *curbe_cmd;
2863     VAEncPictureParameterBufferVP9  *pic_param;
2864     int i, segment_count;
2865     int seg_qindex;
2866     struct object_surface *obj_surface;
2867     struct gen9_surface_vp9 *vp9_priv_surface;
2868
2869     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2870
2871     if (!vp9_state || !vp9_state->pic_param)
2872         return;
2873
2874     pic_param = curbe_param->ppic_param;
2875     seg_param = curbe_param->psegment_param;
2876
2877     if (!seg_param) {
2878         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2879         seg_param = &tmp_seg_param;
2880     }
2881
2882     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2883
2884     if (!curbe_cmd)
2885         return;
2886
2887     memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2888
2889     if (vp9_state->dys_in_use)
2890     {
2891         curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2892         curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2893     }
2894     else
2895     {
2896         curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2897         curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2898     }
2899
2900     curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2901
2902     curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2903     if (pic_param->pic_flags.bits.segmentation_enabled)
2904         segment_count = 8;
2905     else
2906         segment_count = 1;
2907
2908     curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2909
2910     //right now set them to normal settings
2911     if (curbe_param->picture_coding_type)
2912     {
2913         switch (vp9_state->target_usage)
2914         {
2915         case INTEL_ENC_VP9_TU_QUALITY:
2916             curbe_cmd->dw1.min_16for32_check    = 0x00;
2917             curbe_cmd->dw2.multi_pred           = 0x02;
2918             curbe_cmd->dw2.len_sp               = 0x39;
2919             curbe_cmd->dw2.search_x             = 0x30;
2920             curbe_cmd->dw2.search_y             = 0x28;
2921             curbe_cmd->dw3.min_ref_for32_check = 0x01;
2922             curbe_cmd->dw4.skip16_threshold     = 0x000A;
2923             curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2924
2925             memcpy(&curbe_cmd->dw16,
2926                     vp9_diamond_ime_search_path_delta,
2927                     14 * sizeof(unsigned int));
2928             break;
2929         case INTEL_ENC_VP9_TU_PERFORMANCE:
2930             curbe_cmd->dw1.min_16for32_check    = 0x02;
2931             curbe_cmd->dw2.multi_pred           = 0x00;
2932             curbe_cmd->dw2.len_sp               = 0x10;
2933             curbe_cmd->dw2.search_x             = 0x20;
2934             curbe_cmd->dw2.search_y             = 0x20;
2935             curbe_cmd->dw3.min_ref_for32_check = 0x03;
2936             curbe_cmd->dw4.skip16_threshold     = 0x0014;
2937             curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2938
2939             memcpy(&curbe_cmd->dw16,
2940                     vp9_fullspiral_ime_search_path_delta,
2941                     14 * sizeof(unsigned int));
2942
2943             break;
2944         default:  // normal settings
2945             curbe_cmd->dw1.min_16for32_check     = 0x01;
2946             curbe_cmd->dw2.multi_pred           = 0x00;
2947             curbe_cmd->dw2.len_sp               = 0x19;
2948             curbe_cmd->dw2.search_x             = 0x30;
2949             curbe_cmd->dw2.search_y             = 0x28;
2950             curbe_cmd->dw3.min_ref_for32_check = 0x02;
2951             curbe_cmd->dw4.skip16_threshold     = 0x000F;
2952             curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2953
2954             memcpy(&curbe_cmd->dw16,
2955                     vp9_diamond_ime_search_path_delta,
2956                     14 * sizeof(unsigned int));
2957             break;
2958         }
2959
2960         curbe_cmd->dw3.hme_enabled               = curbe_param->hme_enabled;
2961         curbe_cmd->dw3.multi_ref_qp_check         = curbe_param->multi_ref_qp_check;
2962         // co-located predictor must be disabled when dynamic scaling is enabled
2963         curbe_cmd->dw3.disable_temp_pred    = vp9_state->dys_in_use;
2964     }
2965
2966     curbe_cmd->dw5.inter_round = 0;
2967     curbe_cmd->dw5.intra_round = 4;
2968     curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2969
2970     for (i = 0; i < segment_count; i++)
2971     {
2972         seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2973                      + seg_param->seg_data[i].segment_qindex_delta;
2974
2975         seg_qindex = CLAMP(0, 255, seg_qindex);
2976
2977         if (curbe_param->picture_coding_type)
2978             memcpy(&curbe_cmd->segments[i],
2979                    &intel_vp9_costlut_p[seg_qindex * 16],
2980                    16 * sizeof(unsigned int));
2981         else
2982             memcpy(&curbe_cmd->segments[i],
2983                    &intel_vp9_costlut_key[seg_qindex * 16],
2984                    16 * sizeof(unsigned int));
2985     }
2986
2987     if (curbe_param->picture_coding_type)
2988     {
2989         if (curbe_cmd->dw3.multi_ref_qp_check)
2990         {
2991             if (curbe_param->ref_frame_flag & 0x01)
2992             {
2993                 obj_surface = curbe_param->last_ref_obj;
2994                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2995                 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2996             }
2997
2998             if (curbe_param->ref_frame_flag & 0x02)
2999             {
3000                 obj_surface = curbe_param->golden_ref_obj;
3001                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3002                 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3003             }
3004
3005             if (curbe_param->ref_frame_flag & 0x04)
3006             {
3007                 obj_surface = curbe_param->alt_ref_obj;
3008                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3009                 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3010             }
3011         }
3012     }
3013     curbe_cmd->dw160.enc_curr_y_surf_bti           = VP9_BTI_MBENC_CURR_Y_G9;
3014     curbe_cmd->dw162.enc_curr_nv12_surf_bti        = VP9_BTI_MBENC_CURR_NV12_G9;
3015     curbe_cmd->dw166.segmentation_map_bti          = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
3016     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
3017     curbe_cmd->dw167.tx_curbe_bti                = VP9_BTI_MBENC_TX_CURBE_G9;
3018     curbe_cmd->dw168.hme_mvdata_bti             = VP9_BTI_MBENC_HME_MV_DATA_G9;
3019     curbe_cmd->dw169.hme_distortion_bti          = VP9_BTI_MBENC_HME_DISTORTION_G9;
3020     curbe_cmd->dw171.mode_decision_prev_bti      = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
3021     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
3022     curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
3023     curbe_cmd->dw174.cu_record_bti               = VP9_BTI_MBENC_CU_RECORDS_G9;
3024     curbe_cmd->dw175.pak_data_bti                = VP9_BTI_MBENC_PAK_DATA_G9;
3025
3026     i965_gpe_context_unmap_curbe(gpe_context);
3027     return;
3028 }
3029
3030 static void
3031 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
3032                             struct encode_state *encode_state,
3033                             struct i965_gpe_context *gpe_context,
3034                             struct intel_encoder_context *encoder_context,
3035                             struct gen9_vp9_mbenc_surface_param *mbenc_param)
3036 {
3037     struct gen9_vp9_state *vp9_state;
3038     unsigned int            res_size;
3039     unsigned int            frame_width_in_sb, frame_height_in_sb;
3040     struct object_surface   *obj_surface, *tmp_input;
3041     struct gen9_surface_vp9 *vp9_priv_surface;
3042     int media_function;
3043
3044     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3045
3046     if (!vp9_state || !vp9_state->pic_param)
3047         return;
3048
3049     frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3050     frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3051     media_function = mbenc_param->media_state_type;
3052
3053     switch (media_function)
3054     {
3055     case VP9_MEDIA_STATE_MBENC_I_32x32:
3056     {
3057         obj_surface = mbenc_param->curr_frame_obj;
3058
3059         gen9_add_2d_gpe_surface(ctx,
3060                                 gpe_context,
3061                                 obj_surface,
3062                                 0,
3063                                 1,
3064                                 I965_SURFACEFORMAT_R8_UNORM,
3065                                 VP9_BTI_MBENC_CURR_Y_G9);
3066
3067         gen9_add_2d_gpe_surface(ctx,
3068                                 gpe_context,
3069                                 obj_surface,
3070                                 1,
3071                                 1,
3072                                 I965_SURFACEFORMAT_R16_UINT,
3073                                 VP9_BTI_MBENC_CURR_UV_G9);
3074
3075
3076         if (mbenc_param->segmentation_enabled)
3077         {
3078            gen9_add_buffer_2d_gpe_surface(ctx,
3079                                    gpe_context,
3080                                    mbenc_param->pres_segmentation_map,
3081                                    1,
3082                                    I965_SURFACEFORMAT_R8_UNORM,
3083                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3084
3085         }
3086
3087         res_size = 16 * mbenc_param->frame_width_in_mb *
3088                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3089         gen9_add_buffer_gpe_surface(ctx,
3090                                     gpe_context,
3091                                     mbenc_param->pres_mode_decision,
3092                                     0,
3093                                     res_size / 4,
3094                                     0,
3095                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3096
3097         break;
3098     }
3099     case VP9_MEDIA_STATE_MBENC_I_16x16:
3100     {
3101         obj_surface = mbenc_param->curr_frame_obj;
3102
3103         gen9_add_2d_gpe_surface(ctx,
3104                                 gpe_context,
3105                                 obj_surface,
3106                                 0,
3107                                 1,
3108                                 I965_SURFACEFORMAT_R8_UNORM,
3109                                 VP9_BTI_MBENC_CURR_Y_G9);
3110
3111         gen9_add_2d_gpe_surface(ctx,
3112                                 gpe_context,
3113                                 obj_surface,
3114                                 1,
3115                                 1,
3116                                 I965_SURFACEFORMAT_R16_UINT,
3117                                 VP9_BTI_MBENC_CURR_UV_G9);
3118
3119         gen9_add_adv_gpe_surface(ctx, gpe_context,
3120                                  obj_surface,
3121                                  VP9_BTI_MBENC_CURR_NV12_G9);
3122
3123         if (mbenc_param->segmentation_enabled)
3124         {
3125            gen9_add_buffer_2d_gpe_surface(ctx,
3126                                    gpe_context,
3127                                    mbenc_param->pres_segmentation_map,
3128                                    1,
3129                                    I965_SURFACEFORMAT_R8_UNORM,
3130                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3131
3132         }
3133
3134         res_size = 16 * mbenc_param->frame_width_in_mb *
3135                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3136         gen9_add_buffer_gpe_surface(ctx,
3137                                     gpe_context,
3138                                     mbenc_param->pres_mode_decision,
3139                                     0,
3140                                     res_size / 4,
3141                                     0,
3142                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3143
3144         res_size = 160;
3145
3146         gen9_add_dri_buffer_gpe_surface(ctx,
3147                                         gpe_context,
3148                                         mbenc_param->gpe_context_tx->curbe.bo,
3149                                         0,
3150                                         ALIGN(res_size, 64),
3151                                         mbenc_param->gpe_context_tx->curbe.offset,
3152                                         VP9_BTI_MBENC_TX_CURBE_G9);
3153
3154         break;
3155     }
3156     case VP9_MEDIA_STATE_MBENC_P:
3157     {
3158         obj_surface = mbenc_param->curr_frame_obj;
3159
3160         gen9_add_2d_gpe_surface(ctx,
3161                                 gpe_context,
3162                                 obj_surface,
3163                                 0,
3164                                 1,
3165                                 I965_SURFACEFORMAT_R8_UNORM,
3166                                 VP9_BTI_MBENC_CURR_Y_G9);
3167
3168         gen9_add_2d_gpe_surface(ctx, gpe_context,
3169                                 obj_surface,
3170                                 1,
3171                                 1,
3172                                 I965_SURFACEFORMAT_R16_UINT,
3173                                 VP9_BTI_MBENC_CURR_UV_G9);
3174
3175         gen9_add_adv_gpe_surface(ctx, gpe_context,
3176                                  obj_surface,
3177                                  VP9_BTI_MBENC_CURR_NV12_G9);
3178
3179         if (mbenc_param->last_ref_obj)
3180         {
3181             obj_surface = mbenc_param->last_ref_obj;
3182             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3183
3184             if (vp9_state->dys_in_use &&
3185                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3186                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3187                 tmp_input = vp9_priv_surface->dys_surface_obj;
3188             else
3189                 tmp_input = obj_surface;
3190
3191             gen9_add_adv_gpe_surface(ctx, gpe_context,
3192                                  tmp_input,
3193                                  VP9_BTI_MBENC_LAST_NV12_G9);
3194
3195             gen9_add_adv_gpe_surface(ctx, gpe_context,
3196                                  tmp_input,
3197                                  VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3198
3199         }
3200
3201         if (mbenc_param->golden_ref_obj)
3202         {
3203             obj_surface = mbenc_param->golden_ref_obj;
3204             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3205
3206             if (vp9_state->dys_in_use &&
3207                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3208                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3209                 tmp_input = vp9_priv_surface->dys_surface_obj;
3210             else
3211                 tmp_input = obj_surface;
3212
3213             gen9_add_adv_gpe_surface(ctx, gpe_context,
3214                                  tmp_input,
3215                                  VP9_BTI_MBENC_GOLD_NV12_G9);
3216
3217             gen9_add_adv_gpe_surface(ctx, gpe_context,
3218                                  tmp_input,
3219                                  VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3220
3221         }
3222
3223         if (mbenc_param->alt_ref_obj)
3224         {
3225             obj_surface = mbenc_param->alt_ref_obj;
3226             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3227
3228             if (vp9_state->dys_in_use &&
3229                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3230                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3231                 tmp_input = vp9_priv_surface->dys_surface_obj;
3232             else
3233                 tmp_input = obj_surface;
3234
3235             gen9_add_adv_gpe_surface(ctx, gpe_context,
3236                                  tmp_input,
3237                                  VP9_BTI_MBENC_ALTREF_NV12_G9);
3238
3239             gen9_add_adv_gpe_surface(ctx, gpe_context,
3240                                  tmp_input,
3241                                  VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
3242
3243         }
3244
3245         if (mbenc_param->hme_enabled)
3246         {
3247             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3248                                        mbenc_param->ps4x_memv_data_buffer,
3249                                        1,
3250                                        I965_SURFACEFORMAT_R8_UNORM,
3251                                        VP9_BTI_MBENC_HME_MV_DATA_G9);
3252
3253             gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3254                                        mbenc_param->ps4x_memv_distortion_buffer,
3255                                        1,
3256                                        I965_SURFACEFORMAT_R8_UNORM,
3257                                        VP9_BTI_MBENC_HME_DISTORTION_G9);
3258         }
3259
3260         if (mbenc_param->segmentation_enabled)
3261         {
3262            gen9_add_buffer_2d_gpe_surface(ctx,
3263                                    gpe_context,
3264                                    mbenc_param->pres_segmentation_map,
3265                                    1,
3266                                    I965_SURFACEFORMAT_R8_UNORM,
3267                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3268
3269         }
3270
3271         res_size = 16 * mbenc_param->frame_width_in_mb *
3272                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3273         gen9_add_buffer_gpe_surface(ctx,
3274                                     gpe_context,
3275                                     mbenc_param->pres_mode_decision_prev,
3276                                     0,
3277                                     res_size / 4,
3278                                     0,
3279                                     VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3280
3281         gen9_add_buffer_gpe_surface(ctx,
3282                                     gpe_context,
3283                                     mbenc_param->pres_mode_decision,
3284                                     0,
3285                                     res_size / 4,
3286                                     0,
3287                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3288
3289         gen9_add_buffer_2d_gpe_surface(ctx,
3290                                    gpe_context,
3291                                    mbenc_param->pres_output_16x16_inter_modes,
3292                                    1,
3293                                    I965_SURFACEFORMAT_R8_UNORM,
3294                                    VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3295
3296         res_size = 160;
3297
3298         gen9_add_dri_buffer_gpe_surface(ctx,
3299                                         gpe_context,
3300                                         mbenc_param->gpe_context_tx->curbe.bo,
3301                                         0,
3302                                         ALIGN(res_size, 64),
3303                                         mbenc_param->gpe_context_tx->curbe.offset,
3304                                         VP9_BTI_MBENC_TX_CURBE_G9);
3305
3306
3307         break;
3308     }
3309     case VP9_MEDIA_STATE_MBENC_TX:
3310     {
3311         obj_surface = mbenc_param->curr_frame_obj;
3312
3313         gen9_add_2d_gpe_surface(ctx,
3314                                 gpe_context,
3315                                 obj_surface,
3316                                 0,
3317                                 1,
3318                                 I965_SURFACEFORMAT_R8_UNORM,
3319                                 VP9_BTI_MBENC_CURR_Y_G9);
3320
3321         gen9_add_2d_gpe_surface(ctx,
3322                                 gpe_context,
3323                                 obj_surface,
3324                                 1,
3325                                 1,
3326                                 I965_SURFACEFORMAT_R16_UINT,
3327                                 VP9_BTI_MBENC_CURR_UV_G9);
3328
3329         if (mbenc_param->segmentation_enabled)
3330         {
3331            gen9_add_buffer_2d_gpe_surface(ctx,
3332                                    gpe_context,
3333                                    mbenc_param->pres_segmentation_map,
3334                                    1,
3335                                    I965_SURFACEFORMAT_R8_UNORM,
3336                                    VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3337
3338         }
3339
3340         res_size = 16 * mbenc_param->frame_width_in_mb *
3341                  mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3342         gen9_add_buffer_gpe_surface(ctx,
3343                                     gpe_context,
3344                                     mbenc_param->pres_mode_decision,
3345                                     0,
3346                                     res_size / 4,
3347                                     0,
3348                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3349
3350         res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3351         gen9_add_buffer_gpe_surface(ctx,
3352                                     gpe_context,
3353                                     mbenc_param->pres_mb_code_surface,
3354                                     0,
3355                                     res_size / 4,
3356                                     0,
3357                                     VP9_BTI_MBENC_PAK_DATA_G9);
3358
3359         // CU Record
3360         res_size = frame_width_in_sb * frame_height_in_sb *
3361                    64 * 16 * sizeof(unsigned int);
3362
3363         gen9_add_buffer_gpe_surface(ctx,
3364                                     gpe_context,
3365                                     mbenc_param->pres_mb_code_surface,
3366                                     0,
3367                                     res_size / 4,
3368                                     mbenc_param->mb_data_offset,
3369                                     VP9_BTI_MBENC_CU_RECORDS_G9);
3370     }
3371     default:
3372         break;
3373     }
3374
3375     return;
3376 }
3377
3378 static VAStatus
3379 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3380                       struct encode_state *encode_state,
3381                       struct intel_encoder_context *encoder_context,
3382                       int media_function)
3383 {
3384     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3385     struct i965_gpe_context *gpe_context, *tx_gpe_context;
3386     struct gpe_media_object_walker_parameter        media_object_walker_param;
3387     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
3388     unsigned int    resolution_x, resolution_y;
3389     struct gen9_vp9_state *vp9_state;
3390     VAEncPictureParameterBufferVP9  *pic_param;
3391     struct gen9_vp9_mbenc_curbe_param               curbe_param;
3392     struct gen9_vp9_mbenc_surface_param             surface_param;
3393     VAStatus    va_status = VA_STATUS_SUCCESS;
3394     int mbenc_gpe_index = 0;
3395     struct object_surface *obj_surface;
3396     struct gen9_surface_vp9 *vp9_priv_surface;
3397
3398     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3399
3400     if (!vp9_state || !vp9_state->pic_param)
3401         return VA_STATUS_ERROR_ENCODING_ERROR;
3402
3403     pic_param = vp9_state->pic_param;
3404
3405     switch (media_function)
3406     {
3407         case VP9_MEDIA_STATE_MBENC_I_32x32:
3408             mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3409             break;
3410
3411         case VP9_MEDIA_STATE_MBENC_I_16x16:
3412             mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3413             break;
3414
3415         case VP9_MEDIA_STATE_MBENC_P:
3416             mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3417             break;
3418
3419         case VP9_MEDIA_STATE_MBENC_TX:
3420             mbenc_gpe_index = VP9_MBENC_IDX_TX;
3421             break;
3422
3423         default:
3424             va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3425             return va_status;
3426     }
3427
3428     gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3429     tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3430
3431     gen9_gpe_reset_binding_table(ctx, gpe_context);
3432
3433     // Set curbe
3434     if (!vp9_state->mbenc_curbe_set_in_brc_update)
3435     {
3436         if(media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3437            media_function == VP9_MEDIA_STATE_MBENC_P)
3438         {
3439             memset(&curbe_param, 0, sizeof(curbe_param));
3440             curbe_param.ppic_param            = vp9_state->pic_param;
3441             curbe_param.pseq_param            = vp9_state->seq_param;
3442             curbe_param.psegment_param        = vp9_state->segment_param;
3443             curbe_param.frame_width_in_mb     = vp9_state->frame_width_in_mb;
3444             curbe_param.frame_height_in_mb    = vp9_state->frame_height_in_mb;
3445             curbe_param.last_ref_obj          = vp9_state->last_ref_obj;
3446             curbe_param.golden_ref_obj        = vp9_state->golden_ref_obj;
3447             curbe_param.alt_ref_obj           = vp9_state->alt_ref_obj;
3448             curbe_param.hme_enabled           = vp9_state->hme_enabled;
3449             curbe_param.ref_frame_flag        = vp9_state->ref_frame_flag;
3450             curbe_param.picture_coding_type   = vp9_state->picture_coding_type;
3451             curbe_param.media_state_type      = media_function;
3452             curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3453
3454             vme_context->pfn_set_curbe_mbenc(ctx,
3455                                              encode_state,
3456                                              gpe_context,
3457                                              encoder_context,
3458                                              &curbe_param);
3459         }
3460     }
3461
3462     memset(&surface_param, 0, sizeof(surface_param));
3463     surface_param.media_state_type             = media_function;
3464     surface_param.picture_coding_type          = vp9_state->picture_coding_type;
3465     surface_param.frame_width                  = vp9_state->frame_width;
3466     surface_param.frame_height                 = vp9_state->frame_height;
3467     surface_param.frame_width_in_mb            = vp9_state->frame_width_in_mb;
3468     surface_param.frame_height_in_mb           = vp9_state->frame_height_in_mb;
3469     surface_param.hme_enabled                  = vp9_state->hme_enabled;
3470     surface_param.segmentation_enabled         = pic_param->pic_flags.bits.segmentation_enabled;
3471     surface_param.pres_segmentation_map        = &vme_context->mb_segment_map_surface;
3472     surface_param.ps4x_memv_data_buffer        = &vme_context->s4x_memv_data_buffer;
3473     surface_param.ps4x_memv_distortion_buffer  = &vme_context->s4x_memv_distortion_buffer;
3474     surface_param.pres_mode_decision           =
3475               &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3476     surface_param.pres_mode_decision_prev      =
3477               &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3478     surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3479     surface_param.pres_mbenc_curbe_buffer      = NULL;
3480     surface_param.last_ref_obj               = vp9_state->last_ref_obj;
3481     surface_param.golden_ref_obj             = vp9_state->golden_ref_obj;
3482     surface_param.alt_ref_obj                  = vp9_state->alt_ref_obj;
3483     surface_param.pres_mb_code_surface         = &vme_context->res_mb_code_surface;
3484     surface_param.gpe_context_tx               = tx_gpe_context;
3485     surface_param.mb_data_offset             = vp9_state->mb_data_offset;
3486
3487     obj_surface = encode_state->reconstructed_object;
3488     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3489     if (vp9_state->dys_in_use &&
3490         (pic_param->frame_width_src != pic_param->frame_height_dst ||
3491          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3492         obj_surface = vp9_priv_surface->dys_surface_obj;
3493     } else
3494         obj_surface = encode_state->input_yuv_object;
3495
3496     surface_param.curr_frame_obj             = obj_surface;
3497
3498     vme_context->pfn_send_mbenc_surface(ctx,
3499                                         encode_state,
3500                                         gpe_context,
3501                                         encoder_context,
3502                                         &surface_param);
3503
3504     if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3505         resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3506         resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3507     } else {
3508         resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3509         resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3510     }
3511
3512     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3513     kernel_walker_param.resolution_x = resolution_x;
3514     kernel_walker_param.resolution_y = resolution_y;
3515
3516     if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3517         media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3518         kernel_walker_param.use_scoreboard = 1;
3519         kernel_walker_param.no_dependency = 0;
3520         kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3521     } else {
3522         kernel_walker_param.use_scoreboard = 0;
3523         kernel_walker_param.no_dependency = 1;
3524     }
3525
3526     gen8_gpe_setup_interface_data(ctx, gpe_context);
3527
3528     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3529
3530     gen9_run_kernel_media_object_walker(ctx, encoder_context,
3531                                         gpe_context,
3532                                         media_function,
3533                                         &media_object_walker_param);
3534     return va_status;
3535 }
3536
3537 static void
3538 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3539                           struct i965_gpe_context *gpe_context,
3540                           struct vp9_encoder_kernel_parameter *kernel_param)
3541 {
3542     struct i965_driver_data *i965 = i965_driver_data(ctx);
3543
3544     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
3545
3546     gpe_context->sampler.entry_size = 0;
3547     gpe_context->sampler.max_entries = 0;
3548
3549     if (kernel_param->sampler_size) {
3550         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3551         gpe_context->sampler.max_entries = 1;
3552     }
3553
3554     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3555     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
3556
3557     gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3558     gpe_context->surface_state_binding_table.binding_table_offset = 0;
3559     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3560     gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
3561
3562     if (i965->intel.eu_total > 0)
3563         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3564     else
3565         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3566
3567     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3568     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3569     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3570                                               gpe_context->vfe_state.curbe_allocation_size -
3571                                               ((gpe_context->idrt.entry_size >> 5) *
3572                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3573     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3574     gpe_context->vfe_state.gpgpu_mode = 0;
3575 }
3576
3577 static void
3578 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3579                              struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3580 {
3581     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3582     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3583     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3584
3585     if (scoreboard_param->walkpat_flag) {
3586         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3587         gpe_context->vfe_desc5.scoreboard0.type = 1;
3588
3589         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
3590         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
3591
3592         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3593         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
3594
3595         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
3596         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
3597
3598         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3599         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
3600     } else {
3601         // Scoreboard 0
3602         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
3603         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
3604
3605         // Scoreboard 1
3606         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3607         gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
3608
3609         // Scoreboard 2
3610         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
3611         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
3612
3613         // Scoreboard 3
3614         gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3615         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
3616
3617         // Scoreboard 4
3618         gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
3619         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
3620
3621         // Scoreboard 5
3622         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
3623         gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
3624
3625         // Scoreboard 6
3626         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
3627         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3628
3629         // Scoreboard 7
3630         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
3631         gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3632     }
3633 }
3634
3635 #define VP9_MI_BLOCK_MASK     0x07
3636 #define VP9_VME_REF_WIN       48
3637
3638 static VAStatus
3639 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3640                               struct encode_state *encode_state,
3641                               struct intel_encoder_context *encoder_context)
3642 {
3643     struct i965_driver_data *i965 = i965_driver_data(ctx);
3644     struct gen9_vp9_state *vp9_state;
3645     VAEncPictureParameterBufferVP9  *pic_param;
3646     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3647     VAEncSequenceParameterBufferVP9 *seq_param;
3648     struct object_surface *obj_surface;
3649     struct object_buffer *obj_buffer;
3650     struct gen9_surface_vp9 *vp9_priv_surface;
3651
3652     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3653
3654     if (!encode_state->pic_param_ext ||
3655         !encode_state->pic_param_ext->buffer) {
3656         return VA_STATUS_ERROR_INVALID_PARAMETER;
3657     }
3658     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
3659
3660     if (pic_param->frame_width_src & VP9_MI_BLOCK_MASK ||
3661         pic_param->frame_height_src & VP9_MI_BLOCK_MASK ||
3662         pic_param->frame_width_dst & VP9_MI_BLOCK_MASK ||
3663         pic_param->frame_height_dst & VP9_MI_BLOCK_MASK)
3664         return VA_STATUS_ERROR_INVALID_PARAMETER;
3665
3666     obj_buffer = BUFFER(pic_param->coded_buf);
3667
3668     if (!obj_buffer ||
3669         !obj_buffer->buffer_store ||
3670         !obj_buffer->buffer_store->bo)
3671         return VA_STATUS_ERROR_INVALID_PARAMETER;
3672
3673     encode_state->coded_buf_object = obj_buffer;
3674
3675     vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3676
3677     encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3678
3679     if (!encode_state->reconstructed_object ||
3680         !encode_state->input_yuv_object)
3681         return VA_STATUS_ERROR_INVALID_PARAMETER;
3682
3683     vp9_state->curr_frame = pic_param->reconstructed_frame;
3684     vp9_state->ref_frame_flag = 0;
3685     if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3686         pic_param->pic_flags.bits.intra_only) {
3687         /* this will be regarded as I-frame type */
3688         vp9_state->picture_coding_type = 0;
3689         vp9_state->last_ref_obj = NULL;
3690         vp9_state->golden_ref_obj = NULL;
3691         vp9_state->alt_ref_obj = NULL;
3692     } else {
3693         vp9_state->picture_coding_type = 1;
3694         vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3695                                     pic_param->ref_flags.bits.ref_frame_ctrl_l1;
3696
3697         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3698         vp9_state->last_ref_obj = obj_surface;
3699         if (!obj_surface ||
3700             !obj_surface->bo ||
3701             !obj_surface->private_data) {
3702             vp9_state->last_ref_obj = NULL;
3703             vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3704         }
3705
3706         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3707         vp9_state->golden_ref_obj = obj_surface;
3708         if (!obj_surface ||
3709             !obj_surface->bo ||
3710             !obj_surface->private_data) {
3711             vp9_state->golden_ref_obj = NULL;
3712             vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3713         }
3714
3715         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3716         vp9_state->alt_ref_obj = obj_surface;
3717         if (!obj_surface ||
3718             !obj_surface->bo ||
3719             !obj_surface->private_data) {
3720             vp9_state->alt_ref_obj = NULL;
3721             vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3722         }
3723
3724         /* remove the duplicated flag and ref frame list */
3725         if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3726             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3727                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3728                 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3729                 vp9_state->golden_ref_obj = NULL;
3730             }
3731
3732             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3733                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3734                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3735                 vp9_state->alt_ref_obj = NULL;
3736             }
3737         }
3738
3739         if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3740             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3741                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3742                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3743                 vp9_state->alt_ref_obj = NULL;
3744             }
3745         }
3746
3747         if (vp9_state->ref_frame_flag == 0)
3748             return VA_STATUS_ERROR_INVALID_PARAMETER;
3749     }
3750
3751     seg_param = NULL;
3752     if (pic_param->pic_flags.bits.segmentation_enabled) {
3753         if (!encode_state->q_matrix ||
3754             !encode_state->q_matrix->buffer) {
3755             return VA_STATUS_ERROR_INVALID_PARAMETER;
3756         }
3757         seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3758                            encode_state->q_matrix->buffer;
3759     }
3760
3761     seq_param = NULL;
3762     if (encode_state->seq_param_ext &&
3763         encode_state->seq_param_ext->buffer)
3764         seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3765
3766     if (!seq_param) {
3767         seq_param = &vp9_state->bogus_seq_param;
3768     }
3769
3770     vp9_state->pic_param = pic_param;
3771     vp9_state->segment_param = seg_param;
3772     vp9_state->seq_param = seq_param;
3773
3774     obj_surface = encode_state->reconstructed_object;
3775     if (pic_param->frame_width_dst > obj_surface->orig_width ||
3776         pic_param->frame_height_dst > obj_surface->orig_height)
3777         return VA_STATUS_ERROR_INVALID_SURFACE;
3778
3779     if (!vp9_state->dys_enabled &&
3780          ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3781           (pic_param->frame_height_src != pic_param->frame_height_dst)))
3782         return VA_STATUS_ERROR_UNIMPLEMENTED;
3783
3784     if (vp9_state->brc_enabled) {
3785         if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
3786             vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;
3787
3788             if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
3789                 !encoder_context->brc.bits_per_second[0])
3790                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3791
3792             vp9_state->gop_size = encoder_context->brc.gop_size;
3793             vp9_state->framerate = encoder_context->brc.framerate[0];
3794
3795             if (encoder_context->rate_control_mode == VA_RC_CBR ||
3796                 !encoder_context->brc.target_percentage[0]) {
3797                 vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
3798                 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3799                 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
3800             } else {
3801                 vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
3802                 vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
3803                 if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
3804                     vp9_state->min_bit_rate = 0;
3805                 else
3806                     vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
3807             }
3808
3809             if (encoder_context->brc.hrd_buffer_size)
3810                 vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
3811             else if (encoder_context->brc.window_size)
3812                 vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
3813             else
3814                 vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
3815             if (encoder_context->brc.hrd_initial_buffer_fullness)
3816                 vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
3817             else
3818                 vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
3819         }
3820     }
3821
3822     vp9_state->frame_width = pic_param->frame_width_dst;
3823     vp9_state->frame_height = pic_param->frame_height_dst;
3824
3825     vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
3826     vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
3827
3828     vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
3829     vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
3830
3831     vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
3832     vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
3833
3834     vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
3835     vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
3836     vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
3837     vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
3838
3839     vp9_state->dys_in_use = 0;
3840     if(pic_param->frame_width_src != pic_param->frame_width_dst ||
3841        pic_param->frame_height_src != pic_param->frame_height_dst)
3842         vp9_state->dys_in_use = 1;
3843     vp9_state->dys_ref_frame_flag = 0;
3844     /* check the dys setting. The dys is supported by default. */
3845     if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
3846         !pic_param->pic_flags.bits.intra_only) {
3847         vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
3848
3849         if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
3850              vp9_state->last_ref_obj) {
3851             obj_surface = vp9_state->last_ref_obj;
3852             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3853
3854             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3855                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3856                 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
3857         }
3858         if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
3859              vp9_state->golden_ref_obj) {
3860             obj_surface = vp9_state->golden_ref_obj;
3861             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3862
3863             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3864                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3865                 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
3866         }
3867         if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
3868              vp9_state->alt_ref_obj) {
3869             obj_surface = vp9_state->alt_ref_obj;
3870             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3871
3872             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3873                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3874                 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
3875         }
3876         if (vp9_state->dys_ref_frame_flag)
3877             vp9_state->dys_in_use = 1;
3878     }
3879
3880     if (vp9_state->hme_supported) {
3881         vp9_state->hme_enabled = 1;
3882     } else {
3883         vp9_state->hme_enabled = 0;
3884     }
3885
3886     if (vp9_state->b16xme_supported) {
3887         vp9_state->b16xme_enabled = 1;
3888     } else {
3889         vp9_state->b16xme_enabled = 0;
3890     }
3891
3892     /* disable HME/16xME if the size is too small */
3893     if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
3894         vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
3895         vp9_state->hme_enabled = 0;
3896         vp9_state->b16xme_enabled = 0;
3897     }
3898
3899     if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
3900         vp9_state->frame_height_16x < VP9_VME_REF_WIN)
3901         vp9_state->b16xme_enabled = 0;
3902
3903     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
3904         pic_param->pic_flags.bits.intra_only) {
3905         vp9_state->hme_enabled = 0;
3906         vp9_state->b16xme_enabled = 0;
3907     }
3908
3909     vp9_state->mbenc_keyframe_dist_enabled = 0;
3910     if ((vp9_state->picture_coding_type == KEY_FRAME) &&
3911         vp9_state->brc_distortion_buffer_supported)
3912         vp9_state->mbenc_keyframe_dist_enabled = 1;
3913
3914     return VA_STATUS_SUCCESS;
3915 }
3916
3917 static VAStatus
3918 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3919                               struct encode_state *encode_state,
3920                               struct intel_encoder_context *encoder_context)
3921 {
3922     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3923     struct vp9_surface_param surface_param;
3924     struct gen9_vp9_state *vp9_state;
3925     VAEncPictureParameterBufferVP9  *pic_param;
3926     struct object_surface *obj_surface;
3927     struct gen9_surface_vp9 *vp9_surface;
3928     int driver_header_flag = 0;
3929     VAStatus va_status;
3930
3931     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3932
3933     if (!vp9_state || !vp9_state->pic_param)
3934         return VA_STATUS_ERROR_INVALID_PARAMETER;
3935
3936     pic_param = vp9_state->pic_param;
3937
3938     /* this is to check whether the driver should generate the uncompressed header */
3939     driver_header_flag = 1;
3940     if (encode_state->packed_header_data_ext &&
3941         encode_state->packed_header_data_ext[0] &&
3942         pic_param->bit_offset_first_partition_size) {
3943         VAEncPackedHeaderParameterBuffer *param = NULL;
3944
3945         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3946
3947         if (param->type == VAEncPackedHeaderRawData) {
3948             char *header_data;
3949             unsigned int length_in_bits;
3950
3951             header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3952             length_in_bits = param->bit_length;
3953             driver_header_flag = 0;
3954
3955             vp9_state->frame_header.bit_offset_first_partition_size =
3956                           pic_param->bit_offset_first_partition_size;
3957             vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3958             vp9_state->alias_insert_data = header_data;
3959
3960             vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3961             vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3962             vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3963             vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3964             vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3965             vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3966         }
3967     }
3968
3969     if (driver_header_flag) {
3970         memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3971         intel_write_uncompressed_header(encode_state,
3972                                         VAProfileVP9Profile0,
3973                                         vme_context->frame_header_data,
3974                                         &vp9_state->header_length,
3975                                         &vp9_state->frame_header);
3976         vp9_state->alias_insert_data = vme_context->frame_header_data;
3977     }
3978
3979     va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3980                                     1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3981     if (va_status != VA_STATUS_SUCCESS)
3982         return va_status;
3983
3984     va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3985                                     1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3986
3987     if (va_status != VA_STATUS_SUCCESS)
3988         return va_status;
3989
3990     surface_param.frame_width = vp9_state->frame_width;
3991     surface_param.frame_height = vp9_state->frame_height;
3992     va_status = gen9_vp9_init_check_surfaces(ctx,
3993                                              encode_state->reconstructed_object,
3994                                              &surface_param);
3995
3996     {
3997         vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3998
3999         vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
4000     }
4001     if (vp9_state->dys_in_use &&
4002         (pic_param->frame_width_src != pic_param->frame_width_dst ||
4003          pic_param->frame_height_src != pic_param->frame_height_dst)) {
4004         surface_param.frame_width = pic_param->frame_width_dst;
4005         surface_param.frame_height = pic_param->frame_height_dst;
4006         va_status = gen9_vp9_check_dys_surfaces(ctx,
4007                                     encode_state->reconstructed_object,
4008                                     &surface_param);
4009
4010         if (va_status)
4011             return va_status;
4012     }
4013
4014     if (vp9_state->dys_ref_frame_flag) {
4015         if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
4016              vp9_state->last_ref_obj) {
4017             obj_surface = vp9_state->last_ref_obj;
4018             surface_param.frame_width = vp9_state->frame_width;
4019             surface_param.frame_height = vp9_state->frame_height;
4020             va_status = gen9_vp9_check_dys_surfaces(ctx,
4021                                     obj_surface,
4022                                     &surface_param);
4023
4024             if (va_status)
4025                 return va_status;
4026         }
4027         if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
4028              vp9_state->golden_ref_obj) {
4029             obj_surface = vp9_state->golden_ref_obj;
4030             surface_param.frame_width = vp9_state->frame_width;
4031             surface_param.frame_height = vp9_state->frame_height;
4032             va_status = gen9_vp9_check_dys_surfaces(ctx,
4033                                     obj_surface,
4034                                     &surface_param);
4035
4036             if (va_status)
4037                 return va_status;
4038         }
4039         if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
4040              vp9_state->alt_ref_obj) {
4041             obj_surface = vp9_state->alt_ref_obj;
4042             surface_param.frame_width = vp9_state->frame_width;
4043             surface_param.frame_height = vp9_state->frame_height;
4044             va_status = gen9_vp9_check_dys_surfaces(ctx,
4045                                     obj_surface,
4046                                     &surface_param);
4047
4048             if (va_status)
4049                 return va_status;
4050         }
4051     }
4052
4053     if (va_status != VA_STATUS_SUCCESS)
4054         return va_status;
4055     /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
4056
4057     return VA_STATUS_SUCCESS;
4058 }
4059
4060 static VAStatus
4061 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4062                               struct encode_state *encode_state,
4063                               struct intel_encoder_context *encoder_context)
4064 {
4065     struct i965_driver_data *i965 = i965_driver_data(ctx);
4066     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4067     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4068     struct vp9_dys_context *dys_context = &vme_context->dys_context;
4069     struct gpe_dynamic_state_parameter ds_param;
4070     int i;
4071
4072     /*
4073      * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4074      * MBEnc first
4075      */
4076     for (i = 0; i < NUM_VP9_MBENC; i++) {
4077         gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4078     }
4079
4080     /*
4081      * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
4082      * curbe_buffer.
4083      */
4084     ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4085            ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4086     mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4087                                                "mbenc_dys",
4088                                                ds_param.bo_size,
4089                                                0x1000);
4090     mbenc_context->mbenc_bo_size = ds_param.bo_size;
4091
4092     ds_param.bo = mbenc_context->mbenc_bo_dys;
4093     ds_param.curbe_offset = 0;
4094     ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4095     for (i = 0; i < NUM_VP9_MBENC; i++) {
4096         ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4097                    ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4098
4099         gen8_gpe_context_set_dynamic_buffer(ctx,
4100                                             &mbenc_context->gpe_contexts[i],
4101                                             &ds_param);
4102     }
4103
4104     gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4105     gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4106
4107     return VA_STATUS_SUCCESS;
4108 }
4109
4110 static VAStatus
4111 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4112                               struct encode_state *encode_state,
4113                               struct intel_encoder_context *encoder_context)
4114 {
4115     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4116     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4117
4118     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4119     mbenc_context->mbenc_bo_dys = NULL;
4120
4121     return VA_STATUS_SUCCESS;
4122 }
4123
4124 static VAStatus
4125 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4126                               struct encode_state *encode_state,
4127                               struct intel_encoder_context *encoder_context)
4128 {
4129     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4130     struct gen9_vp9_state *vp9_state;
4131     int i;
4132
4133     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4134
4135     if (!vp9_state || !vp9_state->pic_param)
4136         return VA_STATUS_ERROR_INVALID_PARAMETER;
4137
4138     if (vp9_state->dys_in_use) {
4139         gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4140     }
4141
4142     if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4143         gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
4144     }
4145
4146     if (vp9_state->picture_coding_type == KEY_FRAME) {
4147         for (i = 0; i < 2; i++)
4148             i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4149     }
4150
4151     if (vp9_state->hme_supported) {
4152         gen9_vp9_scaling_kernel(ctx, encode_state,
4153                                 encoder_context,
4154                                 0);
4155         if (vp9_state->b16xme_supported) {
4156             gen9_vp9_scaling_kernel(ctx, encode_state,
4157                                     encoder_context,
4158                                     1);
4159         }
4160     }
4161
4162     if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4163         if (vp9_state->b16xme_enabled)
4164             gen9_vp9_me_kernel(ctx, encode_state,
4165                                encoder_context,
4166                                1);
4167
4168         gen9_vp9_me_kernel(ctx, encode_state,
4169                            encoder_context,
4170                            0);
4171     }
4172
4173     if (vp9_state->brc_enabled) {
4174         if (vp9_state->mbenc_keyframe_dist_enabled)
4175             gen9_vp9_brc_intra_dist_kernel(ctx,
4176                                            encode_state,
4177                                            encoder_context);
4178
4179         gen9_vp9_brc_update_kernel(ctx, encode_state,
4180                                    encoder_context);
4181     }
4182
4183     if (vp9_state->picture_coding_type == KEY_FRAME) {
4184         gen9_vp9_mbenc_kernel(ctx, encode_state,
4185                               encoder_context,
4186                               VP9_MEDIA_STATE_MBENC_I_32x32);
4187         gen9_vp9_mbenc_kernel(ctx, encode_state,
4188                               encoder_context,
4189                               VP9_MEDIA_STATE_MBENC_I_16x16);
4190     } else {
4191         gen9_vp9_mbenc_kernel(ctx, encode_state,
4192                               encoder_context,
4193                               VP9_MEDIA_STATE_MBENC_P);
4194     }
4195
4196     gen9_vp9_mbenc_kernel(ctx, encode_state,
4197                           encoder_context,
4198                           VP9_MEDIA_STATE_MBENC_TX);
4199
4200     vp9_state->curr_mode_decision_index ^= 1;
4201     if (vp9_state->brc_enabled) {
4202         vp9_state->brc_inited = 1;
4203         vp9_state->brc_reset = 0;
4204     }
4205
4206     return VA_STATUS_SUCCESS;
4207 }
4208
4209 static VAStatus
4210 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4211                        VAProfile profile,
4212                        struct encode_state *encode_state,
4213                        struct intel_encoder_context *encoder_context)
4214 {
4215     VAStatus va_status;
4216     struct gen9_vp9_state *vp9_state;
4217
4218     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4219
4220     if (!vp9_state)
4221         return VA_STATUS_ERROR_INVALID_CONTEXT;
4222
4223     va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4224     if (va_status != VA_STATUS_SUCCESS)
4225         return va_status;
4226
4227     va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4228                             encoder_context,
4229                             !vp9_state->brc_allocated);
4230
4231     if (va_status != VA_STATUS_SUCCESS)
4232         return va_status;
4233     vp9_state->brc_allocated = 1;
4234
4235     va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4236
4237     if (va_status != VA_STATUS_SUCCESS)
4238         return va_status;
4239
4240     va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4241     if (va_status != VA_STATUS_SUCCESS)
4242         return va_status;
4243
4244     va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4245     if (va_status != VA_STATUS_SUCCESS)
4246         return va_status;
4247
4248     gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4249
4250     return VA_STATUS_SUCCESS;
4251 }
4252
4253 static void
4254 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4255 {
4256     int i;
4257
4258     for (i = 0; i < NUM_VP9_BRC; i++)
4259         gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
4260 }
4261
4262 static void
4263 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4264 {
4265     int i;
4266
4267     for (i = 0; i < NUM_VP9_SCALING; i++)
4268         gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
4269 }
4270
4271 static void
4272 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4273 {
4274     gen8_gpe_context_destroy(&me_context->gpe_context);
4275 }
4276
4277 static void
4278 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4279 {
4280     int i;
4281
4282     for (i = 0; i < NUM_VP9_MBENC; i++)
4283         gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4284     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4285     mbenc_context->mbenc_bo_size = 0;
4286 }
4287
4288 static void
4289 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4290 {
4291     gen8_gpe_context_destroy(&dys_context->gpe_context);
4292 }
4293
4294 static void
4295 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4296 {
4297     gen9_vp9_free_resources(vme_context);
4298     gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4299     gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4300     gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4301     gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4302     gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
4303
4304     return;
4305 }
4306
/*
 * Destroy callback for the VP9 VME context.  `context` is treated as a
 * struct gen9_encoder_context_vp9; a NULL pointer is ignored.  The kernel
 * contexts and resources are released before the structure is freed.
 */
static void
gen9_vme_context_destroy_vp9(void *context)
{
    struct gen9_encoder_context_vp9 *vme_context =
        (struct gen9_encoder_context_vp9 *) context;

    if (vme_context != NULL) {
        gen9_vme_kernel_context_destroy_vp9(vme_context);
        free(vme_context);
    }
}
4321
4322 static void
4323 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4324                                    struct gen9_encoder_context_vp9 *vme_context,
4325                                    struct vp9_scaling_context *scaling_context)
4326 {
4327     struct i965_gpe_context *gpe_context = NULL;
4328     struct vp9_encoder_kernel_parameter kernel_param;
4329     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4330     struct i965_kernel scale_kernel;
4331
4332     kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4333     kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4334     kernel_param.sampler_size = 0;
4335
4336     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4337     scoreboard_param.mask = 0xFF;
4338     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4339     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4340     scoreboard_param.walkpat_flag = 0;
4341
4342     gpe_context = &scaling_context->gpe_contexts[0];
4343     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4344     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4345
4346     scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4347     scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4348     scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4349                            VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4350
4351     memset(&scale_kernel, 0, sizeof(scale_kernel));
4352
4353     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4354                                          sizeof(media_vp9_kernels),
4355                                          INTEL_VP9_ENC_SCALING4X,
4356                                          0,
4357                                          &scale_kernel);
4358
4359     gen8_gpe_load_kernels(ctx,
4360                           gpe_context,
4361                           &scale_kernel,
4362                           1);
4363
4364     kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4365     kernel_param.inline_data_size = 0;
4366     kernel_param.sampler_size = 0;
4367
4368     gpe_context = &scaling_context->gpe_contexts[1];
4369     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4370     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4371
4372     memset(&scale_kernel, 0, sizeof(scale_kernel));
4373
4374     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4375                                          sizeof(media_vp9_kernels),
4376                                          INTEL_VP9_ENC_SCALING2X,
4377                                          0,
4378                                          &scale_kernel);
4379
4380     gen8_gpe_load_kernels(ctx,
4381                           gpe_context,
4382                           &scale_kernel,
4383                           1);
4384
4385     scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4386     scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4387     return;
4388 }
4389
4390 static void
4391 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4392                              struct gen9_encoder_context_vp9 *vme_context,
4393                              struct vp9_me_context *me_context)
4394 {
4395     struct i965_gpe_context *gpe_context = NULL;
4396     struct vp9_encoder_kernel_parameter kernel_param;
4397     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4398     struct i965_kernel scale_kernel;
4399
4400     kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4401     kernel_param.inline_data_size = 0;
4402     kernel_param.sampler_size = 0;
4403
4404     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4405     scoreboard_param.mask = 0xFF;
4406     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4407     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4408     scoreboard_param.walkpat_flag = 0;
4409
4410     gpe_context = &me_context->gpe_context;
4411     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4412     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4413
4414     memset(&scale_kernel, 0, sizeof(scale_kernel));
4415
4416     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4417                                          sizeof(media_vp9_kernels),
4418                                          INTEL_VP9_ENC_ME,
4419                                          0,
4420                                          &scale_kernel);
4421
4422     gen8_gpe_load_kernels(ctx,
4423                           gpe_context,
4424                           &scale_kernel,
4425                           1);
4426
4427     return;
4428 }
4429
4430 static void
4431 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4432                                  struct gen9_encoder_context_vp9 *vme_context,
4433                                  struct vp9_mbenc_context *mbenc_context)
4434 {
4435     struct i965_gpe_context *gpe_context = NULL;
4436     struct vp9_encoder_kernel_parameter kernel_param;
4437     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4438     int i;
4439     struct i965_kernel scale_kernel;
4440
4441     kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4442     kernel_param.inline_data_size = 0;
4443     kernel_param.sampler_size = 0;
4444
4445     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4446     scoreboard_param.mask = 0xFF;
4447     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4448     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4449
4450     for (i = 0; i < NUM_VP9_MBENC; i++) {
4451         gpe_context = &mbenc_context->gpe_contexts[i];
4452
4453         if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4454             (i == VP9_MBENC_IDX_INTER)) {
4455             scoreboard_param.walkpat_flag = 1;
4456         } else
4457             scoreboard_param.walkpat_flag = 0;
4458
4459         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4460         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4461
4462         memset(&scale_kernel, 0, sizeof(scale_kernel));
4463
4464         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4465                                          sizeof(media_vp9_kernels),
4466                                          INTEL_VP9_ENC_MBENC,
4467                                          i,
4468                                          &scale_kernel);
4469
4470         gen8_gpe_load_kernels(ctx,
4471                               gpe_context,
4472                               &scale_kernel,
4473                               1);
4474     }
4475 }
4476
4477 static void
4478 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4479                               struct gen9_encoder_context_vp9 *vme_context,
4480                               struct vp9_brc_context *brc_context)
4481 {
4482     struct i965_gpe_context *gpe_context = NULL;
4483     struct vp9_encoder_kernel_parameter kernel_param;
4484     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4485     int i;
4486     struct i965_kernel scale_kernel;
4487
4488     kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4489     kernel_param.inline_data_size = 0;
4490     kernel_param.sampler_size = 0;
4491
4492     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4493     scoreboard_param.mask = 0xFF;
4494     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4495     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4496
4497     for (i = 0; i < NUM_VP9_BRC; i++) {
4498         gpe_context = &brc_context->gpe_contexts[i];
4499         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4500         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4501
4502         memset(&scale_kernel, 0, sizeof(scale_kernel));
4503
4504         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4505                                          sizeof(media_vp9_kernels),
4506                                          INTEL_VP9_ENC_BRC,
4507                                          i,
4508                                          &scale_kernel);
4509
4510         gen8_gpe_load_kernels(ctx,
4511                               gpe_context,
4512                               &scale_kernel,
4513                               1);
4514     }
4515 }
4516
4517 static void
4518 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4519                                struct gen9_encoder_context_vp9 *vme_context,
4520                                struct vp9_dys_context *dys_context)
4521 {
4522     struct i965_gpe_context *gpe_context = NULL;
4523     struct vp9_encoder_kernel_parameter kernel_param;
4524     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4525     struct i965_kernel scale_kernel;
4526
4527     kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4528     kernel_param.inline_data_size = 0;
4529     kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4530
4531     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4532     scoreboard_param.mask = 0xFF;
4533     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4534     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4535     scoreboard_param.walkpat_flag = 0;
4536
4537     gpe_context = &dys_context->gpe_context;
4538     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4539     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4540
4541     memset(&scale_kernel, 0, sizeof(scale_kernel));
4542
4543     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4544                                          sizeof(media_vp9_kernels),
4545                                          INTEL_VP9_ENC_DYS,
4546                                          0,
4547                                          &scale_kernel);
4548
4549     gen8_gpe_load_kernels(ctx,
4550                           gpe_context,
4551                           &scale_kernel,
4552                           1);
4553
4554     return;
4555 }
4556
4557 static Bool
4558 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4559                                    struct intel_encoder_context *encoder_context,
4560                                    struct gen9_encoder_context_vp9 *vme_context)
4561 {
4562     gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4563     gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4564     gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4565     gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4566     gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4567
4568     vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4569     vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4570     vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4571     vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4572
4573     vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4574
4575     vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4576     vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4577     vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4578     vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
4579     return true;
4580 }
4581
4582 static
4583 void gen9_vp9_write_compressed_element(char *buffer,
4584                                        int index,
4585                                        int prob,
4586                                        bool value)
4587 {
4588     struct vp9_compressed_element *base_element, *vp9_element;
4589     base_element = (struct vp9_compressed_element *)buffer;
4590
4591     vp9_element = base_element + (index >> 1);
4592     if (index % 2) {
4593         vp9_element->b_valid = 1;
4594         vp9_element->b_probdiff_select = 1;
4595         vp9_element->b_prob_select = (prob == 252) ? 1: 0;
4596         vp9_element->b_bin = value;
4597     } else {
4598         vp9_element->a_valid = 1;
4599         vp9_element->a_probdiff_select = 1;
4600         vp9_element->a_prob_select = (prob == 252) ? 1: 0;
4601         vp9_element->a_bin = value;
4602     }
4603 }
4604
4605 static void
4606 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4607                                             struct intel_encoder_context *encoder_context)
4608 {
4609     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4610     VAEncPictureParameterBufferVP9 *pic_param;
4611     struct gen9_vp9_state *vp9_state;
4612     char *buffer;
4613     int i;
4614
4615     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4616
4617     if (!pak_context || !vp9_state || !vp9_state->pic_param)
4618         return;
4619
4620     pic_param = vp9_state->pic_param;
4621     if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4622         (pic_param->pic_flags.bits.intra_only) ||
4623          pic_param->pic_flags.bits.error_resilient_mode) {
4624         /* reset current frame_context */
4625         intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4626         if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4627             pic_param->pic_flags.bits.error_resilient_mode ||
4628             (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4629             for (i = 0; i < 4; i++)
4630                 memcpy(&vp9_state->vp9_frame_ctx[i],
4631                        &vp9_state->vp9_current_fc,
4632                        sizeof(FRAME_CONTEXT));
4633         } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4634             i = pic_param->pic_flags.bits.frame_context_idx;
4635             memcpy(&vp9_state->vp9_frame_ctx[i],
4636                    &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4637         }
4638         /* reset the frame_ctx_idx = 0 */
4639         vp9_state->frame_ctx_idx = 0;
4640     } else {
4641         vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
4642     }
4643
4644     i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4645     buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
4646
4647     if (!buffer)
4648         return;
4649
4650     /* write tx_size */
4651     if ((pic_param->luma_ac_qindex == 0) &&
4652         (pic_param->luma_dc_qindex_delta == 0) &&
4653         (pic_param->chroma_ac_qindex_delta == 0) &&
4654         (pic_param->chroma_dc_qindex_delta == 0)) {
4655         /* lossless flag */
4656         /* nothing is needed */
4657             gen9_vp9_write_compressed_element(buffer,
4658                                           0, 128, 0);
4659             gen9_vp9_write_compressed_element(buffer,
4660                                           1, 128, 0);
4661             gen9_vp9_write_compressed_element(buffer,
4662                                           2, 128, 0);
4663     } else {
4664         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4665             gen9_vp9_write_compressed_element(buffer,
4666                                           0, 128, 1);
4667             gen9_vp9_write_compressed_element(buffer,
4668                                           1, 128, 1);
4669             gen9_vp9_write_compressed_element(buffer,
4670                                           2, 128, 1);
4671         } else if (vp9_state->tx_mode == ALLOW_32X32) {
4672             gen9_vp9_write_compressed_element(buffer,
4673                                           0, 128, 1);
4674             gen9_vp9_write_compressed_element(buffer,
4675                                           1, 128, 1);
4676             gen9_vp9_write_compressed_element(buffer,
4677                                           2, 128, 0);
4678         } else {
4679             unsigned int tx_mode;
4680
4681             tx_mode = vp9_state->tx_mode;
4682             gen9_vp9_write_compressed_element(buffer,
4683                                           0, 128, ((tx_mode) & 2));
4684             gen9_vp9_write_compressed_element(buffer,
4685                                           1, 128, ((tx_mode) & 1));
4686             gen9_vp9_write_compressed_element(buffer,
4687                                           2, 128, 0);
4688         }
4689
4690         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4691
4692             gen9_vp9_write_compressed_element(buffer,
4693                                           3, 128, 0);
4694
4695             gen9_vp9_write_compressed_element(buffer,
4696                                           7, 128, 0);
4697
4698             gen9_vp9_write_compressed_element(buffer,
4699                                           15, 128, 0);
4700         }
4701     }
4702      /*Setup all the input&output object*/
4703
4704     {
4705         /* update the coeff_update flag */
4706         gen9_vp9_write_compressed_element(buffer,
4707                                       27, 128, 0);
4708         gen9_vp9_write_compressed_element(buffer,
4709                                       820, 128, 0);
4710         gen9_vp9_write_compressed_element(buffer,
4711                                       1613, 128, 0);
4712         gen9_vp9_write_compressed_element(buffer,
4713                                       2406, 128, 0);
4714     }
4715
4716
4717     if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only)
4718     {
4719         bool allow_comp = !(
4720             (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4721             (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4722             );
4723
4724         if (allow_comp)
4725         {
4726             if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4727                 gen9_vp9_write_compressed_element(buffer,
4728                                           3271, 128, 1);
4729                 gen9_vp9_write_compressed_element(buffer,
4730                                           3272, 128, 1);
4731             }
4732             else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4733                 gen9_vp9_write_compressed_element(buffer,
4734                                           3271, 128, 1);
4735                 gen9_vp9_write_compressed_element(buffer,
4736                                           3272, 128, 0);
4737             }
4738             else {
4739
4740                 gen9_vp9_write_compressed_element(buffer,
4741                                           3271, 128, 0);
4742                 gen9_vp9_write_compressed_element(buffer,
4743                                           3272, 128, 0);
4744             }
4745         }
4746     }
4747
4748     i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
4749 }
4750
4751
4752 static void
4753 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4754                            struct encode_state *encode_state,
4755                            struct intel_encoder_context *encoder_context,
4756                            struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4757 {
4758     struct intel_batchbuffer *batch = encoder_context->base.batch;
4759
4760     BEGIN_BCS_BATCH(batch, 6);
4761
4762     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4763     OUT_BCS_BATCH(batch,
4764                   (pipe_mode_param->stream_out << 12) |
4765                   (pipe_mode_param->codec_mode << 5) |
4766                   (0 << 3) | /* disable Pic Status / Error Report */
4767                   (pipe_mode_param->stream_out << 2) |
4768                   HCP_CODEC_SELECT_ENCODE);
4769     OUT_BCS_BATCH(batch, 0);
4770     OUT_BCS_BATCH(batch, 0);
4771     OUT_BCS_BATCH(batch, (1 << 6));
4772     OUT_BCS_BATCH(batch, 0);
4773
4774     ADVANCE_BCS_BATCH(batch);
4775 }
4776
4777 static void
4778 gen9_vp9_add_surface_state(VADriverContextP ctx,
4779                        struct encode_state *encode_state,
4780                        struct intel_encoder_context *encoder_context,
4781                        hcp_surface_state *hcp_state)
4782 {
4783     struct intel_batchbuffer *batch = encoder_context->base.batch;
4784     if (!hcp_state)
4785         return;
4786
4787     BEGIN_BCS_BATCH(batch, 3);
4788     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4789     OUT_BCS_BATCH(batch,
4790                   (hcp_state->dw1.surface_id << 28) |
4791                   (hcp_state->dw1.surface_pitch - 1)
4792                  );
4793     OUT_BCS_BATCH(batch,
4794                   (hcp_state->dw2.surface_format << 28) |
4795                   (hcp_state->dw2.y_cb_offset)
4796                  );
4797     ADVANCE_BCS_BATCH(batch);
4798 }
4799
4800 static void
4801 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4802                                  struct encode_state *encode_state,
4803                                  struct intel_encoder_context *encoder_context)
4804 {
4805     struct i965_driver_data *i965 = i965_driver_data(ctx);
4806     struct intel_batchbuffer *batch = encoder_context->base.batch;
4807     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4808     struct gen9_vp9_state *vp9_state;
4809     unsigned int i;
4810     struct object_surface *obj_surface;
4811
4812     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4813
4814     if (!vp9_state || !vp9_state->pic_param)
4815          return;
4816
4817
4818     BEGIN_BCS_BATCH(batch, 104);
4819
4820     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4821
4822     obj_surface = encode_state->reconstructed_object;
4823
4824     /* reconstructed obj_surface is already checked. So this is skipped */
4825     /* DW 1..3 decoded surface */
4826     OUT_RELOC64(batch,
4827                 obj_surface->bo,
4828                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4829                 0);
4830     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4831
4832     /* DW 4..6 deblocking line */
4833     OUT_RELOC64(batch,
4834                 pak_context->res_deblocking_filter_line_buffer.bo,
4835                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4836                 0);
4837     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4838
4839     /* DW 7..9 deblocking tile line */
4840     OUT_RELOC64(batch,
4841                 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4842                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4843                 0);
4844     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4845
4846     /* DW 10..12 deblocking tile col */
4847     OUT_RELOC64(batch,
4848                 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4849                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4850                 0);
4851     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4852
4853     /* DW 13..15 metadata line */
4854     OUT_RELOC64(batch,
4855                 pak_context->res_metadata_line_buffer.bo,
4856                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4857                 0);
4858     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4859
4860     /* DW 16..18 metadata tile line */
4861     OUT_RELOC64(batch,
4862                 pak_context->res_metadata_tile_line_buffer.bo,
4863                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4864                 0);
4865     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4866
4867     /* DW 19..21 metadata tile col */
4868     OUT_RELOC64(batch,
4869                 pak_context->res_metadata_tile_col_buffer.bo,
4870                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4871                 0);
4872     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4873
4874     /* DW 22..30 SAO is not used for VP9 */
4875     OUT_BCS_BATCH(batch, 0);
4876     OUT_BCS_BATCH(batch, 0);
4877     OUT_BCS_BATCH(batch, 0);
4878     OUT_BCS_BATCH(batch, 0);
4879     OUT_BCS_BATCH(batch, 0);
4880     OUT_BCS_BATCH(batch, 0);
4881     OUT_BCS_BATCH(batch, 0);
4882     OUT_BCS_BATCH(batch, 0);
4883     OUT_BCS_BATCH(batch, 0);
4884
4885     /* DW 31..33 Current Motion vector temporal buffer */
4886     OUT_RELOC64(batch,
4887                 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
4888                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4889                 0);
4890     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4891
4892     /* DW 34..36 Not used */
4893     OUT_BCS_BATCH(batch, 0);
4894     OUT_BCS_BATCH(batch, 0);
4895     OUT_BCS_BATCH(batch, 0);
4896
4897     /* Only the first three reference_frame is used for VP9 */
4898     /* DW 37..52 for reference_frame */
4899     i = 0;
4900     if (vp9_state->picture_coding_type) {
4901         for (i = 0; i < 3; i++) {
4902
4903             if (pak_context->reference_surfaces[i].bo) {
4904                 OUT_RELOC64(batch,
4905                             pak_context->reference_surfaces[i].bo,
4906                             I915_GEM_DOMAIN_INSTRUCTION, 0,
4907                             0);
4908             } else {
4909                 OUT_BCS_BATCH(batch, 0);
4910                 OUT_BCS_BATCH(batch, 0);
4911             }
4912         }
4913     }
4914
4915     for (; i < 8; i++) {
4916         OUT_BCS_BATCH(batch, 0);
4917         OUT_BCS_BATCH(batch, 0);
4918     }
4919
4920     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4921
4922     /* DW 54..56 for source input */
4923     OUT_RELOC64(batch,
4924                 pak_context->uncompressed_picture_source.bo,
4925                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4926                 0);
4927     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4928
4929     /* DW 57..59 StreamOut is not used */
4930     OUT_BCS_BATCH(batch, 0);
4931     OUT_BCS_BATCH(batch, 0);
4932     OUT_BCS_BATCH(batch, 0);
4933
4934     /* DW 60..62. Not used for encoder */
4935     OUT_BCS_BATCH(batch, 0);
4936     OUT_BCS_BATCH(batch, 0);
4937     OUT_BCS_BATCH(batch, 0);
4938
4939     /* DW 63..65. ILDB Not used for encoder */
4940     OUT_BCS_BATCH(batch, 0);
4941     OUT_BCS_BATCH(batch, 0);
4942     OUT_BCS_BATCH(batch, 0);
4943
4944     /* DW 66..81 For the collocated motion vector temporal buffer */
4945     if (vp9_state->picture_coding_type) {
4946         int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
4947         OUT_RELOC64(batch,
4948                 pak_context->res_mv_temporal_buffer[prev_index].bo,
4949                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4950                 0);
4951     } else {
4952         OUT_BCS_BATCH(batch, 0);
4953         OUT_BCS_BATCH(batch, 0);
4954     }
4955
4956     for (i = 1; i < 8; i++) {
4957         OUT_BCS_BATCH(batch, 0);
4958         OUT_BCS_BATCH(batch, 0);
4959     }
4960     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4961
4962     /* DW 83..85 VP9 prob buffer */
4963     OUT_RELOC64(batch,
4964                 pak_context->res_prob_buffer.bo,
4965                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4966                 0);
4967
4968     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4969
4970     /* DW 86..88 Segment id buffer */
4971     if (pak_context->res_segmentid_buffer.bo) {
4972         OUT_RELOC64(batch,
4973                     pak_context->res_segmentid_buffer.bo,
4974                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4975                     0);
4976     } else {
4977         OUT_BCS_BATCH(batch, 0);
4978         OUT_BCS_BATCH(batch, 0);
4979     }
4980     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4981
4982     /* DW 89..91 HVD line rowstore buffer */
4983     OUT_RELOC64(batch,
4984                 pak_context->res_hvd_line_buffer.bo,
4985                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4986                 0);
4987     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4988
4989     /* DW 92..94 HVD tile line rowstore buffer */
4990     OUT_RELOC64(batch,
4991                 pak_context->res_hvd_tile_line_buffer.bo,
4992                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4993                 0);
4994     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4995
4996     /* DW 95..97 SAO streamout. Not used for VP9 */
4997     OUT_BCS_BATCH(batch, 0);
4998     OUT_BCS_BATCH(batch, 0);
4999     OUT_BCS_BATCH(batch, 0);
5000
5001     /* reserved for KBL. 98..100 */
5002     OUT_BCS_BATCH(batch, 0);
5003     OUT_BCS_BATCH(batch, 0);
5004     OUT_BCS_BATCH(batch, 0);
5005
5006     /* 101..103 */
5007     OUT_BCS_BATCH(batch, 0);
5008     OUT_BCS_BATCH(batch, 0);
5009     OUT_BCS_BATCH(batch, 0);
5010
5011     ADVANCE_BCS_BATCH(batch);
5012 }
5013
5014 static void
5015 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
5016                                      struct encode_state *encode_state,
5017                                      struct intel_encoder_context *encoder_context)
5018 {
5019     struct i965_driver_data *i965 = i965_driver_data(ctx);
5020     struct intel_batchbuffer *batch = encoder_context->base.batch;
5021     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5022     struct gen9_vp9_state *vp9_state;
5023
5024     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5025
5026     /* to do */
5027     BEGIN_BCS_BATCH(batch, 29);
5028
5029     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
5030
5031     /* indirect bitstream object base */
5032     OUT_BCS_BATCH(batch, 0);
5033     OUT_BCS_BATCH(batch, 0);
5034     OUT_BCS_BATCH(batch, 0);
5035     /* the upper bound of indirect bitstream object */
5036     OUT_BCS_BATCH(batch, 0);
5037     OUT_BCS_BATCH(batch, 0);
5038
5039     /* DW 6: Indirect CU object base address */
5040     OUT_RELOC64(batch,
5041                 pak_context->res_mb_code_surface.bo,
5042                 I915_GEM_DOMAIN_INSTRUCTION, 0,   /* No write domain */
5043                 vp9_state->mb_data_offset);
5044     /* default attribute */
5045     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5046
5047     /* DW 9..11, PAK-BSE */
5048     OUT_RELOC64(batch,
5049                   pak_context->indirect_pak_bse_object.bo,
5050                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5051                   pak_context->indirect_pak_bse_object.offset);
5052     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5053
5054     /* DW 12..13 upper bound */
5055     OUT_RELOC64(batch,
5056                   pak_context->indirect_pak_bse_object.bo,
5057                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5058                   pak_context->indirect_pak_bse_object.end_offset);
5059
5060     /* DW 14..16 compressed header buffer */
5061     OUT_RELOC64(batch,
5062                 pak_context->res_compressed_input_buffer.bo,
5063                 I915_GEM_DOMAIN_INSTRUCTION, 0,
5064                 0);
5065     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5066
5067     /* DW 17..19 prob counter streamout */
5068     OUT_RELOC64(batch,
5069                 pak_context->res_prob_counter_buffer.bo,
5070                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5071                 0);
5072     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5073
5074     /* DW 20..22 prob delta streamin */
5075     OUT_RELOC64(batch,
5076                 pak_context->res_prob_delta_buffer.bo,
5077                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5078                 0);
5079     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5080
5081     /* DW 23..25 Tile record streamout */
5082     OUT_RELOC64(batch,
5083                 pak_context->res_tile_record_streamout_buffer.bo,
5084                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5085                 0);
5086     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5087
5088     /* DW 26..28 CU record streamout */
5089     OUT_RELOC64(batch,
5090                 pak_context->res_cu_stat_streamout_buffer.bo,
5091                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5092                 0);
5093     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5094
5095     ADVANCE_BCS_BATCH(batch);
5096 }
5097
5098 static void
5099 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5100                            struct encode_state *encode_state,
5101                            struct intel_encoder_context *encoder_context,
5102                            VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5103 {
5104     struct intel_batchbuffer *batch = encoder_context->base.batch;
5105     uint32_t batch_value, tmp;
5106     VAEncPictureParameterBufferVP9 *pic_param;
5107
5108     if (!encode_state->pic_param_ext ||
5109         !encode_state->pic_param_ext->buffer) {
5110         return;
5111     }
5112
5113     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5114
5115     batch_value = seg_param->seg_flags.bits.segment_reference;
5116     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5117         pic_param->pic_flags.bits.intra_only)
5118         batch_value = 0;
5119
5120     BEGIN_BCS_BATCH(batch, 8);
5121
5122     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5123     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5124     OUT_BCS_BATCH(batch,
5125                   (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5126                   (batch_value << 1) |
5127                   (seg_param->seg_flags.bits.segment_reference_skipped <<0)
5128                  );
5129
5130     /* DW 3..6 is not used for encoder */
5131     OUT_BCS_BATCH(batch, 0);
5132     OUT_BCS_BATCH(batch, 0);
5133     OUT_BCS_BATCH(batch, 0);
5134     OUT_BCS_BATCH(batch, 0);
5135
5136     /* DW 7 Mode */
5137     tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5138     batch_value = tmp;
5139     tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5140     batch_value |= (tmp << 16);
5141     OUT_BCS_BATCH(batch, batch_value);
5142
5143     ADVANCE_BCS_BATCH(batch);
5144
5145 }
5146
5147 static void
5148 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5149                                  struct intel_encoder_context *encoder_context,
5150                                  struct i965_gpe_resource *obj_batch_buffer)
5151 {
5152     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5153     struct gen9_vp9_state *vp9_state;
5154     int uncompressed_header_length;
5155     unsigned int *cmd_ptr;
5156     unsigned int dw_length, bits_in_last_dw;
5157
5158     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5159
5160     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5161         return;
5162
5163     uncompressed_header_length = vp9_state->header_length;
5164     cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
5165
5166     if (!cmd_ptr)
5167         return;
5168
5169     bits_in_last_dw = uncompressed_header_length % 4;
5170     bits_in_last_dw *= 8;
5171
5172     if (bits_in_last_dw == 0)
5173         bits_in_last_dw = 32;
5174
5175     /* get the DWORD length of the inserted_data */
5176     dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5177     *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5178
5179     *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5180                   (0 << 16) | /* the start offset in first DW */
5181                   (0 << 15) |
5182                   (bits_in_last_dw << 8) | /* bits_in_last_dw */
5183                   (0 << 4) |  /* skip emulation byte count. 0 for VP9 */
5184                   (0 << 3) |  /* emulation flag. 0 for VP9 */
5185                   (1 << 2) |  /* last header flag. */
5186                   (0 << 1));
5187     memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5188
5189     cmd_ptr += dw_length;
5190
5191     *cmd_ptr++ = MI_NOOP;
5192     *cmd_ptr++ = MI_BATCH_BUFFER_END;
5193     i965_unmap_gpe_resource(obj_batch_buffer);
5194 }
5195
5196 static void
5197 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5198                            struct encode_state *encode_state,
5199                            struct intel_encoder_context *encoder_context)
5200 {
5201     struct intel_batchbuffer *batch = encoder_context->base.batch;
5202     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5203     struct object_surface *obj_surface;
5204     VAEncPictureParameterBufferVP9 *pic_param;
5205     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5206     struct gen9_vp9_state *vp9_state;
5207     struct gen9_surface_vp9 *vp9_priv_surface;
5208     int i;
5209     struct gen9_hcpe_pipe_mode_select_param mode_param;
5210     hcp_surface_state hcp_surface;
5211     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5212     int segment_count;
5213
5214     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5215
5216     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5217         return;
5218
5219     pic_param = vp9_state->pic_param;
5220     seg_param = vp9_state->segment_param;
5221
5222     if (vp9_state->curr_pak_pass == 0)
5223     {
5224         intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5225                            &pak_context->res_pak_uncompressed_input_buffer);
5226
5227         // Check if driver already programmed pic state as part of BRC update kernel programming.
5228         if (!vp9_state->brc_enabled)
5229         {
5230             intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5231                  encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5232         }
5233     }
5234
5235     if (vp9_state->curr_pak_pass == 0)
5236     {
5237         intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5238     }
5239
5240     {
5241         /* copy the frame_context[frame_idx] into curr_frame_context */
5242         memcpy(&vp9_state->vp9_current_fc,
5243                &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5244                sizeof(FRAME_CONTEXT));
5245         {
5246             uint8_t *prob_ptr;
5247
5248             prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5249
5250             if (!prob_ptr)
5251                 return;
5252
5253             /* copy the current fc to vp9_prob buffer */
5254             memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
5255             if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5256                  pic_param->pic_flags.bits.intra_only) {
5257                  FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5258
5259                  memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5260                         sizeof(vp9_kf_partition_probs));
5261                  memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5262                         sizeof(vp9_kf_uv_mode_prob));
5263             }
5264             i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
5265         }
5266     }
5267
5268     if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5269         /* read image status and insert the conditional end cmd */
5270         /* image ctrl/status is already accessed */
5271         struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5272         struct vp9_encode_status_buffer_internal *status_buffer;
5273
5274         status_buffer = &vp9_state->status_buffer;
5275         memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5276         mi_cond_end.offset = status_buffer->image_status_mask_offset;
5277         mi_cond_end.bo = status_buffer->bo;
5278         mi_cond_end.compare_data = 0;
5279         mi_cond_end.compare_mask_mode_disabled = 1;
5280         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
5281                                                  &mi_cond_end);
5282     }
5283
5284     mode_param.codec_mode = 1;
5285     mode_param.stream_out = 0;
5286     gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5287
5288     /* reconstructed surface */
5289     memset(&hcp_surface, 0, sizeof(hcp_surface));
5290     obj_surface = encode_state->reconstructed_object;
5291     hcp_surface.dw1.surface_id = 0;
5292     hcp_surface.dw1.surface_pitch = obj_surface->width;
5293     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5294     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5295     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5296                                &hcp_surface);
5297
5298     /* Input surface */
5299     if (vp9_state->dys_in_use &&
5300         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5301          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5302         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5303         obj_surface = vp9_priv_surface->dys_surface_obj;
5304     } else {
5305         obj_surface = encode_state->input_yuv_object;
5306     }
5307
5308     hcp_surface.dw1.surface_id = 1;
5309     hcp_surface.dw1.surface_pitch = obj_surface->width;
5310     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5311     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5312     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5313                                &hcp_surface);
5314
5315     if (vp9_state->picture_coding_type) {
5316         /* Add surface for last */
5317         if (vp9_state->last_ref_obj) {
5318             obj_surface = vp9_state->last_ref_obj;
5319             hcp_surface.dw1.surface_id = 2;
5320             hcp_surface.dw1.surface_pitch = obj_surface->width;
5321             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5322             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5323             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5324                                &hcp_surface);
5325         }
5326         if (vp9_state->golden_ref_obj) {
5327             obj_surface = vp9_state->golden_ref_obj;
5328             hcp_surface.dw1.surface_id = 3;
5329             hcp_surface.dw1.surface_pitch = obj_surface->width;
5330             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5331             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5332             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5333                                &hcp_surface);
5334         }
5335         if (vp9_state->alt_ref_obj) {
5336             obj_surface = vp9_state->alt_ref_obj;
5337             hcp_surface.dw1.surface_id = 4;
5338             hcp_surface.dw1.surface_pitch = obj_surface->width;
5339             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5340             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5341             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5342                                &hcp_surface);
5343         }
5344     }
5345
5346     gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5347
5348     gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5349
5350     // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5351     memset(&second_level_batch, 0, sizeof(second_level_batch));
5352
5353     if (vp9_state->curr_pak_pass == 0) {
5354         second_level_batch.offset = 0;
5355     } else
5356         second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5357
5358     second_level_batch.is_second_level = 1;
5359     second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5360
5361     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5362
5363     if (pic_param->pic_flags.bits.segmentation_enabled &&
5364         seg_param)
5365         segment_count = 8;
5366     else {
5367         segment_count = 1;
5368         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5369         seg_param = &tmp_seg_param;
5370     }
5371     for (i = 0; i < segment_count; i++)
5372     {
5373         gen9_pak_vp9_segment_state(ctx, encode_state,
5374                                    encoder_context,
5375                                    &seg_param->seg_data[i], i);
5376     }
5377
5378     /* Insert the uncompressed header buffer */
5379     second_level_batch.is_second_level = 1;
5380     second_level_batch.offset = 0;
5381     second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5382
5383     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5384
5385     /* PAK_OBJECT */
5386     second_level_batch.is_second_level = 1;
5387     second_level_batch.offset = 0;
5388     second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5389     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5390
5391     return;
5392 }
5393
5394 static void
5395 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5396 {
5397     struct intel_batchbuffer *batch = encoder_context->base.batch;
5398     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5399     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5400     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5401     //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5402     struct vp9_encode_status_buffer_internal *status_buffer;
5403     struct gen9_vp9_state *vp9_state;
5404
5405     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5406     if (!vp9_state || !pak_context || !batch)
5407         return;
5408
5409     status_buffer = &(vp9_state->status_buffer);
5410
5411     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5412     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5413
5414     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5415     mi_store_reg_mem_param.bo = status_buffer->bo;
5416     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5417     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5418     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5419
5420     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5421     mi_store_reg_mem_param.offset = 0;
5422     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5423     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5424
5425     /* Read HCP Image status */
5426     mi_store_reg_mem_param.bo = status_buffer->bo;
5427     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5428     mi_store_reg_mem_param.mmio_offset =
5429                                status_buffer->vp9_image_mask_reg_offset;
5430     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5431
5432     mi_store_reg_mem_param.bo = status_buffer->bo;
5433     mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5434     mi_store_reg_mem_param.mmio_offset =
5435                                status_buffer->vp9_image_ctrl_reg_offset;
5436     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5437
5438     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5439     mi_store_reg_mem_param.offset = 4;
5440     mi_store_reg_mem_param.mmio_offset =
5441                                status_buffer->vp9_image_ctrl_reg_offset;
5442     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5443
5444     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5445
5446     return;
5447 }
5448
5449 static VAStatus
5450 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5451                               struct encode_state *encode_state,
5452                               struct intel_encoder_context *encoder_context)
5453 {
5454     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5455     struct object_surface *obj_surface;
5456     struct object_buffer *obj_buffer;
5457     struct i965_coded_buffer_segment *coded_buffer_segment;
5458     VAEncPictureParameterBufferVP9 *pic_param;
5459     struct gen9_vp9_state *vp9_state;
5460     dri_bo *bo;
5461     int i;
5462
5463     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5464     if (!vp9_state ||
5465         !vp9_state->pic_param)
5466         return VA_STATUS_ERROR_INVALID_PARAMETER;
5467
5468     pic_param = vp9_state->pic_param;
5469
5470     /* reconstructed surface */
5471     obj_surface = encode_state->reconstructed_object;
5472     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5473
5474     dri_bo_unreference(pak_context->reconstructed_object.bo);
5475
5476     pak_context->reconstructed_object.bo = obj_surface->bo;
5477     dri_bo_reference(pak_context->reconstructed_object.bo);
5478
5479     /* set vp9 reference frames */
5480     for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5481         if (pak_context->reference_surfaces[i].bo)
5482             dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5483         pak_context->reference_surfaces[i].bo = NULL;
5484     }
5485
5486     /* Three reference frames are enough for VP9 */
5487     if (pic_param->pic_flags.bits.frame_type &&
5488         !pic_param->pic_flags.bits.intra_only) {
5489         for (i = 0; i < 3; i++) {
5490             obj_surface = encode_state->reference_objects[i];
5491             if (obj_surface && obj_surface->bo) {
5492                 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5493                 dri_bo_reference(obj_surface->bo);
5494             }
5495         }
5496     }
5497
5498     /* input YUV surface */
5499     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5500     pak_context->uncompressed_picture_source.bo = NULL;
5501     obj_surface = encode_state->reconstructed_object;
5502     if (vp9_state->dys_in_use &&
5503         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5504          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5505         struct gen9_surface_vp9 *vp9_priv_surface =
5506             (struct gen9_surface_vp9 *)(obj_surface->private_data);
5507         obj_surface = vp9_priv_surface->dys_surface_obj;
5508     } else
5509         obj_surface = encode_state->input_yuv_object;
5510
5511     pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5512     dri_bo_reference(pak_context->uncompressed_picture_source.bo);
5513
5514     /* coded buffer */
5515     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5516     pak_context->indirect_pak_bse_object.bo = NULL;
5517     obj_buffer = encode_state->coded_buf_object;
5518     bo = obj_buffer->buffer_store->bo;
5519     pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5520     pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5521     pak_context->indirect_pak_bse_object.bo = bo;
5522     dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5523
5524     /* set the internal flag to 0 to indicate the coded size is unknown */
5525     dri_bo_map(bo, 1);
5526     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5527     coded_buffer_segment->mapped = 0;
5528     coded_buffer_segment->codec = encoder_context->codec;
5529     coded_buffer_segment->status_support = 1;
5530     dri_bo_unmap(bo);
5531
5532     return VA_STATUS_SUCCESS;
5533 }
5534
/*
 * BRC prepare routine of the VP9 PAK stage.
 *
 * The body is empty: calling this function has no effect and neither
 * argument is read.
 */
static void
gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    /* Nothing to do. */
}
5540
5541 static void
5542 gen9_vp9_pak_context_destroy(void *context)
5543 {
5544     struct gen9_encoder_context_vp9 *pak_context = context;
5545     int i;
5546
5547     dri_bo_unreference(pak_context->reconstructed_object.bo);
5548     pak_context->reconstructed_object.bo = NULL;
5549
5550     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5551     pak_context->uncompressed_picture_source.bo = NULL;
5552
5553     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5554     pak_context->indirect_pak_bse_object.bo = NULL;
5555
5556     for (i = 0; i < 8; i++){
5557         dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5558         pak_context->reference_surfaces[i].bo = NULL;
5559     }
5560
5561     /* vme & pak same the same structure, so don't free the context here */
5562 }
5563
5564 static VAStatus
5565 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5566                        VAProfile profile,
5567                        struct encode_state *encode_state,
5568                        struct intel_encoder_context *encoder_context)
5569 {
5570     struct i965_driver_data *i965 = i965_driver_data(ctx);
5571     struct intel_batchbuffer *batch = encoder_context->base.batch;
5572     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5573     VAStatus va_status;
5574     struct gen9_vp9_state *vp9_state;
5575     VAEncPictureParameterBufferVP9 *pic_param;
5576     int i;
5577
5578     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5579
5580     if (!vp9_state || !vp9_state->pic_param || !pak_context)
5581         return VA_STATUS_ERROR_INVALID_PARAMETER;
5582
5583     va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5584
5585     if (va_status != VA_STATUS_SUCCESS)
5586         return va_status;
5587
5588     if (i965->intel.has_bsd2)
5589         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5590     else
5591         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5592
5593     intel_batchbuffer_emit_mi_flush(batch);
5594
5595     BEGIN_BCS_BATCH(batch, 64);
5596     for (i = 0; i < 64; i++)
5597         OUT_BCS_BATCH(batch, MI_NOOP);
5598
5599     ADVANCE_BCS_BATCH(batch);
5600
5601     for (vp9_state->curr_pak_pass = 0;
5602          vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5603          vp9_state->curr_pak_pass++) {
5604
5605         if (vp9_state->curr_pak_pass == 0) {
5606             /* Initialize the VP9 Image Ctrl reg for the first pass */
5607             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5608             struct vp9_encode_status_buffer_internal *status_buffer;
5609
5610             status_buffer = &(vp9_state->status_buffer);
5611             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5612             mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5613             mi_load_reg_imm.data = 0;
5614             gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5615         }
5616         gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5617         gen9_vp9_read_mfc_status(ctx, encoder_context);
5618     }
5619
5620     intel_batchbuffer_end_atomic(batch);
5621     intel_batchbuffer_flush(batch);
5622
5623     pic_param = vp9_state->pic_param;
5624     vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5625     vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5626     vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5627     vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5628     vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5629     vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5630     vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5631     vp9_state->frame_number++;
5632     vp9_state->curr_mv_temporal_index ^= 1;
5633     vp9_state->first_frame = 0;
5634
5635     return VA_STATUS_SUCCESS;
5636 }
5637
5638 Bool
5639 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5640 {
5641     struct gen9_encoder_context_vp9 *vme_context = NULL;
5642     struct gen9_vp9_state *vp9_state = NULL;
5643
5644     vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5645     vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5646
5647     if (!vme_context || !vp9_state) {
5648         if (vme_context)
5649             free(vme_context);
5650         if (vp9_state)
5651             free(vp9_state);
5652         return false;
5653     }
5654
5655     encoder_context->enc_priv_state = vp9_state;
5656     vme_context->enc_priv_state = vp9_state;
5657
5658     /* Initialize the features that are supported by VP9 */
5659     vme_context->hme_supported = 1;
5660     vme_context->use_hw_scoreboard = 1;
5661     vme_context->use_hw_non_stalling_scoreboard = 1;
5662
5663     vp9_state->tx_mode = TX_MODE_SELECT;
5664     vp9_state->multi_ref_qp_check = 0;
5665     vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5666     vp9_state->num_pak_passes = 1;
5667     vp9_state->hme_supported = vme_context->hme_supported;
5668     vp9_state->b16xme_supported = 1;
5669
5670     if (encoder_context->rate_control_mode != VA_RC_NONE &&
5671         encoder_context->rate_control_mode != VA_RC_CQP) {
5672         vp9_state->brc_enabled = 1;
5673         vp9_state->brc_distortion_buffer_supported = 1;
5674         vp9_state->brc_constant_buffer_supported = 1;
5675         vp9_state->num_pak_passes = 4;
5676     }
5677     vp9_state->dys_enabled = 1; /* this is supported by default */
5678     vp9_state->first_frame = 1;
5679
5680     /* the definition of status buffer offset for VP9 */
5681     {
5682         struct vp9_encode_status_buffer_internal *status_buffer;
5683         uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5684
5685         status_buffer = &vp9_state->status_buffer;
5686         memset(status_buffer, 0,
5687                sizeof(struct vp9_encode_status_buffer_internal));
5688
5689         status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5690         status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5691         status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5692         status_buffer->media_index_offset       = base_offset + offsetof(struct vp9_encode_status, media_index);
5693
5694         status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5695         status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5696         status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5697     }
5698
5699     gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5700
5701     encoder_context->vme_context = vme_context;
5702     encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5703     encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
5704
5705     return true;
5706 }
5707
5708 static VAStatus
5709 gen9_vp9_get_coded_status(VADriverContextP ctx,
5710                           struct intel_encoder_context *encoder_context,
5711                           struct i965_coded_buffer_segment *coded_buf_seg)
5712 {
5713     struct vp9_encode_status *vp9_encode_status;
5714
5715     if (!encoder_context || !coded_buf_seg)
5716         return VA_STATUS_ERROR_INVALID_BUFFER;
5717
5718     vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5719     coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5720
5721     /* One VACodedBufferSegment for VP9 will be added later.
5722      * It will be linked to the next element of coded_buf_seg->base.next
5723      */
5724
5725     return VA_STATUS_SUCCESS;
5726 }
5727
5728 Bool
5729 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5730 {
5731     /* VME & PAK share the same context */
5732     struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5733
5734     if (!pak_context)
5735         return false;
5736
5737     encoder_context->mfc_context = pak_context;
5738     encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5739     encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5740     encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5741     encoder_context->get_status = gen9_vp9_get_coded_status;
5742     return true;
5743 }