OSDN Git Service

Unify the coding style in the driver
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_mfd.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30
31 #include <va/va.h>
32 #include <va/va_dec_hevc.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_drv_video.h"
38 #include "i965_decoder_utils.h"
39
40 #include "gen9_mfd.h"
41 #include "intel_media.h"
42 #include "vp9_probs.h"
43
/*
 * Emit a 64-bit buffer address (2 DWs) into the BCS batch via a
 * relocation, or two zero DWs when buf_bo is NULL.  `is_target` marks
 * buffers the GPU writes (render-domain write relocation); read-only
 * references pass 0.  When `ma` is set, one extra DW with the memory
 * address attributes (MOCS) is appended.  The macro relies on `batch`
 * and `i965` being in scope at the expansion site.
 */
#define OUT_BUFFER(buf_bo, is_target, ma)  do {                         \
        if (buf_bo) {                                                   \
            OUT_BCS_RELOC64(batch,                                        \
                          buf_bo,                                       \
                          I915_GEM_DOMAIN_RENDER,                       \
                          is_target ? I915_GEM_DOMAIN_RENDER : 0,       \
                          0);                                           \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        if (ma)                                                         \
            OUT_BCS_BATCH(batch, i965->intel.mocs_state);                                    \
    } while (0)

/* Convenience wrappers: MA = with trailing memory-attributes DW (3 DWs),
 * NMA = without it (2 DWs); TARGET = GPU-writable, REFERENCE = read-only. */
#define OUT_BUFFER_MA_TARGET(buf_bo)       OUT_BUFFER(buf_bo, 1, 1)
#define OUT_BUFFER_MA_REFERENCE(buf_bo)    OUT_BUFFER(buf_bo, 0, 1)
#define OUT_BUFFER_NMA_TARGET(buf_bo)      OUT_BUFFER(buf_bo, 1, 0)
#define OUT_BUFFER_NMA_REFERENCE(buf_bo)   OUT_BUFFER(buf_bo, 0, 0)
63
64 static void
65 gen9_hcpd_init_hevc_surface(VADriverContextP ctx,
66                             VAPictureParameterBufferHEVC *pic_param,
67                             struct object_surface *obj_surface,
68                             struct gen9_hcpd_context *gen9_hcpd_context)
69 {
70     struct i965_driver_data *i965 = i965_driver_data(ctx);
71     GenHevcSurface *gen9_hevc_surface;
72
73     if (!obj_surface)
74         return;
75
76     obj_surface->free_private_data = gen_free_hevc_surface;
77     gen9_hevc_surface = obj_surface->private_data;
78
79     if (!gen9_hevc_surface) {
80         gen9_hevc_surface = calloc(sizeof(GenHevcSurface), 1);
81         assert(gen9_hevc_surface);
82         gen9_hevc_surface->base.frame_store_id = -1;
83         obj_surface->private_data = gen9_hevc_surface;
84     }
85
86     if (gen9_hevc_surface->motion_vector_temporal_bo == NULL) {
87         uint32_t size;
88
89         if (gen9_hcpd_context->ctb_size == 16)
90             size = ((gen9_hcpd_context->picture_width_in_pixels + 63) >> 6) *
91                    ((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4);
92         else
93             size = ((gen9_hcpd_context->picture_width_in_pixels + 31) >> 5) *
94                    ((gen9_hcpd_context->picture_height_in_pixels + 31) >> 5);
95
96         size <<= 6; /* in unit of 64bytes */
97         gen9_hevc_surface->motion_vector_temporal_bo = dri_bo_alloc(i965->intel.bufmgr,
98                                                                     "motion vector temporal buffer",
99                                                                     size,
100                                                                     0x1000);
101     }
102 }
103
/*
 * Per-picture initialization for HEVC decoding: refresh the reference
 * frame-store mapping, cache the picture geometry in the context, make
 * sure the render target has a BO plus HEVC private data, and size all
 * the HCP scratch buffers (deblocking, metadata, SAO) from the picture
 * dimensions.  Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen9_hcpd_hevc_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAPictureParameterBufferHEVC *pic_param;
    struct object_surface *obj_surface;
    uint32_t size;
    int size_shift = 3;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
    intel_update_hevc_frame_store_index(ctx,
                                        decode_state,
                                        pic_param,
                                        gen9_hcpd_context->reference_surfaces,
                                        &gen9_hcpd_context->fs_ctx);

    /* Derive CTB and minimum-CB geometry from the log2 syntax elements
     * of the picture parameters and cache it for the state commands. */
    gen9_hcpd_context->picture_width_in_pixels = pic_param->pic_width_in_luma_samples;
    gen9_hcpd_context->picture_height_in_pixels = pic_param->pic_height_in_luma_samples;
    gen9_hcpd_context->ctb_size = (1 << (pic_param->log2_min_luma_coding_block_size_minus3 +
                                         3 +
                                         pic_param->log2_diff_max_min_luma_coding_block_size));
    gen9_hcpd_context->picture_width_in_ctbs = ALIGN(gen9_hcpd_context->picture_width_in_pixels, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->picture_height_in_ctbs = ALIGN(gen9_hcpd_context->picture_height_in_pixels, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->min_cb_size = (1 << (pic_param->log2_min_luma_coding_block_size_minus3 + 3));
    gen9_hcpd_context->picture_width_in_min_cb_minus1 = gen9_hcpd_context->picture_width_in_pixels / gen9_hcpd_context->min_cb_size - 1;
    gen9_hcpd_context->picture_height_in_min_cb_minus1 = gen9_hcpd_context->picture_height_in_pixels / gen9_hcpd_context->min_cb_size - 1;

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    hevc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen9_hcpd_init_hevc_surface(ctx, pic_param, obj_surface, gen9_hcpd_context);

    /* >8-bit content uses a smaller shift, i.e. larger scratch rows */
    if ((pic_param->bit_depth_luma_minus8 > 0)
        || (pic_param->bit_depth_chroma_minus8 > 0))
        size_shift = 2;

    /* Deblocking filter row-store buffers (sizes in 64-byte units) */
    size = ALIGN(gen9_hcpd_context->picture_width_in_pixels, 32) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer), "line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);

    size = ALIGN(gen9_hcpd_context->picture_height_in_pixels + 6 * gen9_hcpd_context->picture_height_in_ctbs, 32) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);

    /* Metadata line/tile buffers, sized per 16-pixel unit plus a
     * per-CTB overhead */
    size = (((gen9_hcpd_context->picture_width_in_pixels + 15) >> 4) * 188 + 9 * gen9_hcpd_context->picture_width_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer), "metadata line buffer", size);

    size = (((gen9_hcpd_context->picture_width_in_pixels + 15) >> 4) * 172 + 9 * gen9_hcpd_context->picture_width_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer), "metadata tile line buffer", size);

    /* CHV uses a different metadata tile column formula than other gens */
    if (IS_CHERRYVIEW(i965->intel.device_info))
        size = (((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4) * 256 + 9 * gen9_hcpd_context->picture_height_in_ctbs + 1023) >> 9;
    else
        size = (((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4) * 176 + 89 * gen9_hcpd_context->picture_height_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer), "metadata tile column buffer", size);

    /* SAO (sample adaptive offset) row-store buffers */
    size = ALIGN(((gen9_hcpd_context->picture_width_in_pixels >> 1) + 3 * gen9_hcpd_context->picture_width_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_line_buffer), "sao line buffer", size);

    size = ALIGN(((gen9_hcpd_context->picture_width_in_pixels >> 1) + 6 * gen9_hcpd_context->picture_width_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_tile_line_buffer), "sao tile line buffer", size);

    size = ALIGN(((gen9_hcpd_context->picture_height_in_pixels >> 1) + 6 * gen9_hcpd_context->picture_height_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_tile_column_buffer), "sao tile column buffer", size);

    /* Reset the per-picture collocated-reference bookkeeping; it is
     * filled in when the first inter slice is seen. */
    gen9_hcpd_context->first_inter_slice_collocated_ref_idx = 0;
    gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = 0;
    gen9_hcpd_context->first_inter_slice_valid = 0;

    return VA_STATUS_SUCCESS;
}
185
/*
 * Emit HCP_PIPE_MODE_SELECT, putting the HCP pipeline into decode mode
 * for the given codec (HEVC or VP9).  KBL/GLK use a 6-DW version of the
 * command with two extra DWs; other gens use the 4-DW layout.
 */
static void
gen9_hcpd_pipe_mode_select(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           int codec,
                           struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;

    assert((codec == HCP_CODEC_HEVC) || (codec == HCP_CODEC_VP9));

    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 6);

        OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 4);

        OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
    }
    /* DW 1: codec select and decode mode */
    OUT_BCS_BATCH(batch,
                  (codec << 5) |
                  (0 << 3) | /* disable Pic Status / Error Report */
                  HCP_CODEC_SELECT_DECODE);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        /* DW 4: bit 6 set only for VP9 (NOTE(review): confirm the exact
         * field meaning against the KBL PRM); DW 5 is zero. */
        if (codec == HCP_CODEC_VP9)
            OUT_BCS_BATCH(batch, 1 << 6);
        else
            OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
226
/*
 * Emit HCP_SURFACE_STATE for the current decode target: surface pitch
 * and the luma-to-chroma plane offset.  The P010 format is selected
 * when either luma or chroma bit depth exceeds 8 bits, otherwise
 * 8-bit planar 4:2:0.
 */
static void
gen9_hcpd_surface_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    VAPictureParameterBufferHEVC *pic_param;

    assert(obj_surface);

    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
    y_cb_offset = obj_surface->y_cb_offset;

    BEGIN_BCS_BATCH(batch, 3);

    OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 28) |                   /* surface id */
                  (obj_surface->width - 1));    /* pitch - 1 */
    /* DW 2: surface format in bits 31:28, Y/Cb offset in the low bits */
    if ((pic_param->bit_depth_luma_minus8 > 0)
        || (pic_param->bit_depth_chroma_minus8 > 0)) {
        OUT_BCS_BATCH(batch,
                      (SURFACE_FORMAT_P010 << 28) |
                      y_cb_offset);
    } else {
        OUT_BCS_BATCH(batch,
                      (SURFACE_FORMAT_PLANAR_420_8 << 28) |
                      y_cb_offset);
    }

    ADVANCE_BCS_BATCH(batch);
}
261
/*
 * Emit HCP_PIPE_BUF_ADDR_STATE (95 DWs): addresses of the decoded
 * picture, all HCP scratch buffers, the reference pictures, and the
 * per-picture motion-vector buffers, in the exact DW order noted on
 * each line.  Unused/encode-only slots are programmed as NULL.
 */
static void
gen9_hcpd_pipe_buf_addr_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    struct object_surface *obj_surface;
    GenHevcSurface *gen9_hevc_surface;
    int i;

    BEGIN_BCS_BATCH(batch, 95);

    OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));

    obj_surface = decode_state->render_object;
    assert(obj_surface && obj_surface->bo);
    gen9_hevc_surface = obj_surface->private_data;
    assert(gen9_hevc_surface && gen9_hevc_surface->motion_vector_temporal_bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_line_buffer.bo);              /* DW 22..24 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
    OUT_BUFFER_MA_TARGET(gen9_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* DW 37..52: reference picture addresses (2 DWs each, no MA) */
    for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;

        if (obj_surface)
            OUT_BUFFER_NMA_REFERENCE(obj_surface->bo);
        else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_REFERENCE(NULL); /* DW 54..56, ignore for decoding mode */
    OUT_BUFFER_MA_TARGET(NULL);
    OUT_BUFFER_MA_TARGET(NULL);
    OUT_BUFFER_MA_TARGET(NULL);

    /* DW 66..81: per-reference motion vector buffers (2 DWs each) */
    for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_hevc_surface = NULL;

        if (obj_surface && obj_surface->private_data)
            gen9_hevc_surface = obj_surface->private_data;

        if (gen9_hevc_surface)
            OUT_BUFFER_NMA_REFERENCE(gen9_hevc_surface->motion_vector_temporal_bo);
        else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(NULL);    /* DW 83..85, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 86..88, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 89..91, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 92..94, ignore for HEVC */

    ADVANCE_BCS_BATCH(batch);
}
331
/*
 * Emit HCP_IND_OBJ_BASE_ADDR_STATE, pointing the indirect bitstream
 * object at the slice data BO.  The CU and PAK-BSE slots are
 * encode-only and left NULL for decode.  Note: `i965` is used by the
 * OUT_BUFFER_* macros (MOCS DW), not referenced directly here.
 */
static void
gen9_hcpd_ind_obj_base_addr_state(VADriverContextP ctx,
                                  dri_bo *slice_data_bo,
                                  struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 14);

    OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
    OUT_BUFFER_MA_REFERENCE(slice_data_bo);        /* DW 1..3 */
    OUT_BUFFER_NMA_REFERENCE(NULL);                /* DW 4..5, Upper Bound */
    OUT_BUFFER_MA_REFERENCE(NULL);                 /* DW 6..8, CU, ignored */
    OUT_BUFFER_MA_TARGET(NULL);                    /* DW 9..11, PAK-BSE, ignored */
    OUT_BUFFER_NMA_TARGET(NULL);                   /* DW 12..13, Upper Bound  */

    ADVANCE_BCS_BATCH(batch);
}
351
352 static void
353 gen9_hcpd_qm_state(VADriverContextP ctx,
354                    int size_id,
355                    int color_component,
356                    int pred_type,
357                    int dc,
358                    unsigned char *qm,
359                    int qm_length,
360                    struct gen9_hcpd_context *gen9_hcpd_context)
361 {
362     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
363     unsigned char qm_buffer[64];
364
365     assert(qm_length <= 64);
366     memset(qm_buffer, 0, sizeof(qm_buffer));
367     memcpy(qm_buffer, qm, qm_length);
368
369     BEGIN_BCS_BATCH(batch, 18);
370
371     OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
372     OUT_BCS_BATCH(batch,
373                   dc << 5 |
374                   color_component << 3 |
375                   size_id << 1 |
376                   pred_type);
377     intel_batchbuffer_data(batch, qm_buffer, 64);
378
379     ADVANCE_BCS_BATCH(batch);
380 }
381
382 static void
383 gen9_hcpd_hevc_qm_state(VADriverContextP ctx,
384                         struct decode_state *decode_state,
385                         struct gen9_hcpd_context *gen9_hcpd_context)
386 {
387     VAIQMatrixBufferHEVC *iq_matrix;
388     VAPictureParameterBufferHEVC *pic_param;
389     int i;
390
391     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
392         iq_matrix = (VAIQMatrixBufferHEVC *)decode_state->iq_matrix->buffer;
393     else
394         iq_matrix = &gen9_hcpd_context->iq_matrix_hevc;
395
396     assert(decode_state->pic_param && decode_state->pic_param->buffer);
397     pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
398
399     if (!pic_param->pic_fields.bits.scaling_list_enabled_flag)
400         iq_matrix = &gen9_hcpd_context->iq_matrix_hevc;
401
402     for (i = 0; i < 6; i++) {
403         gen9_hcpd_qm_state(ctx,
404                            0, i % 3, i / 3, 0,
405                            iq_matrix->ScalingList4x4[i], 16,
406                            gen9_hcpd_context);
407     }
408
409     for (i = 0; i < 6; i++) {
410         gen9_hcpd_qm_state(ctx,
411                            1, i % 3, i / 3, 0,
412                            iq_matrix->ScalingList8x8[i], 64,
413                            gen9_hcpd_context);
414     }
415
416     for (i = 0; i < 6; i++) {
417         gen9_hcpd_qm_state(ctx,
418                            2, i % 3, i / 3, iq_matrix->ScalingListDC16x16[i],
419                            iq_matrix->ScalingList16x16[i], 64,
420                            gen9_hcpd_context);
421     }
422
423     for (i = 0; i < 2; i++) {
424         gen9_hcpd_qm_state(ctx,
425                            3, 0, i % 2, iq_matrix->ScalingListDC32x32[i],
426                            iq_matrix->ScalingList32x32[i], 64,
427                            gen9_hcpd_context);
428     }
429 }
430
/*
 * Emit HCP_PIC_STATE (19 DWs) from the VA picture parameters: picture
 * size in minimum coding blocks, transform/coding/PCM block-size
 * ranges, and the per-picture coding tool flags.
 */
static void
gen9_hcpd_pic_state(VADriverContextP ctx,
                    struct decode_state *decode_state,
                    struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
    int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
    /*
     * 7.4.3.1
     *
     * When not present, the value of loop_filter_across_tiles_enabled_flag
     * is inferred to be equal to 1.
     */
    int loop_filter_across_tiles_enabled_flag = 1;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    if (pic_param->pic_fields.bits.pcm_enabled_flag) {
        max_pcm_size_minus3 = pic_param->log2_min_pcm_luma_coding_block_size_minus3 +
                              pic_param->log2_diff_max_min_pcm_luma_coding_block_size;
        min_pcm_size_minus3 = pic_param->log2_min_pcm_luma_coding_block_size_minus3;
        pcm_sample_bit_depth_luma_minus1 = (pic_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
        pcm_sample_bit_depth_chroma_minus1 = (pic_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
    } else {
        /* PCM disabled: derive from the CTB size, capped at 2
         * (NOTE(review): verify the cap against the PRM) */
        max_pcm_size_minus3 = MIN(pic_param->log2_min_luma_coding_block_size_minus3 + pic_param->log2_diff_max_min_luma_coding_block_size, 2);
    }

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;

    BEGIN_BCS_BATCH(batch, 19);

    OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));

    /* DW 1: frame size in units of minimum coding blocks */
    OUT_BCS_BATCH(batch,
                  gen9_hcpd_context->picture_height_in_min_cb_minus1 << 16 |
                  gen9_hcpd_context->picture_width_in_min_cb_minus1);
    /* DW 2: PCM / transform / coding block log2 size ranges */
    OUT_BCS_BATCH(batch,
                  max_pcm_size_minus3 << 10 |
                  min_pcm_size_minus3 << 8 |
                  (pic_param->log2_min_transform_block_size_minus2 +
                   pic_param->log2_diff_max_min_transform_block_size) << 6 |
                  pic_param->log2_min_transform_block_size_minus2 << 4 |
                  (pic_param->log2_min_luma_coding_block_size_minus3 +
                   pic_param->log2_diff_max_min_luma_coding_block_size) << 2 |
                  pic_param->log2_min_luma_coding_block_size_minus3);
    OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
    /* DW 4: per-picture coding tool enable flags */
    OUT_BCS_BATCH(batch,
                  0 << 27 |
                  pic_param->pic_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
                  pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
                  pic_param->pic_fields.bits.amp_enabled_flag << 23 |
                  pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
                  !(pic_param->CurrPic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD) << 21 |
                  !!(pic_param->CurrPic.flags & VA_PICTURE_HEVC_FIELD_PIC) << 20 |
                  pic_param->pic_fields.bits.weighted_pred_flag << 19 |
                  pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
                  pic_param->pic_fields.bits.tiles_enabled_flag << 17 |
                  pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
                  loop_filter_across_tiles_enabled_flag << 15 |
                  pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |
                  pic_param->log2_parallel_merge_level_minus2 << 10 |
                  pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |
                  pic_param->pic_fields.bits.pcm_loop_filter_disabled_flag << 8 |
                  (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |
                  pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |
                  pic_param->pic_fields.bits.pcm_enabled_flag << 4 |
                  pic_param->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag << 3 |
                  0);
    /* DW 5: bit depths, PCM sample depths, transform hierarchy depths,
     * and chroma QP offsets (5-bit signed fields) */
    OUT_BCS_BATCH(batch,
                  pic_param->bit_depth_luma_minus8 << 27 |
                  pic_param->bit_depth_chroma_minus8 << 24 |
                  pcm_sample_bit_depth_luma_minus1 << 20 |
                  pcm_sample_bit_depth_chroma_minus1 << 16 |
                  pic_param->max_transform_hierarchy_depth_inter << 13 |
                  pic_param->max_transform_hierarchy_depth_intra << 10 |
                  (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
                  (pic_param->pps_cb_qp_offset & 0x1f));
    OUT_BCS_BATCH(batch,
                  0 << 29 |
                  0);
    /* DW 7..18: unused for decode, programmed as zero */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* DW 10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* DW 15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
530
/*
 * Emit HCP_TILE_STATE: the tile column/row counts and the cumulative
 * tile start positions (in CTBs).  The loops accumulate the per-tile
 * widths/heights into pos_col/pos_row; the MIN() bounds keep the
 * [i + 1] stores inside the fixed 20/24-byte arrays, which are
 * zero-padded before being copied into the batch.
 */
static void
gen9_hcpd_tile_state(VADriverContextP ctx,
                     struct decode_state *decode_state,
                     struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    uint8_t pos_col[20], pos_row[24];
    int i;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    memset(pos_col, 0, sizeof(pos_col));
    memset(pos_row, 0, sizeof(pos_row));

    for (i = 0; i <= MIN(pic_param->num_tile_columns_minus1, 18); i++)
        pos_col[i + 1] = pos_col[i] + pic_param->column_width_minus1[i] + 1;

    for (i = 0; i <= MIN(pic_param->num_tile_rows_minus1, 20); i++)
        pos_row[i + 1] = pos_row[i] + pic_param->row_height_minus1[i] + 1;

    BEGIN_BCS_BATCH(batch, 13);

    OUT_BCS_BATCH(batch, HCP_TILE_STATE | (13 - 2));

    /* DW 1: tile grid dimensions */
    OUT_BCS_BATCH(batch,
                  pic_param->num_tile_columns_minus1 << 5 |
                  pic_param->num_tile_rows_minus1);
    /* DW 2..6: column positions, DW 7..12: row positions */
    intel_batchbuffer_data(batch, pos_col, 20);
    intel_batchbuffer_data(batch, pos_row, 24);

    ADVANCE_BCS_BATCH(batch);
}
565
566 static int
567 gen9_hcpd_get_reference_picture_frame_id(VAPictureHEVC *ref_pic,
568                                          GenFrameStore frame_store[MAX_GEN_HCP_REFERENCE_FRAMES])
569 {
570     int i;
571
572     if (ref_pic->picture_id == VA_INVALID_ID ||
573         (ref_pic->flags & VA_PICTURE_HEVC_INVALID))
574         return 0;
575
576     for (i = 0; i < MAX_GEN_HCP_REFERENCE_FRAMES; i++) {
577         if (ref_pic->picture_id == frame_store[i].surface_id) {
578             assert(frame_store[i].frame_store_id < MAX_GEN_HCP_REFERENCE_FRAMES);
579             return frame_store[i].frame_store_id;
580         }
581     }
582
583     /* Should never get here !!! */
584     assert(0);
585     return 0;
586 }
587
/*
 * Emit one HCP_REF_IDX_STATE command for reference list `list` (0/1).
 * Each active entry packs the field/long-term flags, the frame-store
 * slot of the reference, and the POC delta to the current picture
 * clamped to the signed 8-bit range; the rest of the 16-entry table is
 * zeroed.  At most 15 entries are emitted as active (MIN bound).
 */
static void
gen9_hcpd_ref_idx_state_1(struct intel_batchbuffer *batch,
                          int list,
                          VAPictureParameterBufferHEVC *pic_param,
                          VASliceParameterBufferHEVC *slice_param,
                          GenFrameStore frame_store[MAX_GEN_HCP_REFERENCE_FRAMES])
{
    int i;
    uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
    uint8_t *ref_list = slice_param->RefPicList[list];

    BEGIN_BCS_BATCH(batch, 18);

    OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
    /* DW 1: list selector and active reference count */
    OUT_BCS_BATCH(batch,
                  num_ref_minus1 << 1 |
                  list);

    for (i = 0; i < 16; i++) {
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            VAPictureHEVC *ref_pic = &pic_param->ReferenceFrames[ref_list[i]];
            VAPictureHEVC *curr_pic = &pic_param->CurrPic;

            OUT_BCS_BATCH(batch,
                          !(ref_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD) << 15 |
                          !!(ref_pic->flags & VA_PICTURE_HEVC_FIELD_PIC) << 14 |
                          !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |
                          0 << 12 |
                          0 << 11 |
                          gen9_hcpd_get_reference_picture_frame_id(ref_pic, frame_store) << 8 |
                          (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    ADVANCE_BCS_BATCH(batch);
}
626
627 static void
628 gen9_hcpd_ref_idx_state(VADriverContextP ctx,
629                         VAPictureParameterBufferHEVC *pic_param,
630                         VASliceParameterBufferHEVC *slice_param,
631                         struct gen9_hcpd_context *gen9_hcpd_context)
632 {
633     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
634
635     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
636         return;
637
638     gen9_hcpd_ref_idx_state_1(batch, 0, pic_param, slice_param, gen9_hcpd_context->reference_surfaces);
639
640     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
641         return;
642
643     gen9_hcpd_ref_idx_state_1(batch, 1, pic_param, slice_param, gen9_hcpd_context->reference_surfaces);
644 }
645
646 static void
647 gen9_hcpd_weightoffset_state_1(struct intel_batchbuffer *batch,
648                                int list,
649                                VASliceParameterBufferHEVC *slice_param)
650 {
651     int i;
652     uint8_t num_ref_minus1 = (list == 1) ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1;
653     int8_t *luma_offset = (list == 1) ? slice_param->luma_offset_l1 : slice_param->luma_offset_l0;
654     int8_t *delta_luma_weight = (list == 1) ? slice_param->delta_luma_weight_l1 : slice_param->delta_luma_weight_l0;
655     int8_t (* chroma_offset)[2] = (list == 1) ? slice_param->ChromaOffsetL1 : slice_param->ChromaOffsetL0;
656     int8_t (* delta_chroma_weight)[2] = (list == 1) ? slice_param->delta_chroma_weight_l1 : slice_param->delta_chroma_weight_l0;
657
658     BEGIN_BCS_BATCH(batch, 34);
659
660     OUT_BCS_BATCH(batch, HCP_WEIGHTOFFSET | (34 - 2));
661     OUT_BCS_BATCH(batch, list);
662
663     for (i = 0; i < 16; i++) {
664         if (i < MIN((num_ref_minus1 + 1), 15)) {
665             OUT_BCS_BATCH(batch,
666                           (luma_offset[i] & 0xff) << 8 |
667                           (delta_luma_weight[i] & 0xff));
668         } else {
669             OUT_BCS_BATCH(batch, 0);
670         }
671     }
672     for (i = 0; i < 16; i++) {
673         if (i < MIN((num_ref_minus1 + 1), 15)) {
674             OUT_BCS_BATCH(batch,
675                           (chroma_offset[i][1] & 0xff) << 24 |
676                           (delta_chroma_weight[i][1] & 0xff) << 16 |
677                           (chroma_offset[i][0] & 0xff) << 8 |
678                           (delta_chroma_weight[i][0] & 0xff));
679         } else {
680             OUT_BCS_BATCH(batch, 0);
681         }
682     }
683
684     ADVANCE_BCS_BATCH(batch);
685 }
686
687 static void
688 gen9_hcpd_weightoffset_state(VADriverContextP ctx,
689                              VAPictureParameterBufferHEVC *pic_param,
690                              VASliceParameterBufferHEVC *slice_param,
691                              struct gen9_hcpd_context *gen9_hcpd_context)
692 {
693     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
694
695     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
696         return;
697
698     if ((slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P &&
699          !pic_param->pic_fields.bits.weighted_pred_flag) ||
700         (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B &&
701          !pic_param->pic_fields.bits.weighted_bipred_flag))
702         return;
703
704     gen9_hcpd_weightoffset_state_1(batch, 0, slice_param);
705
706     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
707         return;
708
709     gen9_hcpd_weightoffset_state_1(batch, 1, slice_param);
710 }
711
712 static int
713 gen9_hcpd_get_collocated_ref_idx(VADriverContextP ctx,
714                                  VAPictureParameterBufferHEVC *pic_param,
715                                  VASliceParameterBufferHEVC *slice_param,
716                                  struct gen9_hcpd_context *gen9_hcpd_context)
717 {
718     uint8_t *ref_list;
719     VAPictureHEVC *ref_pic;
720
721     if (slice_param->collocated_ref_idx > 14)
722         return 0;
723
724     if (!slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag)
725         return 0;
726
727     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
728         return 0;
729
730     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P ||
731         (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B &&
732          slice_param->LongSliceFlags.fields.collocated_from_l0_flag))
733         ref_list = slice_param->RefPicList[0];
734     else {
735         assert(slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B);
736         ref_list = slice_param->RefPicList[1];
737     }
738
739     ref_pic = &pic_param->ReferenceFrames[ref_list[slice_param->collocated_ref_idx]];
740
741     return gen9_hcpd_get_reference_picture_frame_id(ref_pic, gen9_hcpd_context->reference_surfaces);
742 }
743
744 static int
745 gen9_hcpd_is_list_low_delay(uint8_t ref_list_count,
746                             uint8_t ref_list[15],
747                             VAPictureHEVC *curr_pic,
748                             VAPictureHEVC ref_surfaces[15])
749 {
750     int i;
751
752     for (i = 0; i < MIN(ref_list_count, 15); i++) {
753         VAPictureHEVC *ref_pic;
754
755         if (ref_list[i] > 14)
756             continue;
757
758         ref_pic = &ref_surfaces[ref_list[i]];
759
760         if (ref_pic->pic_order_cnt > curr_pic->pic_order_cnt)
761             return 0;
762     }
763
764     return 1;
765 }
766
767 static int
768 gen9_hcpd_is_low_delay(VADriverContextP ctx,
769                        VAPictureParameterBufferHEVC *pic_param,
770                        VASliceParameterBufferHEVC *slice_param)
771 {
772     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
773         return 0;
774     else if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
775         return gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l0_active_minus1 + 1,
776                                            slice_param->RefPicList[0],
777                                            &pic_param->CurrPic,
778                                            pic_param->ReferenceFrames);
779     else
780         return gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l0_active_minus1 + 1,
781                                            slice_param->RefPicList[0],
782                                            &pic_param->CurrPic,
783                                            pic_param->ReferenceFrames) &&
784                gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l1_active_minus1 + 1,
785                                            slice_param->RefPicList[1],
786                                            &pic_param->CurrPic,
787                                            pic_param->ReferenceFrames);
788 }
789
/*
 * Emit the HCP_SLICE_STATE command (9 dwords) for one HEVC slice.
 *
 * next_slice_param is the following slice in decode order, or NULL for
 * the last slice of the picture (the "last slice" bit in DW3 is derived
 * from it).  Slice positions are expressed in CTB units.
 */
static void
gen9_hcpd_slice_state(VADriverContextP ctx,
                      VAPictureParameterBufferHEVC *pic_param,
                      VASliceParameterBufferHEVC *slice_param,
                      VASliceParameterBufferHEVC *next_slice_param,
                      struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    unsigned short collocated_ref_idx, collocated_from_l0_flag;
    int sliceqp_sign_flag = 0, sliceqp = 0;

    /* Convert the linear CTB address into (x, y) CTB coordinates. */
    slice_hor_pos = slice_param->slice_segment_address % gen9_hcpd_context->picture_width_in_ctbs;
    slice_ver_pos = slice_param->slice_segment_address / gen9_hcpd_context->picture_width_in_ctbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->slice_segment_address % gen9_hcpd_context->picture_width_in_ctbs;
        next_slice_ver_pos = next_slice_param->slice_segment_address / gen9_hcpd_context->picture_width_in_ctbs;
    } else {
        next_slice_hor_pos = 0;
        next_slice_ver_pos = 0;
    }

    collocated_ref_idx = gen9_hcpd_get_collocated_ref_idx(ctx, pic_param, slice_param, gen9_hcpd_context);
    collocated_from_l0_flag = slice_param->LongSliceFlags.fields.collocated_from_l0_flag;

    /* Remember the collocated-ref settings of the first inter slice that
     * actually uses temporal MVP ... */
    if ((!gen9_hcpd_context->first_inter_slice_valid) &&
        (slice_param->LongSliceFlags.fields.slice_type != HEVC_SLICE_I) &&
        slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag) {
        gen9_hcpd_context->first_inter_slice_collocated_ref_idx = collocated_ref_idx;
        gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = collocated_from_l0_flag;
        gen9_hcpd_context->first_inter_slice_valid = 1;
    }

    /* HW requirement: ... and reuse them for slices that do not use
     * temporal MVP themselves (I slices, or slices with the flag off). */
    if (gen9_hcpd_context->first_inter_slice_valid &&
        ((slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I) ||
         (!slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag))) {
        collocated_ref_idx = gen9_hcpd_context->first_inter_slice_collocated_ref_idx;
        collocated_from_l0_flag = gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag;
    }

    /* Slice QP = 26 + init_qp_minus26 + slice_qp_delta.  For content with
     * bit depth above 8, a negative QP is encoded as magnitude plus a
     * separate sign flag (DW3 bit 3). */
    sliceqp = pic_param->init_qp_minus26 + 26 + slice_param->slice_qp_delta;
    if ((pic_param->bit_depth_luma_minus8 > 0)
        || (pic_param->bit_depth_chroma_minus8 > 0)) {
        if (sliceqp < 0) {
            sliceqp_sign_flag = 1;
            sliceqp = -sliceqp;
        }
    }

    BEGIN_BCS_BATCH(batch, 9);

    OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));

    /* DW1: current slice start position (CTB units). */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 16 |
                  slice_hor_pos);
    /* DW2: next slice start position (0,0 when this is the last slice). */
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);
    /* DW3: QP, chroma QP offsets, slice flags, last-slice bit, slice type. */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
                  (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
                  sliceqp << 6 |
                  slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag << 5 |
                  slice_param->LongSliceFlags.fields.dependent_slice_segment_flag << 4 |
                  sliceqp_sign_flag << 3 |
                  !next_slice_param << 2 |
                  slice_param->LongSliceFlags.fields.slice_type);
    /* DW4: collocated ref, merge candidates, weight denominators,
     * low-delay bit, SAO/deblocking controls. */
    OUT_BCS_BATCH(batch,
                  collocated_ref_idx << 26 |
                  (5 - slice_param->five_minus_max_num_merge_cand - 1) << 23 |
                  slice_param->LongSliceFlags.fields.cabac_init_flag << 22 |
                  slice_param->luma_log2_weight_denom << 19 |
                  ((slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) & 0x7) << 16 |
                  collocated_from_l0_flag << 15 |
                  gen9_hcpd_is_low_delay(ctx, pic_param, slice_param) << 14 |
                  slice_param->LongSliceFlags.fields.mvd_l1_zero_flag << 13 |
                  slice_param->LongSliceFlags.fields.slice_sao_luma_flag << 12 |
                  slice_param->LongSliceFlags.fields.slice_sao_chroma_flag << 11 |
                  slice_param->LongSliceFlags.fields.slice_loop_filter_across_slices_enabled_flag << 10 |
                  (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
                  (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
                  slice_param->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_byte_offset); /* DW 5 */
    OUT_BCS_BATCH(batch,
                  0 << 26 |
                  0 << 20 |
                  0);
    OUT_BCS_BATCH(batch, 0);    /* Ignored for decoding */
    OUT_BCS_BATCH(batch, 0);    /* Ignored for decoding */

    ADVANCE_BCS_BATCH(batch);
}
886
/*
 * Emit the HCP_BSD_OBJECT command (3 dwords) that points the bitstream
 * decoder at one slice's compressed data: its size in bytes and its
 * offset within the indirect object buffer programmed earlier.
 */
static void
gen9_hcpd_bsd_object(VADriverContextP ctx,
                     VASliceParameterBufferHEVC *slice_param,
                     struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 3);

    OUT_BCS_BATCH(batch, HCP_BSD_OBJECT | (3 - 2));

    OUT_BCS_BATCH(batch, slice_param->slice_data_size);
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);

    ADVANCE_BCS_BATCH(batch);
}
903
/*
 * Decode one complete HEVC picture: run per-picture init, program the
 * HCP pipe/surface/buffer/QM/picture state once, then emit slice state,
 * ref-idx, weight/offset and BSD-object commands for every slice in
 * every slice group, and finally flush the batch to the BSD ring.
 */
static VAStatus
gen9_hcpd_hevc_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen9_hcpd_context *gen9_hcpd_context)
{
    VAStatus vaStatus;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    VASliceParameterBufferHEVC *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    vaStatus = gen9_hcpd_hevc_decode_init(ctx, decode_state, gen9_hcpd_context);

    if (vaStatus != VA_STATUS_SUCCESS)
        goto out;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    /* Pick the second BSD ring when the hardware has one. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    /* Per-picture HCP state (emitted once before any slice). */
    gen9_hcpd_pipe_mode_select(ctx, decode_state, HCP_CODEC_HEVC, gen9_hcpd_context);
    gen9_hcpd_surface_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_pipe_buf_addr_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_hevc_qm_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_pic_state(ctx, decode_state, gen9_hcpd_context);

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        gen9_hcpd_tile_state(ctx, decode_state, gen9_hcpd_context);

    /* TODO(review): double-check this works when two slice groups use
     * different slice data buffers. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferHEVC *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;

        gen9_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);

        /* The "next" slice param is needed by slice_state to mark the
         * last slice and compute the next slice's start position. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferHEVC *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_hcpd_slice_state(ctx, pic_param, slice_param, next_slice_param, gen9_hcpd_context);
            gen9_hcpd_ref_idx_state(ctx, pic_param, slice_param, gen9_hcpd_context);
            gen9_hcpd_weightoffset_state(ctx, pic_param, slice_param, gen9_hcpd_context);
            gen9_hcpd_bsd_object(ctx, slice_param, gen9_hcpd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

out:
    return vaStatus;
}
973
974 /*********************************************************/
975 /*                  VP9 Code                             */
976 /*********************************************************/
977
978
/* Byte layout of the VP9 probability buffer handed to the HCP. */
#define VP9_PROB_BUFFER_FIRST_PART_SIZE 2010
#define VP9_PROB_BUFFER_SECOND_PART_SIZE 10
/* Offset/size of the inter-mode probability section that is overwritten
 * with key-frame defaults for key / intra-only frames. */
#define VP9_PROB_BUFFER_KEY_INTER_OFFSET 1667
#define VP9_PROB_BUFFER_KEY_INTER_SIZE   343

/* Probability-buffer update/save/restore scenario ids.
 * NOTE(review): "SECNE" appears to be a typo of "SCENE"; the names are
 * kept unchanged because other code may reference them. */
#define VP9_PROB_BUFFER_UPDATE_NO   0
#define VP9_PROB_BUFFER_UPDATE_SECNE_1    1
#define VP9_PROB_BUFFER_UPDATE_SECNE_2    2
#define VP9_PROB_BUFFER_UPDATE_SECNE_3    3
#define VP9_PROB_BUFFER_UPDATE_SECNE_4    4
#define VP9_PROB_BUFFER_UPDATE_SECNE_5    5

#define VP9_PROB_BUFFER_SAVED_NO   0
#define VP9_PROB_BUFFER_SAVED_SECNE_1    1
#define VP9_PROB_BUFFER_SAVED_SECNE_2    2

#define VP9_PROB_BUFFER_RESTORED_NO   0
#define VP9_PROB_BUFFER_RESTORED_SECNE_1    1
#define VP9_PROB_BUFFER_RESTORED_SECNE_2    2
#define VP9_PROB_BUFFER_RESTORED_SECNE_MAX    (VP9_PROB_BUFFER_RESTORED_SECNE_2 + 1)
999
/*
 * (Re)allocate a VP9 motion-vector temporal buffer and record the frame
 * dimensions it was sized for.
 *
 * NOTE: expands in the caller's scope and requires a local `i965`
 * (struct i965_driver_data *) to be visible there.  All parameter uses
 * are parenthesized so callers may pass arbitrary expressions safely.
 */
#define ALLOC_MV_BUFFER(gen_buffer, string, size, width, height)    \
    do {                                                            \
        dri_bo_unreference((gen_buffer)->bo);                       \
        (gen_buffer)->bo = dri_bo_alloc(i965->intel.bufmgr,         \
                                        (string),                   \
                                        (size),                     \
                                        0x1000);                    \
        assert((gen_buffer)->bo);                                   \
        (gen_buffer)->frame_width  = (width);                       \
        (gen_buffer)->frame_height = (height);                      \
    } while (0)
1011
1012 static void
1013 vp9_update_segmentId_buffer(VADriverContextP ctx,
1014                             struct decode_state *decode_state,
1015                             struct gen9_hcpd_context *gen9_hcpd_context)
1016 {
1017     struct i965_driver_data *i965 = i965_driver_data(ctx);
1018     VADecPictureParameterBufferVP9 *pic_param;
1019
1020     int size = 0;
1021     int is_scaling = 0;
1022     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1023     pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;
1024
1025     size = gen9_hcpd_context->picture_width_in_ctbs * gen9_hcpd_context->picture_height_in_ctbs * 1 ;
1026     size <<= 6;
1027     if (gen9_hcpd_context->vp9_segment_id_buffer.bo == NULL || pic_param->frame_width > gen9_hcpd_context->last_frame.frame_width || pic_param->frame_height > gen9_hcpd_context->last_frame.frame_height) {
1028         ALLOC_GEN_BUFFER((&gen9_hcpd_context->vp9_segment_id_buffer), "vp9 segment id buffer", size);
1029     }
1030
1031     is_scaling = (pic_param->frame_width != gen9_hcpd_context->last_frame.frame_width) || (pic_param->frame_height != gen9_hcpd_context->last_frame.frame_height);
1032
1033     if ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||
1034         pic_param->pic_fields.bits.error_resilient_mode ||
1035         pic_param->pic_fields.bits.intra_only || is_scaling) {
1036
1037         //VP9 Segment ID buffer needs to be zero
1038         dri_bo_map(gen9_hcpd_context->vp9_segment_id_buffer.bo, 1);
1039         memset((unsigned char *)gen9_hcpd_context->vp9_segment_id_buffer.bo->virtual, 0, size);
1040         dri_bo_unmap(gen9_hcpd_context->vp9_segment_id_buffer.bo);
1041     }
1042 }
1043
1044 static void
1045 vp9_update_mv_temporal_buffer(VADriverContextP ctx,
1046                               struct decode_state *decode_state,
1047                               struct gen9_hcpd_context *gen9_hcpd_context)
1048 {
1049     struct i965_driver_data *i965 = i965_driver_data(ctx);
1050     VADecPictureParameterBufferVP9 *pic_param;
1051     int size = 0;
1052
1053     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1054     pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;
1055
1056     size = gen9_hcpd_context->picture_width_in_ctbs * gen9_hcpd_context->picture_height_in_ctbs * 9 ;
1057     size <<= 6; //CL aligned
1058     if (gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo == NULL || pic_param->frame_width > gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width || pic_param->frame_height > gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height) {
1059         ALLOC_MV_BUFFER((&gen9_hcpd_context->vp9_mv_temporal_buffer_curr), "vp9 curr mv temporal buffer", size, pic_param->frame_width, pic_param->frame_height);
1060     }
1061     if (gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo == NULL) {
1062         ALLOC_MV_BUFFER((&gen9_hcpd_context->vp9_mv_temporal_buffer_last), "vp9 last mv temporal buffer", size, pic_param->frame_width, pic_param->frame_height);
1063     }
1064
1065 }
1066
/*
 * Populate the baked-in VP9 default probability contexts:
 *   - vp9_fc_key_default:   defaults applied for key / intra-only frames
 *   - vp9_fc_inter_default: defaults applied for inter frames
 *   - vp9_frame_ctx[]:      all FRAME_CONTEXTS entries start from the
 *                           inter defaults
 * Source tables come from vp9_probs.h.
 */
static void
vp9_gen_default_probabilities(VADriverContextP ctx, struct gen9_hcpd_context *gen9_hcpd_context)
{
    int i = 0;
    uint32_t size = 0;

    size = sizeof(FRAME_CONTEXT);
    memset(&gen9_hcpd_context->vp9_fc_key_default, 0, size);
    memset(&gen9_hcpd_context->vp9_fc_inter_default, 0, size);
    memset(&gen9_hcpd_context->vp9_frame_ctx, 0, size * FRAME_CONTEXTS);
    //more code to come here below

    //1. key default
    gen9_hcpd_context->vp9_fc_key_default.tx_probs = default_tx_probs;
    //dummy 52 (gap in the buffer layout -- presumably padding; unverified)
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs4x4, default_coef_probs_4x4);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs8x8, default_coef_probs_8x8);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs16x16, default_coef_probs_16x16);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs32x32, default_coef_probs_32x32);
    //dummy 16
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.skip_probs, default_skip_probs);
    /* Key frames use the key-frame-specific partition/UV-mode tables. */
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.partition_prob, vp9_kf_partition_probs);
    //dummy 47
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.uv_mode_prob, vp9_kf_uv_mode_prob);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.seg_tree_probs, default_seg_tree_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.seg_pred_probs, default_seg_pred_probs);

    //2. inter default
    gen9_hcpd_context->vp9_fc_inter_default.tx_probs = default_tx_probs;
    //dummy 52
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs4x4, default_coef_probs_4x4);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs8x8, default_coef_probs_8x8);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs16x16, default_coef_probs_16x16);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs32x32, default_coef_probs_32x32);
    //dummy 16
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.skip_probs, default_skip_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.inter_mode_probs, default_inter_mode_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.switchable_interp_prob, default_switchable_interp_prob);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.intra_inter_prob, default_intra_inter_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.comp_inter_prob, default_comp_inter_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.single_ref_prob, default_single_ref_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.comp_ref_prob, default_comp_ref_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.y_mode_prob, default_if_y_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.partition_prob, default_partition_probs);
    gen9_hcpd_context->vp9_fc_inter_default.nmvc = default_nmv_context;
    //dummy 47
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.uv_mode_prob, default_if_uv_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.seg_tree_probs, default_seg_tree_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.seg_pred_probs, default_seg_pred_probs);

    /* All frame contexts start life as the inter defaults. */
    for (i = 0; i < FRAME_CONTEXTS; i++) {
        gen9_hcpd_context->vp9_frame_ctx[i] = gen9_hcpd_context->vp9_fc_inter_default;
    }
}
1121
1122 static void
1123 vp9_update_probabilities(VADriverContextP ctx,
1124                          struct decode_state *decode_state,
1125                          struct gen9_hcpd_context *gen9_hcpd_context)
1126 {
1127     VADecPictureParameterBufferVP9 *pic_param;
1128     int i = 0;
1129
1130     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1131     pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;
1132
1133     //first part buffer update: Case 1)Reset all 4 probablity buffers
1134     if ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) || pic_param->pic_fields.bits.intra_only || pic_param->pic_fields.bits.error_resilient_mode) {
1135         if ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||
1136             (pic_param->pic_fields.bits.reset_frame_context == 3) ||
1137             pic_param->pic_fields.bits.error_resilient_mode) {
1138             //perform full buffer update
1139             for (i = 0; i < FRAME_CONTEXTS; i++) {
1140                 memcpy(&gen9_hcpd_context->vp9_frame_ctx[i], &gen9_hcpd_context->vp9_fc_inter_default, VP9_PROB_BUFFER_FIRST_PART_SIZE);
1141
1142                 vp9_copy(gen9_hcpd_context->vp9_frame_ctx[i].seg_tree_probs, default_seg_tree_probs);
1143                 vp9_copy(gen9_hcpd_context->vp9_frame_ctx[i].seg_pred_probs, default_seg_pred_probs);
1144             }
1145         } else if (pic_param->pic_fields.bits.reset_frame_context == 2 && pic_param->pic_fields.bits.intra_only) {
1146             memcpy(&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx], &gen9_hcpd_context->vp9_fc_inter_default, VP9_PROB_BUFFER_FIRST_PART_SIZE);
1147         }
1148         pic_param->pic_fields.bits.frame_context_idx = 0;
1149     }
1150
1151     //Case 3) Update only segment probabilities
1152     if ((pic_param->pic_fields.bits.segmentation_enabled &&
1153          pic_param->pic_fields.bits.segmentation_update_map)) {
1154         //Update seg_tree_probs and seg_pred_probs accordingly
1155         for (i = 0; i < SEG_TREE_PROBS; i++) {
1156             gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx].seg_tree_probs[i] = pic_param->mb_segment_tree_probs[i];
1157         }
1158         for (i = 0; i < PREDICTION_PROBS; i++) {
1159             gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx].seg_pred_probs[i] = pic_param->segment_pred_probs[i];
1160         }
1161     }
1162
1163     //update vp9_fc according to frame_context_id
1164     {
1165         void *pfc = (void *)&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx];
1166         void *pprob = NULL;
1167
1168         dri_bo_map(gen9_hcpd_context->vp9_probability_buffer.bo, 1);
1169
1170         pprob = (void *)gen9_hcpd_context->vp9_probability_buffer.bo->virtual;
1171         memcpy(pprob, pfc, 2048);
1172         //only update 343bytes for key or intra_only frame
1173         if (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME ||
1174             pic_param->pic_fields.bits.intra_only) {
1175             memcpy(pprob + VP9_PROB_BUFFER_FIRST_PART_SIZE - VP9_PROB_BUFFER_KEY_INTER_SIZE
1176                    , gen9_hcpd_context->vp9_fc_key_default.inter_mode_probs
1177                    , VP9_PROB_BUFFER_KEY_INTER_SIZE);
1178         }
1179
1180         dri_bo_unmap(gen9_hcpd_context->vp9_probability_buffer.bo);
1181     }
1182 }
1183
1184 static void
1185 gen9_hcpd_init_vp9_surface(VADriverContextP ctx,
1186                            VADecPictureParameterBufferVP9 *pic_param,
1187                            struct object_surface *obj_surface,
1188                            struct gen9_hcpd_context *gen9_hcpd_context)
1189 {
1190     GenVP9Surface *gen9_vp9_surface;
1191
1192     if (!obj_surface)
1193         return;
1194
1195     obj_surface->free_private_data = gen_free_vp9_surface;
1196     gen9_vp9_surface = obj_surface->private_data;
1197
1198     if (!gen9_vp9_surface) {
1199         gen9_vp9_surface = calloc(sizeof(GenVP9Surface), 1);
1200         assert(gen9_vp9_surface);
1201         gen9_vp9_surface->base.frame_store_id = -1;
1202         obj_surface->private_data = gen9_vp9_surface;
1203     }
1204
1205     gen9_vp9_surface->frame_width  = pic_param->frame_width;
1206     gen9_vp9_surface->frame_height = pic_param->frame_height;
1207
1208 }
1209
/*
 * Per-picture VP9 decode initialization: validate profile/bit-depth
 * support, refresh the frame-store mapping, ensure the render surface
 * exists, derive superblock/min-CB geometry, and (re)allocate all
 * rowstore, deblocking, metadata and probability buffers sized from
 * that geometry.  Returns a VA status code.
 */
static VAStatus
gen9_hcpd_vp9_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VADecPictureParameterBufferVP9 *pic_param;
    struct object_surface *obj_surface;
    uint32_t size;
    int width_in_mbs = 0, height_in_mbs = 0;
    int bit_depth_minus8 = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    width_in_mbs = (pic_param->frame_width + 15) / 16;
    height_in_mbs = (pic_param->frame_height + 15) / 16;

    //For BXT, we support only till 4K
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Reject profiles this hardware generation cannot decode. */
    if (!(i965->codec_info->vp9_dec_profiles & (1U << pic_param->profile)))
        return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;

    /* Profiles 2/3 carry >8-bit content; only 10-bit 4:2:0 is accepted
     * (and only when the codec info advertises it). */
    if (pic_param->profile >= 2) {
        if (pic_param->bit_depth >= 8)
            bit_depth_minus8 = pic_param->bit_depth - 8;

        if (bit_depth_minus8 == 2) {
            if (!(i965->codec_info->vp9_dec_chroma_formats & VA_RT_FORMAT_YUV420_10BPP))
                return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
        } else if ((bit_depth_minus8 > 2) || (bit_depth_minus8 == 1) || (bit_depth_minus8 < 0))
            return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
    }

    //Update the frame store buffers with the reference frames information
    intel_update_vp9_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen9_hcpd_context->reference_surfaces);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    //Ensure there is a tiled render surface in NV12 format. If not, create one.
    vp9_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);


    //Super block in VP9 is 64x64
    gen9_hcpd_context->ctb_size = 64;
    gen9_hcpd_context->min_cb_size = 8; //Min block size is 4 or 8? -- TODO confirm

    //If picture width/height is not multiple of 64, needs to upsize it to the next 64 pixels
    //before calculation below.
    gen9_hcpd_context->picture_width_in_ctbs  = ALIGN(pic_param->frame_width, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->picture_height_in_ctbs = ALIGN(pic_param->frame_height, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;

    gen9_hcpd_context->picture_width_in_min_cb_minus1  = ALIGN(pic_param->frame_width, gen9_hcpd_context->min_cb_size) / gen9_hcpd_context->min_cb_size - 1;
    gen9_hcpd_context->picture_height_in_min_cb_minus1 = ALIGN(pic_param->frame_height, gen9_hcpd_context->min_cb_size) / gen9_hcpd_context->min_cb_size - 1;

    gen9_hcpd_context->picture_width_in_pixels  = (gen9_hcpd_context->picture_width_in_min_cb_minus1  + 1) * gen9_hcpd_context->min_cb_size ;
    gen9_hcpd_context->picture_height_in_pixels = (gen9_hcpd_context->picture_height_in_min_cb_minus1 + 1) * gen9_hcpd_context->min_cb_size ;

    gen9_hcpd_init_vp9_surface(ctx, pic_param, obj_surface, gen9_hcpd_context);

    /* Deblocking line buffers: 10-bit profiles need twice the 8-bit size.
     * All sizes below are in cachelines, hence the final "<<= 6". */
    if (pic_param->profile >= 2)
        size = gen9_hcpd_context->picture_width_in_ctbs * 36; //num_width_in_SB * 36
    else
        size = gen9_hcpd_context->picture_width_in_ctbs * 18; //num_width_in_SB * 18
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer), "line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);

    if (pic_param->profile >= 2)
        size = gen9_hcpd_context->picture_height_in_ctbs * 34; //num_height_in_SB * 34
    else
        size = gen9_hcpd_context->picture_height_in_ctbs * 17; //num_height_in_SB * 17
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);

    size = gen9_hcpd_context->picture_width_in_ctbs * 5; //num_width_in_SB * 5
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer), "metadata line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer), "metadata tile line buffer", size);

    size = gen9_hcpd_context->picture_height_in_ctbs * 5; //num_height_in_SB * 5
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer), "metadata tile column buffer", size);

    size = gen9_hcpd_context->picture_width_in_ctbs * 1; //num_width_in_SB * 1
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->hvd_line_rowstore_buffer), "hvd line rowstore buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->hvd_tile_rowstore_buffer), "hvd tile rowstore buffer", size);

    /* Probability buffer: 32 cachelines = 2048 bytes. */
    size = 32;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->vp9_probability_buffer), "vp9 probability buffer", size);

    /* Reset the HEVC-style collocated-slice cache (unused fields for VP9
     * are cleared for safety). */
    gen9_hcpd_context->first_inter_slice_collocated_ref_idx = 0;
    gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = 0;
    gen9_hcpd_context->first_inter_slice_valid = 0;

    return VA_STATUS_SUCCESS;
}
1314
1315 static void
1316 gen9_hcpd_vp9_surface_state(VADriverContextP ctx,
1317                             struct decode_state *decode_state,
1318                             struct gen9_hcpd_context *gen9_hcpd_context)
1319 {
1320     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
1321     struct object_surface *obj_surface = decode_state->render_object;
1322     struct object_surface *tmp_obj_surface = NULL;
1323     unsigned int y_cb_offset;
1324     int i = 0;
1325
1326     assert(obj_surface);
1327
1328     y_cb_offset = obj_surface->y_cb_offset;
1329
1330     BEGIN_BCS_BATCH(batch, 3);
1331
1332     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1333     OUT_BCS_BATCH(batch,
1334                   (0 << 28) |                   /* surface id */
1335                   (obj_surface->width - 1));    /* pitch - 1 */
1336     OUT_BCS_BATCH(batch,
1337                   (((obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010 : SURFACE_FORMAT_PLANAR_420_8) << 28) |
1338                   y_cb_offset);
1339     ADVANCE_BCS_BATCH(batch);
1340
1341     tmp_obj_surface = obj_surface;
1342
1343     for (i = 0; i < 3; i++) {
1344         obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
1345         if (obj_surface && obj_surface->private_data) {
1346             BEGIN_BCS_BATCH(batch, 3);
1347
1348             OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1349             OUT_BCS_BATCH(batch,
1350                           ((i + 2) << 28) |                   /* surface id */
1351                           (obj_surface->width - 1));    /* pitch - 1 */
1352             OUT_BCS_BATCH(batch,
1353                           (((obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010 : SURFACE_FORMAT_PLANAR_420_8) << 28) |
1354                           obj_surface->y_cb_offset);
1355             ADVANCE_BCS_BATCH(batch);
1356         } else {
1357             BEGIN_BCS_BATCH(batch, 3);
1358
1359             OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1360             OUT_BCS_BATCH(batch,
1361                           ((i + 2) << 28) |                   /* surface id */
1362                           (tmp_obj_surface->width - 1));    /* pitch - 1 */
1363             OUT_BCS_BATCH(batch,
1364                           (((tmp_obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010 : SURFACE_FORMAT_PLANAR_420_8) << 28) |
1365                           tmp_obj_surface->y_cb_offset);
1366             ADVANCE_BCS_BATCH(batch);
1367         }
1368     }
1369 }
1370
/*
 * Emit HCP_PIPE_BUF_ADDR_STATE for VP9 decoding (95 DWords).
 *
 * Programs every buffer address the HCP pipe needs: the decode target,
 * the driver-allocated deblocking/metadata row-store buffers, the current
 * and last-frame (collocated) motion-vector temporal buffers, up to 8
 * reference picture addresses, and the VP9 probability / segment-ID /
 * HVD row-store buffers.  The DWord layout and ordering are fixed by the
 * hardware command definition, so nothing here may be reordered.
 */
static void
gen9_hcpd_vp9_pipe_buf_addr_state(VADriverContextP ctx,
                                  struct decode_state *decode_state,
                                  struct gen9_hcpd_context *gen9_hcpd_context)
{
    /* NOTE(review): i965 appears unused here but is presumably referenced
     * from inside the OUT_BUFFER_* macros (memory-attribute emission) --
     * confirm against the macro definitions before removing. */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    struct object_surface *obj_surface;
    int i = 0;

    BEGIN_BCS_BATCH(batch, 95);

    OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));

    obj_surface = decode_state->render_object;
    assert(obj_surface && obj_surface->bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 22..24, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 25..27, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 28..30, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo); /* DW 31..33 */

    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* DW 37..52 - Reference picture address (2 DWs each, 8 slots; only the
     * first 3 slots -- LAST/GOLDEN/ALTREF -- can be populated for VP9). */
    for (i = 0; i < 3; i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;

        if (obj_surface) {
            OUT_BUFFER_NMA_REFERENCE(obj_surface->bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    for (; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_REFERENCE(NULL); /* DW 54..56, ignore for decoding mode */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, StreamOutEnable - used for transcoding */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, DecodedPictureStatusError, ignored */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, Ignored */

    /* DW 66..81 - for 8 Collocated motion vectors; only slot 0 (the
     * previous frame's MV buffer) is used for VP9 temporal MV prediction. */
    for (i = 0; i < 1; i++) {
        OUT_BUFFER_NMA_REFERENCE(gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo);
    }
    for (; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        OUT_BUFFER_NMA_REFERENCE(NULL);
    }

    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_probability_buffer.bo); /* DW 83..85, VP9 Probability bufffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_segment_id_buffer.bo);  /* DW 86..88, VP9 Segment ID buffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->hvd_line_rowstore_buffer.bo);/* DW 89..91, VP9 HVD Line Rowstore buffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->hvd_tile_rowstore_buffer.bo);/* DW 92..94, VP9 HVD Tile Rowstore buffer */

    ADVANCE_BCS_BATCH(batch);
}
1438
/*
 * Check whether a reference frame's dimensions may be used for scaled
 * prediction of the current frame.  Mirrors libvpx's valid_ref_frame_size():
 * the reference may be at most 2x larger and at most 16x smaller than the
 * current frame in each dimension.  Returns 1 if usable, 0 otherwise.
 */
static inline int
gen9_hcpd_vp9_valid_ref_frame_size(int ref_width, int ref_height,
                                   int cur_width, int cur_height)
{
    /* Reference more than 2x larger than the current frame? */
    if (ref_width > 2 * cur_width || ref_height > 2 * cur_height)
        return 0;

    /* Current frame more than 16x larger than the reference? */
    if (cur_width > 16 * ref_width || cur_height > 16 * ref_height)
        return 0;

    return 1;
}
/*
 * Emit HCP_VP9_PIC_STATE (LEN_COMMAND_OWN = 12 DWords on this platform).
 *
 * Derives the per-frame control fields from the VA-API VP9 picture
 * parameters plus the driver's record of the previous frame: segment-ID
 * stream in/out enables, temporal MV reuse (use_pre_frame_mvs),
 * probability adaptation, and per-reference scaling factors.
 */
static void
gen9_hcpd_vp9_pic_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VADecPictureParameterBufferVP9 *pic_param;
    struct object_surface *obj_surface;
    GenVP9Surface *gen9_vp9_surface;
    uint16_t scale_h = 0;
    uint16_t scale_w = 0;
    uint16_t frame_width_in_pixel = 0;
    uint16_t frame_height_in_pixel = 0;
    uint16_t fwidth = 64;
    uint16_t fheight = 64;
    int i;
    int bit_depth_minus8 = 0;

#define LEN_COMMAND_OWN 12
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    uint8_t segmentIDStreamInEnable = 0;
    /* Stream out the segment ID map only when segmentation updates it. */
    uint8_t segmentIDStreamOutEnable = (pic_param->pic_fields.bits.segmentation_enabled && pic_param->pic_fields.bits.segmentation_update_map);

    // For KEY_FRAME or INTRA_ONLY frame, this bit should be set to "0".
    uint8_t segmentation_temporal_update =
        ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) || (pic_param->pic_fields.bits.intra_only)) ? 0 : pic_param->pic_fields.bits.segmentation_temporal_update;


    /* Stream in the previous segment ID map whenever the current frame
     * cannot (or does not) rebuild it from scratch. */
    if (pic_param->pic_fields.bits.intra_only || (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME)) {
        segmentIDStreamInEnable = 1;
    } else if (pic_param->pic_fields.bits.segmentation_enabled) {
        if (!pic_param->pic_fields.bits.segmentation_update_map) {
            segmentIDStreamInEnable = 1;

        } else if (pic_param->pic_fields.bits.segmentation_temporal_update) {
            segmentIDStreamInEnable = 1;
        }
    }

    if (pic_param->pic_fields.bits.error_resilient_mode) {
        segmentIDStreamInEnable = 1;
    }

    //frame type of previous frame (Key or Non-Key Frame).
    uint8_t last_frame_type = gen9_hcpd_context->last_frame.frame_type;

    /* Temporal MV prediction is only usable when the previous frame was a
     * shown inter frame of identical size and error resilience is off. */
    uint8_t use_pre_frame_mvs = 0;
    use_pre_frame_mvs = !((pic_param->pic_fields.bits.error_resilient_mode) ||
                          (pic_param->frame_width != gen9_hcpd_context->last_frame.frame_width) ||
                          (pic_param->frame_height != gen9_hcpd_context->last_frame.frame_height) ||
                          (pic_param->pic_fields.bits.intra_only) ||
                          (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||
                          (gen9_hcpd_context->last_frame.intra_only) ||
                          (last_frame_type == HCP_VP9_KEY_FRAME) ||
                          (!gen9_hcpd_context->last_frame.show_frame));

    uint8_t adapt_probabilities_flag = 0;
    if ((pic_param->pic_fields.bits.error_resilient_mode || pic_param->pic_fields.bits.frame_parallel_decoding_mode))
        adapt_probabilities_flag = 0; //0: Do not adapt (error resilient or frame_parallel_mode are set)
    else if (!(pic_param->pic_fields.bits.error_resilient_mode) && !(pic_param->pic_fields.bits.frame_parallel_decoding_mode))
        adapt_probabilities_flag = 1; //1: Adapt (not error resilient and not frame_ parallel_mode)

    frame_width_in_pixel  = (gen9_hcpd_context->picture_width_in_min_cb_minus1  + 1) * gen9_hcpd_context->min_cb_size ;
    frame_height_in_pixel = (gen9_hcpd_context->picture_height_in_min_cb_minus1 + 1) * gen9_hcpd_context->min_cb_size ;

    /* NOTE(review): fwidth/fheight are clamped here but never used below --
     * looks like dead code; confirm before removing. */
    fwidth = (fwidth > frame_width_in_pixel) ? frame_width_in_pixel : fwidth;
    fheight = (fheight > frame_height_in_pixel) ? frame_height_in_pixel : fheight;

    /* Profiles 2/3 may carry 10/12-bit content; DW3 wants bit depth - 8. */
    if (pic_param->profile >= 2) {
        if (pic_param->bit_depth >= 8)
            bit_depth_minus8 = pic_param->bit_depth - 8;
    }

    BEGIN_BCS_BATCH(batch, LEN_COMMAND_OWN);

    OUT_BCS_BATCH(batch, HCP_VP9_PIC_STATE | (LEN_COMMAND_OWN - 2));

    OUT_BCS_BATCH(batch,
                  (frame_height_in_pixel - 1) << 16 |
                  (frame_width_in_pixel - 1));         /* DW 1 */
    OUT_BCS_BATCH(batch,
                  segmentIDStreamInEnable << 31 |
                  segmentIDStreamOutEnable << 30 |
                  pic_param->pic_fields.bits.lossless_flag << 29 |
                  segmentation_temporal_update << 28 |
                  pic_param->pic_fields.bits.segmentation_update_map << 27 |
                  pic_param->pic_fields.bits.segmentation_enabled << 26   |
                  pic_param->sharpness_level << 23 |
                  pic_param->filter_level << 17 |
                  pic_param->pic_fields.bits.frame_parallel_decoding_mode << 16 |
                  pic_param->pic_fields.bits.error_resilient_mode << 15 |
                  pic_param->pic_fields.bits.refresh_frame_context << 14 |
                  last_frame_type << 13 |
                  0 << 12 |   /* tx select mode */
                  0 << 11 |   /* Hybrid Prediction Mode */
                  use_pre_frame_mvs << 10 |
                  pic_param->pic_fields.bits.alt_ref_frame_sign_bias << 9 |
                  pic_param->pic_fields.bits.golden_ref_frame_sign_bias << 8 |
                  pic_param->pic_fields.bits.last_ref_frame_sign_bias << 7 |
                  pic_param->pic_fields.bits.mcomp_filter_type << 4 |
                  pic_param->pic_fields.bits.allow_high_precision_mv << 3 |
                  pic_param->pic_fields.bits.intra_only << 2 |
                  adapt_probabilities_flag << 1 |
                  pic_param->pic_fields.bits.frame_type << 0);              /* DW 2 */
    OUT_BCS_BATCH(batch,
                  pic_param->profile << 28 |
                  bit_depth_minus8 << 24 |
                  0 << 22 | /* only support 4:2:0 */
                  pic_param->log2_tile_rows << 8 |
                  pic_param->log2_tile_columns << 0);                      /* DW 3 */
    // resolution change case

    // DW4-DW6: 14.14 fixed-point scale factors for the 3 references;
    // 0xffff/0xffff (uint16_t wrap of -1) marks an unusable reference size.
    for (i = 0; i < 3; i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_vp9_surface = NULL;
        scale_w = 0;
        scale_h = 0;
        if (obj_surface && obj_surface->private_data) {
            gen9_vp9_surface = obj_surface->private_data;
            if (!gen9_hcpd_vp9_valid_ref_frame_size(gen9_vp9_surface->frame_width, gen9_vp9_surface->frame_height, pic_param->frame_width, pic_param->frame_height)) {
                scale_w = -1;
                scale_h = -1;
            } else {
                scale_w = (gen9_vp9_surface->frame_width  << 14) / pic_param->frame_width ;
                scale_h = (gen9_vp9_surface->frame_height << 14) / pic_param->frame_height ;
            }
            OUT_BCS_BATCH(batch,
                          scale_w << 16 |
                          scale_h);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    // DW7-DW9: width/height (minus one, 14 bits each) of the 3 references
    for (i = 0; i < 3; i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_vp9_surface = NULL;

        if (obj_surface && obj_surface->private_data) {
            gen9_vp9_surface = obj_surface->private_data;
            OUT_BCS_BATCH(batch,
                          ((gen9_vp9_surface->frame_height - 1) & 0x3fff) << 16 |
                          ((gen9_vp9_surface->frame_width - 1) & 0x3fff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch,
                  pic_param->first_partition_size << 16 |
                  pic_param->frame_header_length_in_bytes << 0); /* DW 10 */
    OUT_BCS_BATCH(batch,
                  (0 << 3) |
                  (0 << 2) |
                  (1 << 1) |
                  (0 << 0)); /* DW 11, ignored */
    //Rest of the DWs are not valid for BXT
    for (i = 12; i < LEN_COMMAND_OWN; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);

}
1616
1617 static void
1618 gen9_hcpd_vp9_segment_state(VADriverContextP ctx,
1619                             VADecPictureParameterBufferVP9 *pic_param,
1620                             VASegmentParameterVP9 *seg_param, uint8_t seg_id,
1621                             struct gen9_hcpd_context *gen9_hcpd_context)
1622 {
1623     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
1624
1625     int segment_ref = seg_param->segment_flags.fields.segment_reference;
1626
1627     if ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME)
1628         || (pic_param->pic_fields.bits.intra_only))
1629         segment_ref = 0;
1630
1631     BEGIN_BCS_BATCH(batch, 7);
1632
1633     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (7 - 2));
1634     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
1635     OUT_BCS_BATCH(batch,
1636                   seg_param->segment_flags.fields.segment_reference_enabled << 3 |
1637                   segment_ref << 1 |
1638                   seg_param->segment_flags.fields.segment_reference_skipped << 0); /* DW 2 */
1639     if (pic_param->filter_level) {
1640         OUT_BCS_BATCH(batch,
1641                       seg_param->filter_level[1][1] << 24    | //FilterLevelRef1Mode1
1642                       seg_param->filter_level[1][0] << 16    | //FilterLevelRef1Mode0
1643                       seg_param->filter_level[0][1] << 8     | //FilterLevelRef0Mode1
1644                       seg_param->filter_level[0][0] << 0);      //FilterLevelRef0Mode0 /* DW 3 */
1645         OUT_BCS_BATCH(batch,
1646                       seg_param->filter_level[3][1] << 24    | //FilterLevelRef3Mode1
1647                       seg_param->filter_level[3][0] << 16    | //FilterLevelRef3Mode0
1648                       seg_param->filter_level[2][1] << 8     | //FilterLevelRef2Mode1
1649                       seg_param->filter_level[2][0] << 0);     //FilterLevelRef2Mode0 /* DW 4 */
1650     } else {
1651         OUT_BCS_BATCH(batch,
1652                       0);       /* DW 3 */
1653         OUT_BCS_BATCH(batch,
1654                       0);       /* DW 4 */
1655     }
1656     OUT_BCS_BATCH(batch,
1657                   seg_param->luma_ac_quant_scale << 16   |
1658                   seg_param->luma_dc_quant_scale << 0);     /* DW 5 */
1659     OUT_BCS_BATCH(batch,
1660                   seg_param->chroma_ac_quant_scale << 16 |
1661                   seg_param->chroma_dc_quant_scale << 0);   /* DW 6 */
1662
1663     ADVANCE_BCS_BATCH(batch);
1664
1665 }
1666
1667 static void
1668 gen9_hcpd_vp9_bsd_object(VADriverContextP ctx,
1669                          VADecPictureParameterBufferVP9 *pic_param,
1670                          VASliceParameterBufferVP9 *slice_param,
1671                          struct gen9_hcpd_context *gen9_hcpd_context)
1672 {
1673     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
1674     int slice_data_size   = slice_param->slice_data_size - pic_param->frame_header_length_in_bytes;
1675     int slice_data_offset = slice_param->slice_data_offset + pic_param->frame_header_length_in_bytes;
1676
1677     BEGIN_BCS_BATCH(batch, 3);
1678
1679     OUT_BCS_BATCH(batch, HCP_BSD_OBJECT | (3 - 2));
1680
1681     OUT_BCS_BATCH(batch, slice_data_size);
1682     OUT_BCS_BATCH(batch, slice_data_offset);
1683
1684     ADVANCE_BCS_BATCH(batch);
1685
1686 }
1687
/*
 * Decode one VP9 frame: prepare the per-frame buffers, build the HCP
 * command batch (pipe mode, surfaces, buffer addresses, segments, picture
 * state, BSD object), flush it, then update the driver's last-frame
 * bookkeeping, swap the MV temporal buffers and save the adapted
 * probability context.
 *
 * Returns VA_STATUS_SUCCESS on success, or the failing status from
 * gen9_hcpd_vp9_decode_init().
 */
static VAStatus
gen9_hcpd_vp9_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen9_hcpd_context *gen9_hcpd_context)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VADecPictureParameterBufferVP9 *pic_param;
    VASliceParameterBufferVP9 *slice_param;
    dri_bo *slice_data_bo;
    dri_bo *tmp_bo;
    uint16_t tmp;
    int i = 0, num_segments = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
    slice_param = (VASliceParameterBufferVP9 *)decode_state->slice_params[0]->buffer;
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    /* NOTE(review): a 1-byte slice presumably marks a frame with no real
     * payload (e.g. a superframe marker / skipped frame) -- confirm; it is
     * treated as an immediate success with nothing submitted. */
    if (slice_param->slice_data_size == 1) {
        goto out;
    }

    vaStatus = gen9_hcpd_vp9_decode_init(ctx, decode_state, gen9_hcpd_context);

    if (vaStatus != VA_STATUS_SUCCESS)
        goto out;

    //Update segment id buffer if needed
    vp9_update_segmentId_buffer(ctx, decode_state, gen9_hcpd_context);
    //Update mv buffer if needed
    vp9_update_mv_temporal_buffer(ctx, decode_state, gen9_hcpd_context);
    //Update probability buffer if needed
    vp9_update_probabilities(ctx, decode_state, gen9_hcpd_context);

    /* Pin the whole frame's commands to one BSD ring so they execute
     * atomically. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    gen9_hcpd_pipe_mode_select(ctx, decode_state, HCP_CODEC_VP9, gen9_hcpd_context);
    //Not sure what the surface id value should be: Gold? ALtRef? PrevRef? or Just RefPic?
    gen9_hcpd_vp9_surface_state(ctx, decode_state, gen9_hcpd_context);

    //Only one VASliceParameterBufferVP9 should be sent per frame
    slice_data_bo = decode_state->slice_datas[0]->bo;

    gen9_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);

    gen9_hcpd_vp9_pipe_buf_addr_state(ctx, decode_state, gen9_hcpd_context);
    //If segmentation is disabled, only SegParam[0] is valid,
    //all others should be populated with 0
    if (!pic_param->pic_fields.bits.segmentation_enabled)
        num_segments = 1;
    else  //If segmentation is enabled, all 8 entries should be valid.
        num_segments = 8;

    for (i = 0; i < num_segments; i++) {
        VASegmentParameterVP9 seg_param = slice_param->seg_param[i];
        gen9_hcpd_vp9_segment_state(ctx, pic_param, &seg_param, i, gen9_hcpd_context);
    }

    gen9_hcpd_vp9_pic_state(ctx, decode_state, gen9_hcpd_context);

    gen9_hcpd_vp9_bsd_object(ctx, pic_param, slice_param, gen9_hcpd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    // keep track of the last frame status
    gen9_hcpd_context->last_frame.frame_width = pic_param->frame_width;
    gen9_hcpd_context->last_frame.frame_height = pic_param->frame_height;
    gen9_hcpd_context->last_frame.show_frame = pic_param->pic_fields.bits.show_frame;
    gen9_hcpd_context->last_frame.frame_type = pic_param->pic_fields.bits.frame_type;
    gen9_hcpd_context->last_frame.refresh_frame_context = pic_param->pic_fields.bits.refresh_frame_context;
    gen9_hcpd_context->last_frame.frame_context_idx = pic_param->pic_fields.bits.frame_context_idx;
    gen9_hcpd_context->last_frame.intra_only = pic_param->pic_fields.bits.intra_only;

    // switch mv buffer: the buffer just written becomes "last" for the
    // next inter frame's temporal MV prediction (no swap after key frames)
    if (pic_param->pic_fields.bits.frame_type != HCP_VP9_KEY_FRAME) {
        tmp_bo = gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo = tmp_bo;

        tmp = gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_width;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_width = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width = tmp;

        tmp = gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_height;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_height = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height = tmp;


    }
    //update vp9_frame_ctx according to frame_context_id
    if (pic_param->pic_fields.bits.refresh_frame_context) {
        void *pfc = (void *)&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx];
        void *pprob = NULL;

        //update vp9_fc to frame_context
        // Key/intra-only frames only refresh the intra portion of the
        // probability tables; inter frames refresh the full first part.
        dri_bo_map(gen9_hcpd_context->vp9_probability_buffer.bo, 1);
        pprob = (void *)gen9_hcpd_context->vp9_probability_buffer.bo->virtual;
        if (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME ||
            pic_param->pic_fields.bits.intra_only)
            memcpy(pfc, pprob, VP9_PROB_BUFFER_FIRST_PART_SIZE - VP9_PROB_BUFFER_KEY_INTER_SIZE);
        else
            memcpy(pfc, pprob, VP9_PROB_BUFFER_FIRST_PART_SIZE);

        dri_bo_unmap(gen9_hcpd_context->vp9_probability_buffer.bo);
    }

out:
    return vaStatus;
}
1804
1805
1806 static VAStatus
1807 gen9_hcpd_decode_picture(VADriverContextP ctx,
1808                          VAProfile profile,
1809                          union codec_state *codec_state,
1810                          struct hw_context *hw_context)
1811 {
1812     struct gen9_hcpd_context *gen9_hcpd_context = (struct gen9_hcpd_context *)hw_context;
1813     struct decode_state *decode_state = &codec_state->decode;
1814     VAStatus vaStatus;
1815
1816     assert(gen9_hcpd_context);
1817
1818     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
1819
1820     if (vaStatus != VA_STATUS_SUCCESS)
1821         goto out;
1822
1823     switch (profile) {
1824     case VAProfileHEVCMain:
1825     case VAProfileHEVCMain10:
1826         vaStatus = gen9_hcpd_hevc_decode_picture(ctx, decode_state, gen9_hcpd_context);
1827         break;
1828     case VAProfileVP9Profile0:
1829     case VAProfileVP9Profile2:
1830         vaStatus = gen9_hcpd_vp9_decode_picture(ctx, decode_state, gen9_hcpd_context);
1831         break;
1832
1833     default:
1834         /* should never get here 1!! */
1835         assert(0);
1836         break;
1837     }
1838
1839 out:
1840     return vaStatus;
1841 }
1842
/*
 * hw_context::destroy callback: release everything the gen9 HCP decoder
 * context owns -- the driver-allocated row-store/metadata/probability
 * buffers, both VP9 MV temporal buffer objects, the batch buffer and the
 * context structure itself.  HEVC-only contexts simply leave the VP9
 * buffers unallocated (presumably the FREE_GEN_BUFFER macro and
 * dri_bo_unreference tolerate empty/NULL entries -- TODO confirm).
 */
static void
gen9_hcpd_context_destroy(void *hw_context)
{
    struct gen9_hcpd_context *gen9_hcpd_context = (struct gen9_hcpd_context *)hw_context;

    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->hvd_line_rowstore_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->hvd_tile_rowstore_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->vp9_probability_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->vp9_segment_id_buffer));
    dri_bo_unreference(gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo);
    dri_bo_unreference(gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo);

    intel_batchbuffer_free(gen9_hcpd_context->base.batch);
    free(gen9_hcpd_context);
}
1867
/* Per-profile initialization for HEVC decoding: fill in the context's
 * default inverse-quantization matrix. */
static void
gen9_hcpd_hevc_context_init(VADriverContextP ctx,
                            struct gen9_hcpd_context *gen9_hcpd_context)
{
    hevc_gen_default_iq_matrix(&gen9_hcpd_context->iq_matrix_hevc);
}
1874
1875 static void
1876 gen9_hcpd_vp9_context_init(VADriverContextP ctx,
1877                            struct gen9_hcpd_context *gen9_hcpd_context)
1878 {
1879
1880     gen9_hcpd_context->last_frame.frame_height  = 0;
1881     gen9_hcpd_context->last_frame.show_frame    = 0;
1882     gen9_hcpd_context->last_frame.frame_type    = 0;
1883     gen9_hcpd_context->last_frame.refresh_frame_context = 0;
1884     gen9_hcpd_context->last_frame.frame_context_idx = 0;
1885     gen9_hcpd_context->last_frame.intra_only = 0;
1886     gen9_hcpd_context->last_frame.prob_buffer_saved_flag = 0;
1887     gen9_hcpd_context->last_frame.prob_buffer_restored_flag = 0;
1888
1889     //Super block in VP9 is 64x64
1890     gen9_hcpd_context->ctb_size = 64;
1891     gen9_hcpd_context->min_cb_size = 8; //Min block size is 8
1892
1893     vp9_gen_default_probabilities(ctx, gen9_hcpd_context);
1894 }
1895
1896 static struct hw_context *
1897 gen9_hcpd_context_init(VADriverContextP ctx, struct object_config *object_config)
1898 {
1899     struct intel_driver_data *intel = intel_driver_data(ctx);
1900     struct gen9_hcpd_context *gen9_hcpd_context = calloc(1, sizeof(struct gen9_hcpd_context));
1901     int i;
1902
1903     if (!gen9_hcpd_context)
1904         return NULL;
1905
1906     gen9_hcpd_context->base.destroy = gen9_hcpd_context_destroy;
1907     gen9_hcpd_context->base.run = gen9_hcpd_decode_picture;
1908     gen9_hcpd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_VEBOX, 0);
1909
1910     for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
1911         gen9_hcpd_context->reference_surfaces[i].surface_id = VA_INVALID_ID;
1912         gen9_hcpd_context->reference_surfaces[i].frame_store_id = -1;
1913         gen9_hcpd_context->reference_surfaces[i].obj_surface = NULL;
1914     }
1915
1916     switch (object_config->profile) {
1917     case VAProfileHEVCMain:
1918     case VAProfileHEVCMain10:
1919         gen9_hcpd_hevc_context_init(ctx, gen9_hcpd_context);
1920         break;
1921     case VAProfileVP9Profile0:
1922     case VAProfileVP9Profile2:
1923         gen9_hcpd_vp9_context_init(ctx, gen9_hcpd_context);
1924         break;
1925
1926     default:
1927         break;
1928     }
1929
1930     return (struct hw_context *)gen9_hcpd_context;
1931 }
1932
1933 struct hw_context *
1934 gen9_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
1935 {
1936     if (obj_config->profile == VAProfileHEVCMain ||
1937         obj_config->profile == VAProfileHEVCMain10 ||
1938         obj_config->profile == VAProfileVP9Profile0 ||
1939         obj_config->profile == VAProfileVP9Profile2) {
1940         return gen9_hcpd_context_init(ctx, obj_config);
1941     } else {
1942         return gen8_dec_hw_context_init(ctx, obj_config);
1943     }
1944 }
1945
1946 void gen9_max_resolution(struct i965_driver_data *i965,
1947                          struct object_config *obj_config,
1948                          int *w,                                /* out */
1949                          int *h)                                /* out */
1950 {
1951     if (obj_config->profile == VAProfileJPEGBaseline) {
1952         *w = 8192;
1953         *h = 8192;
1954     } else {
1955         *w = i965->codec_info->max_width;
1956         *h = i965->codec_info->max_height;
1957     }
1958 }