OSDN Git Service

Fix typo of variable name in JPEG decoding
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_mfd.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30
31 #include <va/va.h>
32 #include <va/va_dec_hevc.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_drv_video.h"
38 #include "i965_decoder_utils.h"
39
40 #include "gen9_mfd.h"
41 #include "intel_media.h"
42 #include "vp9_probs.h"
43
/*
 * Emit a 64-bit graphics buffer address (2 DWs) into the BCS batch,
 * or two zero DWs when buf_bo is NULL.  When 'ma' is non-zero a third
 * DW carrying the memory address attributes (MOCS) is emitted as well.
 * 'is_target' selects whether the GPU is granted write access.
 *
 * NOTE: expands using 'batch' and 'i965' from the caller's scope.
 */
#define OUT_BUFFER(buf_bo, is_target, ma)  do {                         \
        if (buf_bo) {                                                   \
            OUT_BCS_RELOC64(batch,                                        \
                          buf_bo,                                       \
                          I915_GEM_DOMAIN_RENDER,                       \
                          is_target ? I915_GEM_DOMAIN_RENDER : 0,       \
                          0);                                           \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        if (ma)                                                         \
            OUT_BCS_BATCH(batch, i965->intel.mocs_state);                                    \
    } while (0)

/* MA = with memory address attributes DW; NMA = without. */
#define OUT_BUFFER_MA_TARGET(buf_bo)       OUT_BUFFER(buf_bo, 1, 1)
#define OUT_BUFFER_MA_REFERENCE(buf_bo)    OUT_BUFFER(buf_bo, 0, 1)
#define OUT_BUFFER_NMA_TARGET(buf_bo)      OUT_BUFFER(buf_bo, 1, 0)
#define OUT_BUFFER_NMA_REFERENCE(buf_bo)   OUT_BUFFER(buf_bo, 0, 0)
63
64 static void
65 gen9_hcpd_init_hevc_surface(VADriverContextP ctx,
66                             VAPictureParameterBufferHEVC *pic_param,
67                             struct object_surface *obj_surface,
68                             struct gen9_hcpd_context *gen9_hcpd_context)
69 {
70     struct i965_driver_data *i965 = i965_driver_data(ctx);
71     GenHevcSurface *gen9_hevc_surface;
72
73     if (!obj_surface)
74         return;
75
76     obj_surface->free_private_data = gen_free_hevc_surface;
77     gen9_hevc_surface = obj_surface->private_data;
78
79     if (!gen9_hevc_surface) {
80         gen9_hevc_surface = calloc(sizeof(GenHevcSurface), 1);
81         assert(gen9_hevc_surface);
82         gen9_hevc_surface->base.frame_store_id = -1;
83         obj_surface->private_data = gen9_hevc_surface;
84     }
85
86     if (gen9_hevc_surface->motion_vector_temporal_bo == NULL) {
87         uint32_t size;
88
89         if (gen9_hcpd_context->ctb_size == 16)
90             size = ((gen9_hcpd_context->picture_width_in_pixels + 63) >> 6) *
91                    ((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4);
92         else
93             size = ((gen9_hcpd_context->picture_width_in_pixels + 31) >> 5) *
94                    ((gen9_hcpd_context->picture_height_in_pixels + 31) >> 5);
95
96         size <<= 6; /* in unit of 64bytes */
97         gen9_hevc_surface->motion_vector_temporal_bo = dri_bo_alloc(i965->intel.bufmgr,
98                                                                     "motion vector temporal buffer",
99                                                                     size,
100                                                                     0x1000);
101     }
102 }
103
/*
 * Per-picture HEVC decode setup: refresh the reference frame store,
 * derive picture geometry (CTB size, picture size in CTBs / min CBs)
 * from the picture parameters, ensure the render surface BO exists,
 * and (re)allocate the HCP row-store and scratch buffers whose sizes
 * depend on picture dimensions and bit depth.
 */
static VAStatus
gen9_hcpd_hevc_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAPictureParameterBufferHEVC *pic_param;
    struct object_surface *obj_surface;
    uint32_t size;
    int size_shift = 3;   /* 8-bit path; reduced to 2 for >8-bit content below */

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
    intel_update_hevc_frame_store_index(ctx,
                                        decode_state,
                                        pic_param,
                                        gen9_hcpd_context->reference_surfaces,
                                        &gen9_hcpd_context->fs_ctx);

    /* Picture geometry derived from the SPS/PPS fields. */
    gen9_hcpd_context->picture_width_in_pixels = pic_param->pic_width_in_luma_samples;
    gen9_hcpd_context->picture_height_in_pixels = pic_param->pic_height_in_luma_samples;
    gen9_hcpd_context->ctb_size = (1 << (pic_param->log2_min_luma_coding_block_size_minus3 +
                                         3 +
                                         pic_param->log2_diff_max_min_luma_coding_block_size));
    gen9_hcpd_context->picture_width_in_ctbs = ALIGN(gen9_hcpd_context->picture_width_in_pixels, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->picture_height_in_ctbs = ALIGN(gen9_hcpd_context->picture_height_in_pixels, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->min_cb_size = (1 << (pic_param->log2_min_luma_coding_block_size_minus3 + 3));
    gen9_hcpd_context->picture_width_in_min_cb_minus1 = gen9_hcpd_context->picture_width_in_pixels / gen9_hcpd_context->min_cb_size - 1;
    gen9_hcpd_context->picture_height_in_min_cb_minus1 = gen9_hcpd_context->picture_height_in_pixels / gen9_hcpd_context->min_cb_size - 1;

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    hevc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen9_hcpd_init_hevc_surface(ctx, pic_param, obj_surface, gen9_hcpd_context);

    /* >8-bit content needs larger row stores: shift by 2 instead of 3. */
    if ((pic_param->bit_depth_luma_minus8 > 0)
        || (pic_param->bit_depth_chroma_minus8 > 0))
        size_shift = 2;

    /* All buffer sizes below are computed in 64-byte cachelines and
     * then converted to bytes with "size <<= 6". */
    size = ALIGN(gen9_hcpd_context->picture_width_in_pixels, 32) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer), "line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);

    size = ALIGN(gen9_hcpd_context->picture_height_in_pixels + 6 * gen9_hcpd_context->picture_height_in_ctbs, 32) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);

    size = (((gen9_hcpd_context->picture_width_in_pixels + 15) >> 4) * 188 + 9 * gen9_hcpd_context->picture_width_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer), "metadata line buffer", size);

    size = (((gen9_hcpd_context->picture_width_in_pixels + 15) >> 4) * 172 + 9 * gen9_hcpd_context->picture_width_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer), "metadata tile line buffer", size);

    /* CHV uses a different metadata tile column sizing formula. */
    if (IS_CHERRYVIEW(i965->intel.device_info))
        size = (((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4) * 256 + 9 * gen9_hcpd_context->picture_height_in_ctbs + 1023) >> 9;
    else
        size = (((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4) * 176 + 89 * gen9_hcpd_context->picture_height_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer), "metadata tile column buffer", size);

    size = ALIGN(((gen9_hcpd_context->picture_width_in_pixels >> 1) + 3 * gen9_hcpd_context->picture_width_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_line_buffer), "sao line buffer", size);

    size = ALIGN(((gen9_hcpd_context->picture_width_in_pixels >> 1) + 6 * gen9_hcpd_context->picture_width_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_tile_line_buffer), "sao tile line buffer", size);

    size = ALIGN(((gen9_hcpd_context->picture_height_in_pixels >> 1) + 6 * gen9_hcpd_context->picture_height_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_tile_column_buffer), "sao tile column buffer", size);

    /* Reset per-picture collocated-reference bookkeeping used by the
     * first inter slice of the picture. */
    gen9_hcpd_context->first_inter_slice_collocated_ref_idx = 0;
    gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = 0;
    gen9_hcpd_context->first_inter_slice_valid = 0;

    return VA_STATUS_SUCCESS;
}
185
/*
 * Emit HCP_PIPE_MODE_SELECT configuring the HCP pipe for decoding.
 * KBL/GLK and Gen10 use a 6-DW form of the command; other Gen9
 * platforms use the original 4-DW form.
 */
static void
gen9_hcpd_pipe_mode_select(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           int codec,
                           struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;

    assert((codec == HCP_CODEC_HEVC) || (codec == HCP_CODEC_VP9));

    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 6);

        OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 4);

        OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
    }
    OUT_BCS_BATCH(batch,
                  (codec << 5) |
                  (0 << 3) | /* disable Pic Status / Error Report */
                  HCP_CODEC_SELECT_DECODE);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Extra two DWs of the 6-DW form; on KBL/GLK bit 6 of DW4 is set
     * for VP9 decoding. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        if (codec == HCP_CODEC_VP9)
            OUT_BCS_BATCH(batch, 1 << 6);
        else
            OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    } else if (IS_GEN10(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
230
/*
 * Emit HCP_SURFACE_STATE for the decode target: surface pitch and
 * the luma/chroma plane offset.  P010 is selected when either plane
 * exceeds 8-bit depth, otherwise planar NV12-style 4:2:0 8-bit.
 */
static void
gen9_hcpd_surface_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    VAPictureParameterBufferHEVC *pic_param;

    assert(obj_surface);

    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
    y_cb_offset = obj_surface->y_cb_offset;

    BEGIN_BCS_BATCH(batch, 3);

    OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 28) |                   /* surface id */
                  (obj_surface->width - 1));    /* pitch - 1 */
    if ((pic_param->bit_depth_luma_minus8 > 0)
        || (pic_param->bit_depth_chroma_minus8 > 0)) {
        OUT_BCS_BATCH(batch,
                      (SURFACE_FORMAT_P010 << 28) |
                      y_cb_offset);
    } else {
        OUT_BCS_BATCH(batch,
                      (SURFACE_FORMAT_PLANAR_420_8 << 28) |
                      y_cb_offset);
    }

    ADVANCE_BCS_BATCH(batch);
}
265
/*
 * Emit the 95-DW HCP_PIPE_BUF_ADDR_STATE: decode target, all row-store
 * and scratch buffers, the 8 reference picture BOs, and the per-surface
 * motion vector temporal buffers.  DW layout is fixed by hardware, so
 * the emission order below must not change.
 */
static void
gen9_hcpd_pipe_buf_addr_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    struct object_surface *obj_surface;
    GenHevcSurface *gen9_hevc_surface;
    int i;

    BEGIN_BCS_BATCH(batch, 95);

    OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));

    obj_surface = decode_state->render_object;
    assert(obj_surface && obj_surface->bo);
    gen9_hevc_surface = obj_surface->private_data;
    assert(gen9_hevc_surface && gen9_hevc_surface->motion_vector_temporal_bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_line_buffer.bo);              /* DW 22..24 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
    OUT_BUFFER_MA_TARGET(gen9_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* DW 37..52: reference picture addresses (2 DWs each, no MA). */
    for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;

        if (obj_surface)
            OUT_BUFFER_NMA_REFERENCE(obj_surface->bo);
        else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_REFERENCE(NULL); /* DW 54..56, ignore for decoding mode */
    OUT_BUFFER_MA_TARGET(NULL);
    OUT_BUFFER_MA_TARGET(NULL);
    OUT_BUFFER_MA_TARGET(NULL);

    /* DW 66..81: per-reference motion vector temporal buffers. */
    for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_hevc_surface = NULL;

        if (obj_surface && obj_surface->private_data)
            gen9_hevc_surface = obj_surface->private_data;

        if (gen9_hevc_surface)
            OUT_BUFFER_NMA_REFERENCE(gen9_hevc_surface->motion_vector_temporal_bo);
        else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(NULL);    /* DW 83..85, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 86..88, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 89..91, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 92..94, ignore for HEVC */

    ADVANCE_BCS_BATCH(batch);
}
335
/*
 * Emit the 14-DW HCP_IND_OBJ_BASE_ADDR_STATE pointing the bitstream
 * fetch at slice_data_bo.  DW 4..5 carry the upper bound, rounded up
 * to a 4K page.  'i965' is used implicitly by the OUT_BUFFER macros
 * (MOCS state).
 */
static void
gen9_hcpd_ind_obj_base_addr_state(VADriverContextP ctx,
                                  dri_bo *slice_data_bo,
                                  struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 14);

    OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
    OUT_BUFFER_MA_REFERENCE(slice_data_bo);        /* DW 1..3 */
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_RENDER, 0, ALIGN(slice_data_bo->size, 4096)); /* DW 4..5, upper bound */
    OUT_BUFFER_MA_REFERENCE(NULL);                 /* DW 6..8, CU, ignored */
    OUT_BUFFER_MA_TARGET(NULL);                    /* DW 9..11, PAK-BSE, ignored */
    OUT_BUFFER_NMA_TARGET(NULL);                   /* DW 12..13, Upper Bound  */

    ADVANCE_BCS_BATCH(batch);
}
355
/*
 * Gen10 variant of HCP_IND_OBJ_BASE_ADDR_STATE: the command grew to
 * 29 DWs with extra (unused-for-decode) indirect buffer slots.  Only
 * the bitstream base and its page-aligned upper bound are programmed.
 */
static void
gen10_hcpd_ind_obj_base_addr_state(VADriverContextP ctx,
                                   dri_bo *slice_data_bo,
                                   struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 29);

    OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));

    /* DW1..5 indirect bitstream*/
    /* NOTE(review): OUT_RELOC64 (not OUT_BCS_RELOC64) is used for the
     * base address here — presumably equivalent for this batch; verify
     * against intel_batchbuffer.h. */
    OUT_RELOC64(batch,
                slice_data_bo,
                I915_GEM_DOMAIN_INSTRUCTION, 0,
                0);
    OUT_BCS_BATCH(batch, 0); /* DW3, memory address attributes */
    OUT_BCS_RELOC64(batch, slice_data_bo,
                    I915_GEM_DOMAIN_RENDER,
                    0, ALIGN(slice_data_bo->size, 4096)); /* DW4..5, upper bound */

    /* DW 6..8 Indirect CU */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 9..13 Indirect PAK_PSE */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 14..16. Compressed_header */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 17..19 . Prob-counter Stream-out */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 20..22. Prob-delta stream-in */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 23..25. Tile Record */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 26..28. CU level statics buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
416
/*
 * Emit one HCP_QM_STATE command carrying a single scaling list.
 * The matrix is zero-padded to the fixed 64-byte payload the command
 * expects (4x4 lists supply only 16 bytes).
 *
 * size_id:          0=4x4, 1=8x8, 2=16x16, 3=32x32
 * color_component:  0=Y, 1=Cb, 2=Cr
 * pred_type:        0=intra, 1=inter
 * dc:               DC coefficient for 16x16/32x32 lists
 */
static void
gen9_hcpd_qm_state(VADriverContextP ctx,
                   int size_id,
                   int color_component,
                   int pred_type,
                   int dc,
                   unsigned char *qm,
                   int qm_length,
                   struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    unsigned char qm_buffer[64];

    assert(qm_length <= 64);
    memset(qm_buffer, 0, sizeof(qm_buffer));
    memcpy(qm_buffer, qm, qm_length);

    BEGIN_BCS_BATCH(batch, 18);

    OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch,
                  dc << 5 |
                  color_component << 3 |
                  size_id << 1 |
                  pred_type);
    intel_batchbuffer_data(batch, qm_buffer, 64); /* 16 DWs of matrix data */

    ADVANCE_BCS_BATCH(batch);
}
446
/*
 * Program all HEVC scaling lists (4x4, 8x8, 16x16, 32x32).  Falls back
 * to the context's default (flat) matrices when the app supplied none
 * or when scaling lists are disabled in the PPS.
 */
static void
gen9_hcpd_hevc_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen9_hcpd_context *gen9_hcpd_context)
{
    VAIQMatrixBufferHEVC *iq_matrix;
    VAPictureParameterBufferHEVC *pic_param;
    int i;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        iq_matrix = (VAIQMatrixBufferHEVC *)decode_state->iq_matrix->buffer;
    else
        iq_matrix = &gen9_hcpd_context->iq_matrix_hevc;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    /* Scaling lists disabled: override with the context defaults. */
    if (!pic_param->pic_fields.bits.scaling_list_enabled_flag)
        iq_matrix = &gen9_hcpd_context->iq_matrix_hevc;

    /* 6 lists each (Y/Cb/Cr x intra/inter); i%3 = component, i/3 = pred type. */
    for (i = 0; i < 6; i++) {
        gen9_hcpd_qm_state(ctx,
                           0, i % 3, i / 3, 0,
                           iq_matrix->ScalingList4x4[i], 16,
                           gen9_hcpd_context);
    }

    for (i = 0; i < 6; i++) {
        gen9_hcpd_qm_state(ctx,
                           1, i % 3, i / 3, 0,
                           iq_matrix->ScalingList8x8[i], 64,
                           gen9_hcpd_context);
    }

    for (i = 0; i < 6; i++) {
        gen9_hcpd_qm_state(ctx,
                           2, i % 3, i / 3, iq_matrix->ScalingListDC16x16[i],
                           iq_matrix->ScalingList16x16[i], 64,
                           gen9_hcpd_context);
    }

    /* 32x32 has only 2 lists (luma intra/inter). */
    for (i = 0; i < 2; i++) {
        gen9_hcpd_qm_state(ctx,
                           3, 0, i % 2, iq_matrix->ScalingListDC32x32[i],
                           iq_matrix->ScalingList32x32[i], 64,
                           gen9_hcpd_context);
    }
}
495
/*
 * Emit the 19-DW HCP_PIC_STATE from the HEVC picture parameters:
 * picture geometry in minimum CBs, coding/transform block size ranges,
 * PCM configuration and the per-picture coding tool flags.  The bit
 * packing below follows the hardware DW layout and must not change.
 */
static void
gen9_hcpd_pic_state(VADriverContextP ctx,
                    struct decode_state *decode_state,
                    struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
    int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
    /*
     * 7.4.3.1
     *
     * When not present, the value of loop_filter_across_tiles_enabled_flag
     * is inferred to be equal to 1.
     */
    int loop_filter_across_tiles_enabled_flag = 1;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    if (pic_param->pic_fields.bits.pcm_enabled_flag) {
        max_pcm_size_minus3 = pic_param->log2_min_pcm_luma_coding_block_size_minus3 +
                              pic_param->log2_diff_max_min_pcm_luma_coding_block_size;
        min_pcm_size_minus3 = pic_param->log2_min_pcm_luma_coding_block_size_minus3;
        pcm_sample_bit_depth_luma_minus1 = (pic_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
        pcm_sample_bit_depth_chroma_minus1 = (pic_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
    } else {
        /* PCM disabled: derive from the CTB size range, capped at 2. */
        max_pcm_size_minus3 = MIN(pic_param->log2_min_luma_coding_block_size_minus3 + pic_param->log2_diff_max_min_luma_coding_block_size, 2);
    }

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;

    BEGIN_BCS_BATCH(batch, 19);

    OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));

    /* DW1: frame size in minimum coding blocks */
    OUT_BCS_BATCH(batch,
                  gen9_hcpd_context->picture_height_in_min_cb_minus1 << 16 |
                  gen9_hcpd_context->picture_width_in_min_cb_minus1);
    /* DW2: PCM / transform / coding block size ranges (log2-based) */
    OUT_BCS_BATCH(batch,
                  max_pcm_size_minus3 << 10 |
                  min_pcm_size_minus3 << 8 |
                  (pic_param->log2_min_transform_block_size_minus2 +
                   pic_param->log2_diff_max_min_transform_block_size) << 6 |
                  pic_param->log2_min_transform_block_size_minus2 << 4 |
                  (pic_param->log2_min_luma_coding_block_size_minus3 +
                   pic_param->log2_diff_max_min_luma_coding_block_size) << 2 |
                  pic_param->log2_min_luma_coding_block_size_minus3);
    OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
    /* DW4: coding tool enable flags */
    OUT_BCS_BATCH(batch,
                  0 << 27 |
                  pic_param->pic_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
                  pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
                  pic_param->pic_fields.bits.amp_enabled_flag << 23 |
                  pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
                  !(pic_param->CurrPic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD) << 21 |
                  !!(pic_param->CurrPic.flags & VA_PICTURE_HEVC_FIELD_PIC) << 20 |
                  pic_param->pic_fields.bits.weighted_pred_flag << 19 |
                  pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
                  pic_param->pic_fields.bits.tiles_enabled_flag << 17 |
                  pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
                  loop_filter_across_tiles_enabled_flag << 15 |
                  pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |
                  pic_param->log2_parallel_merge_level_minus2 << 10 |
                  pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |
                  pic_param->pic_fields.bits.pcm_loop_filter_disabled_flag << 8 |
                  (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |
                  pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |
                  pic_param->pic_fields.bits.pcm_enabled_flag << 4 |
                  pic_param->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag << 3 |
                  0);
    /* DW5: bit depths, PCM sample depths, transform hierarchy, QP offsets */
    OUT_BCS_BATCH(batch,
                  pic_param->bit_depth_luma_minus8 << 27 |
                  pic_param->bit_depth_chroma_minus8 << 24 |
                  pcm_sample_bit_depth_luma_minus1 << 20 |
                  pcm_sample_bit_depth_chroma_minus1 << 16 |
                  pic_param->max_transform_hierarchy_depth_inter << 13 |
                  pic_param->max_transform_hierarchy_depth_intra << 10 |
                  (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
                  (pic_param->pps_cb_qp_offset & 0x1f));
    OUT_BCS_BATCH(batch,
                  0 << 29 |
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* DW 10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* DW 15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
595
/*
 * Emit HCP_TILE_STATE: cumulative tile column/row start positions in
 * CTB units.  pos_col holds 20 entries and pos_row 24; the loop bounds
 * (18 and 20) keep the [i + 1] writes within those arrays, and the
 * hardware consumes exactly 20 + 24 bytes of position data.
 */
static void
gen9_hcpd_tile_state(VADriverContextP ctx,
                     struct decode_state *decode_state,
                     struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    uint8_t pos_col[20], pos_row[24];
    int i;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    memset(pos_col, 0, sizeof(pos_col));
    memset(pos_row, 0, sizeof(pos_row));

    /* Prefix sums of tile widths/heights (stored minus-1 in the PPS). */
    for (i = 0; i <= MIN(pic_param->num_tile_columns_minus1, 18); i++)
        pos_col[i + 1] = pos_col[i] + pic_param->column_width_minus1[i] + 1;

    for (i = 0; i <= MIN(pic_param->num_tile_rows_minus1, 20); i++)
        pos_row[i + 1] = pos_row[i] + pic_param->row_height_minus1[i] + 1;

    BEGIN_BCS_BATCH(batch, 13);

    OUT_BCS_BATCH(batch, HCP_TILE_STATE | (13 - 2));

    OUT_BCS_BATCH(batch,
                  pic_param->num_tile_columns_minus1 << 5 |
                  pic_param->num_tile_rows_minus1);
    intel_batchbuffer_data(batch, pos_col, 20);
    intel_batchbuffer_data(batch, pos_row, 24);

    ADVANCE_BCS_BATCH(batch);
}
630
631 static int
632 gen9_hcpd_get_reference_picture_frame_id(VAPictureHEVC *ref_pic,
633                                          GenFrameStore frame_store[MAX_GEN_HCP_REFERENCE_FRAMES])
634 {
635     int i;
636
637     if (ref_pic->picture_id == VA_INVALID_ID ||
638         (ref_pic->flags & VA_PICTURE_HEVC_INVALID))
639         return 0;
640
641     for (i = 0; i < MAX_GEN_HCP_REFERENCE_FRAMES; i++) {
642         if (ref_pic->picture_id == frame_store[i].surface_id) {
643             assert(frame_store[i].frame_store_id < MAX_GEN_HCP_REFERENCE_FRAMES);
644             return frame_store[i].frame_store_id;
645         }
646     }
647
648     /* Should never get here !!! */
649     assert(0);
650     return 0;
651 }
652
/*
 * Emit one HCP_REF_IDX_STATE command for reference list 'list' (0 or 1).
 * Each active entry packs field/long-term flags, the frame store id and
 * the POC delta (clamped to [-128, 127]) relative to the current picture;
 * the remaining entries of the fixed 16-slot payload are zeroed.
 */
static void
gen9_hcpd_ref_idx_state_1(struct intel_batchbuffer *batch,
                          int list,
                          VAPictureParameterBufferHEVC *pic_param,
                          VASliceParameterBufferHEVC *slice_param,
                          GenFrameStore frame_store[MAX_GEN_HCP_REFERENCE_FRAMES])
{
    int i;
    uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
    uint8_t *ref_list = slice_param->RefPicList[list];

    BEGIN_BCS_BATCH(batch, 18);

    OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
    OUT_BCS_BATCH(batch,
                  num_ref_minus1 << 1 |
                  list);

    for (i = 0; i < 16; i++) {
        /* Hardware supports at most 15 active entries per list. */
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            VAPictureHEVC *ref_pic = &pic_param->ReferenceFrames[ref_list[i]];
            VAPictureHEVC *curr_pic = &pic_param->CurrPic;

            OUT_BCS_BATCH(batch,
                          !(ref_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD) << 15 |
                          !!(ref_pic->flags & VA_PICTURE_HEVC_FIELD_PIC) << 14 |
                          !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |
                          0 << 12 |
                          0 << 11 |
                          gen9_hcpd_get_reference_picture_frame_id(ref_pic, frame_store) << 8 |
                          (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    ADVANCE_BCS_BATCH(batch);
}
691
692 static void
693 gen9_hcpd_ref_idx_state(VADriverContextP ctx,
694                         VAPictureParameterBufferHEVC *pic_param,
695                         VASliceParameterBufferHEVC *slice_param,
696                         struct gen9_hcpd_context *gen9_hcpd_context)
697 {
698     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
699
700     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
701         return;
702
703     gen9_hcpd_ref_idx_state_1(batch, 0, pic_param, slice_param, gen9_hcpd_context->reference_surfaces);
704
705     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
706         return;
707
708     gen9_hcpd_ref_idx_state_1(batch, 1, pic_param, slice_param, gen9_hcpd_context->reference_surfaces);
709 }
710
711 static void
712 gen9_hcpd_weightoffset_state_1(struct intel_batchbuffer *batch,
713                                int list,
714                                VASliceParameterBufferHEVC *slice_param)
715 {
716     int i;
717     uint8_t num_ref_minus1 = (list == 1) ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1;
718     int8_t *luma_offset = (list == 1) ? slice_param->luma_offset_l1 : slice_param->luma_offset_l0;
719     int8_t *delta_luma_weight = (list == 1) ? slice_param->delta_luma_weight_l1 : slice_param->delta_luma_weight_l0;
720     int8_t (* chroma_offset)[2] = (list == 1) ? slice_param->ChromaOffsetL1 : slice_param->ChromaOffsetL0;
721     int8_t (* delta_chroma_weight)[2] = (list == 1) ? slice_param->delta_chroma_weight_l1 : slice_param->delta_chroma_weight_l0;
722
723     BEGIN_BCS_BATCH(batch, 34);
724
725     OUT_BCS_BATCH(batch, HCP_WEIGHTOFFSET | (34 - 2));
726     OUT_BCS_BATCH(batch, list);
727
728     for (i = 0; i < 16; i++) {
729         if (i < MIN((num_ref_minus1 + 1), 15)) {
730             OUT_BCS_BATCH(batch,
731                           (luma_offset[i] & 0xff) << 8 |
732                           (delta_luma_weight[i] & 0xff));
733         } else {
734             OUT_BCS_BATCH(batch, 0);
735         }
736     }
737     for (i = 0; i < 16; i++) {
738         if (i < MIN((num_ref_minus1 + 1), 15)) {
739             OUT_BCS_BATCH(batch,
740                           (chroma_offset[i][1] & 0xff) << 24 |
741                           (delta_chroma_weight[i][1] & 0xff) << 16 |
742                           (chroma_offset[i][0] & 0xff) << 8 |
743                           (delta_chroma_weight[i][0] & 0xff));
744         } else {
745             OUT_BCS_BATCH(batch, 0);
746         }
747     }
748
749     ADVANCE_BCS_BATCH(batch);
750 }
751
752 static void
753 gen9_hcpd_weightoffset_state(VADriverContextP ctx,
754                              VAPictureParameterBufferHEVC *pic_param,
755                              VASliceParameterBufferHEVC *slice_param,
756                              struct gen9_hcpd_context *gen9_hcpd_context)
757 {
758     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
759
760     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
761         return;
762
763     if ((slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P &&
764          !pic_param->pic_fields.bits.weighted_pred_flag) ||
765         (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B &&
766          !pic_param->pic_fields.bits.weighted_bipred_flag))
767         return;
768
769     gen9_hcpd_weightoffset_state_1(batch, 0, slice_param);
770
771     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
772         return;
773
774     gen9_hcpd_weightoffset_state_1(batch, 1, slice_param);
775 }
776
777 static int
778 gen9_hcpd_get_collocated_ref_idx(VADriverContextP ctx,
779                                  VAPictureParameterBufferHEVC *pic_param,
780                                  VASliceParameterBufferHEVC *slice_param,
781                                  struct gen9_hcpd_context *gen9_hcpd_context)
782 {
783     uint8_t *ref_list;
784     VAPictureHEVC *ref_pic;
785
786     if (slice_param->collocated_ref_idx > 14)
787         return 0;
788
789     if (!slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag)
790         return 0;
791
792     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
793         return 0;
794
795     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P ||
796         (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B &&
797          slice_param->LongSliceFlags.fields.collocated_from_l0_flag))
798         ref_list = slice_param->RefPicList[0];
799     else {
800         assert(slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B);
801         ref_list = slice_param->RefPicList[1];
802     }
803
804     ref_pic = &pic_param->ReferenceFrames[ref_list[slice_param->collocated_ref_idx]];
805
806     return gen9_hcpd_get_reference_picture_frame_id(ref_pic, gen9_hcpd_context->reference_surfaces);
807 }
808
809 static int
810 gen9_hcpd_is_list_low_delay(uint8_t ref_list_count,
811                             uint8_t ref_list[15],
812                             VAPictureHEVC *curr_pic,
813                             VAPictureHEVC ref_surfaces[15])
814 {
815     int i;
816
817     for (i = 0; i < MIN(ref_list_count, 15); i++) {
818         VAPictureHEVC *ref_pic;
819
820         if (ref_list[i] > 14)
821             continue;
822
823         ref_pic = &ref_surfaces[ref_list[i]];
824
825         if (ref_pic->pic_order_cnt > curr_pic->pic_order_cnt)
826             return 0;
827     }
828
829     return 1;
830 }
831
832 static int
833 gen9_hcpd_is_low_delay(VADriverContextP ctx,
834                        VAPictureParameterBufferHEVC *pic_param,
835                        VASliceParameterBufferHEVC *slice_param)
836 {
837     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
838         return 0;
839     else if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
840         return gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l0_active_minus1 + 1,
841                                            slice_param->RefPicList[0],
842                                            &pic_param->CurrPic,
843                                            pic_param->ReferenceFrames);
844     else
845         return gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l0_active_minus1 + 1,
846                                            slice_param->RefPicList[0],
847                                            &pic_param->CurrPic,
848                                            pic_param->ReferenceFrames) &&
849                gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l1_active_minus1 + 1,
850                                            slice_param->RefPicList[1],
851                                            &pic_param->CurrPic,
852                                            pic_param->ReferenceFrames);
853 }
854
/*
 * Emit an HCP_SLICE_STATE command (9 DWords) for one HEVC slice.
 *
 * Derives the slice's start CTB coordinates (and the next slice's, used as
 * the end marker), resolves the collocated picture for temporal MV
 * prediction, computes the slice QP, and packs the per-slice deblocking /
 * SAO / weighted-prediction fields. next_slice_param is NULL for the last
 * slice of the picture.
 */
static void
gen9_hcpd_slice_state(VADriverContextP ctx,
                      VAPictureParameterBufferHEVC *pic_param,
                      VASliceParameterBufferHEVC *slice_param,
                      VASliceParameterBufferHEVC *next_slice_param,
                      struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    unsigned short collocated_ref_idx, collocated_from_l0_flag;
    int sliceqp_sign_flag = 0, sliceqp = 0;

    /* Convert the linear slice segment address into CTB x/y coordinates. */
    slice_hor_pos = slice_param->slice_segment_address % gen9_hcpd_context->picture_width_in_ctbs;
    slice_ver_pos = slice_param->slice_segment_address / gen9_hcpd_context->picture_width_in_ctbs;

    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->slice_segment_address % gen9_hcpd_context->picture_width_in_ctbs;
        next_slice_ver_pos = next_slice_param->slice_segment_address / gen9_hcpd_context->picture_width_in_ctbs;
    } else {
        /* Last slice of the picture: (0, 0) together with the last-slice
         * flag in DW3 marks "to the end of the picture". */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = 0;
    }

    collocated_ref_idx = gen9_hcpd_get_collocated_ref_idx(ctx, pic_param, slice_param, gen9_hcpd_context);
    collocated_from_l0_flag = slice_param->LongSliceFlags.fields.collocated_from_l0_flag;

    /* Latch the collocated-picture info from the first inter slice that has
     * temporal MV prediction enabled; it is reused below for slices that
     * cannot provide their own. */
    if ((!gen9_hcpd_context->first_inter_slice_valid) &&
        (slice_param->LongSliceFlags.fields.slice_type != HEVC_SLICE_I) &&
        slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag) {
        gen9_hcpd_context->first_inter_slice_collocated_ref_idx = collocated_ref_idx;
        gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = collocated_from_l0_flag;
        gen9_hcpd_context->first_inter_slice_valid = 1;
    }

    /* HW requirement: intra slices and slices without TMVP must still carry
     * the collocated info of the first qualifying inter slice. */
    if (gen9_hcpd_context->first_inter_slice_valid &&
        ((slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I) ||
         (!slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag))) {
        collocated_ref_idx = gen9_hcpd_context->first_inter_slice_collocated_ref_idx;
        collocated_from_l0_flag = gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag;
    }

    sliceqp = pic_param->init_qp_minus26 + 26 + slice_param->slice_qp_delta;
    /* For >8-bit content the QP may go negative; the command takes a
     * sign flag plus magnitude instead of a signed value. */
    if ((pic_param->bit_depth_luma_minus8 > 0)
        || (pic_param->bit_depth_chroma_minus8 > 0)) {
        if (sliceqp < 0) {
            sliceqp_sign_flag = 1;
            sliceqp = -sliceqp;
        }
    }

    BEGIN_BCS_BATCH(batch, 9);

    OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));

    OUT_BCS_BATCH(batch,               /* DW 1: start CTB position */
                  slice_ver_pos << 16 |
                  slice_hor_pos);
    OUT_BCS_BATCH(batch,               /* DW 2: next slice start CTB position */
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);
    OUT_BCS_BATCH(batch,               /* DW 3: QP offsets, slice QP, flags, slice type */
                  (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
                  (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
                  sliceqp << 6 |
                  slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag << 5 |
                  slice_param->LongSliceFlags.fields.dependent_slice_segment_flag << 4 |
                  sliceqp_sign_flag << 3 |
                  !next_slice_param << 2 |          /* last-slice-of-picture flag */
                  slice_param->LongSliceFlags.fields.slice_type);
    OUT_BCS_BATCH(batch,               /* DW 4: merge cand, CABAC, weights, SAO, deblocking */
                  collocated_ref_idx << 26 |
                  (5 - slice_param->five_minus_max_num_merge_cand - 1) << 23 |
                  slice_param->LongSliceFlags.fields.cabac_init_flag << 22 |
                  slice_param->luma_log2_weight_denom << 19 |
                  ((slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) & 0x7) << 16 |
                  collocated_from_l0_flag << 15 |
                  gen9_hcpd_is_low_delay(ctx, pic_param, slice_param) << 14 |
                  slice_param->LongSliceFlags.fields.mvd_l1_zero_flag << 13 |
                  slice_param->LongSliceFlags.fields.slice_sao_luma_flag << 12 |
                  slice_param->LongSliceFlags.fields.slice_sao_chroma_flag << 11 |
                  slice_param->LongSliceFlags.fields.slice_loop_filter_across_slices_enabled_flag << 10 |
                  (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
                  (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
                  slice_param->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_byte_offset); /* DW 5 */
    OUT_BCS_BATCH(batch,               /* DW 6 */
                  0 << 26 |
                  0 << 20 |
                  0);
    OUT_BCS_BATCH(batch, 0);    /* Ignored for decoding */
    OUT_BCS_BATCH(batch, 0);    /* Ignored for decoding */

    ADVANCE_BCS_BATCH(batch);
}
951
952 static void
953 gen9_hcpd_bsd_object(VADriverContextP ctx,
954                      VASliceParameterBufferHEVC *slice_param,
955                      struct gen9_hcpd_context *gen9_hcpd_context)
956 {
957     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
958
959     BEGIN_BCS_BATCH(batch, 3);
960
961     OUT_BCS_BATCH(batch, HCP_BSD_OBJECT | (3 - 2));
962
963     OUT_BCS_BATCH(batch, slice_param->slice_data_size);
964     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
965
966     ADVANCE_BCS_BATCH(batch);
967 }
968
/*
 * Decode one HEVC picture: run per-picture init, then build and flush a
 * single atomic BCS batch containing the pipe/surface/buffer/QM/picture
 * state followed by per-slice state, reference lists, weights and BSD
 * objects for every slice in every slice-parameter buffer.
 */
static VAStatus
gen9_hcpd_hevc_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen9_hcpd_context *gen9_hcpd_context)
{
    VAStatus vaStatus;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    VASliceParameterBufferHEVC *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    vaStatus = gen9_hcpd_hevc_decode_init(ctx, decode_state, gen9_hcpd_context);

    if (vaStatus != VA_STATUS_SUCCESS)
        goto out;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    /* Pin the batch to BSD ring 0 on dual-BSD hardware. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    /* Per-picture state, emitted once before any slice commands. */
    gen9_hcpd_pipe_mode_select(ctx, decode_state, HCP_CODEC_HEVC, gen9_hcpd_context);
    gen9_hcpd_surface_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_pipe_buf_addr_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_hevc_qm_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_pic_state(ctx, decode_state, gen9_hcpd_context);

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        gen9_hcpd_tile_state(ctx, decode_state, gen9_hcpd_context);

    /* TODO(review): double-check that this works when two slice groups use
     * different slice data buffers. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferHEVC *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;

        if (IS_GEN10(i965->intel.device_info))
            gen10_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);
        else
            gen9_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);

        /* The first slice of the next group serves as "next slice" for the
         * last slice of this group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferHEVC *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_hcpd_slice_state(ctx, pic_param, slice_param, next_slice_param, gen9_hcpd_context);
            gen9_hcpd_ref_idx_state(ctx, pic_param, slice_param, gen9_hcpd_context);
            gen9_hcpd_weightoffset_state(ctx, pic_param, slice_param, gen9_hcpd_context);
            gen9_hcpd_bsd_object(ctx, slice_param, gen9_hcpd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

out:
    return vaStatus;
}
1041
/*********************************************************/
/*                  VP9 Code                             */
/*********************************************************/


/* Layout constants for the VP9 probability buffer uploaded to the HCP.
 * Note that KEY_INTER_OFFSET + KEY_INTER_SIZE == FIRST_PART_SIZE: the
 * 343-byte inter-probability section sits at the end of the first part. */
#define VP9_PROB_BUFFER_FIRST_PART_SIZE 2010
#define VP9_PROB_BUFFER_SECOND_PART_SIZE 10
#define VP9_PROB_BUFFER_KEY_INTER_OFFSET 1667
#define VP9_PROB_BUFFER_KEY_INTER_SIZE   343

/* Probability-buffer update/save/restore scenario identifiers.
 * NOTE(review): "SECNE" is a long-standing typo of "SCENE"; the names are
 * kept unchanged because they may be referenced from other files. */
#define VP9_PROB_BUFFER_UPDATE_NO   0
#define VP9_PROB_BUFFER_UPDATE_SECNE_1    1
#define VP9_PROB_BUFFER_UPDATE_SECNE_2    2
#define VP9_PROB_BUFFER_UPDATE_SECNE_3    3
#define VP9_PROB_BUFFER_UPDATE_SECNE_4    4
#define VP9_PROB_BUFFER_UPDATE_SECNE_5    5

#define VP9_PROB_BUFFER_SAVED_NO   0
#define VP9_PROB_BUFFER_SAVED_SECNE_1    1
#define VP9_PROB_BUFFER_SAVED_SECNE_2    2

#define VP9_PROB_BUFFER_RESTORED_NO   0
#define VP9_PROB_BUFFER_RESTORED_SECNE_1    1
#define VP9_PROB_BUFFER_RESTORED_SECNE_2    2
#define VP9_PROB_BUFFER_RESTORED_SECNE_MAX    (VP9_PROB_BUFFER_RESTORED_SECNE_2 + 1)
1067
/* Allocate (or re-allocate) a motion-vector buffer of `size` bytes,
 * dropping any previously held BO, and record the frame dimensions the
 * buffer was sized for so callers can detect when it must grow. */
#define ALLOC_MV_BUFFER(gen_buffer, string, size,width,height)  \
    do {                                                        \
        dri_bo_unreference(gen_buffer->bo);                     \
        gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr,       \
                                      string,                   \
                                      size,                     \
                                      0x1000);                  \
        assert(gen_buffer->bo);                                 \
        gen_buffer->frame_width  = width ;                      \
        gen_buffer->frame_height = height;                      \
    } while (0)
1079
1080 static void
1081 vp9_update_segmentId_buffer(VADriverContextP ctx,
1082                             struct decode_state *decode_state,
1083                             struct gen9_hcpd_context *gen9_hcpd_context)
1084 {
1085     struct i965_driver_data *i965 = i965_driver_data(ctx);
1086     VADecPictureParameterBufferVP9 *pic_param;
1087
1088     int size = 0;
1089     int is_scaling = 0;
1090     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1091     pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;
1092
1093     size = gen9_hcpd_context->picture_width_in_ctbs * gen9_hcpd_context->picture_height_in_ctbs * 1 ;
1094     size <<= 6;
1095     if (gen9_hcpd_context->vp9_segment_id_buffer.bo == NULL || pic_param->frame_width > gen9_hcpd_context->last_frame.frame_width || pic_param->frame_height > gen9_hcpd_context->last_frame.frame_height) {
1096         ALLOC_GEN_BUFFER((&gen9_hcpd_context->vp9_segment_id_buffer), "vp9 segment id buffer", size);
1097     }
1098
1099     is_scaling = (pic_param->frame_width != gen9_hcpd_context->last_frame.frame_width) || (pic_param->frame_height != gen9_hcpd_context->last_frame.frame_height);
1100
1101     if ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||
1102         pic_param->pic_fields.bits.error_resilient_mode ||
1103         pic_param->pic_fields.bits.intra_only || is_scaling) {
1104
1105         //VP9 Segment ID buffer needs to be zero
1106         dri_bo_map(gen9_hcpd_context->vp9_segment_id_buffer.bo, 1);
1107         memset((unsigned char *)gen9_hcpd_context->vp9_segment_id_buffer.bo->virtual, 0, size);
1108         dri_bo_unmap(gen9_hcpd_context->vp9_segment_id_buffer.bo);
1109     }
1110 }
1111
1112 static void
1113 vp9_update_mv_temporal_buffer(VADriverContextP ctx,
1114                               struct decode_state *decode_state,
1115                               struct gen9_hcpd_context *gen9_hcpd_context)
1116 {
1117     struct i965_driver_data *i965 = i965_driver_data(ctx);
1118     VADecPictureParameterBufferVP9 *pic_param;
1119     int size = 0;
1120
1121     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1122     pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;
1123
1124     size = gen9_hcpd_context->picture_width_in_ctbs * gen9_hcpd_context->picture_height_in_ctbs * 9 ;
1125     size <<= 6; //CL aligned
1126     if (gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo == NULL || pic_param->frame_width > gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width || pic_param->frame_height > gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height) {
1127         ALLOC_MV_BUFFER((&gen9_hcpd_context->vp9_mv_temporal_buffer_curr), "vp9 curr mv temporal buffer", size, pic_param->frame_width, pic_param->frame_height);
1128     }
1129     if (gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo == NULL) {
1130         ALLOC_MV_BUFFER((&gen9_hcpd_context->vp9_mv_temporal_buffer_last), "vp9 last mv temporal buffer", size, pic_param->frame_width, pic_param->frame_height);
1131     }
1132
1133 }
1134
/*
 * Build the default VP9 probability tables held in the context: a
 * key-frame default set, an inter-frame default set, and FRAME_CONTEXTS
 * working contexts all initialized from the inter defaults.
 * The "dummy N" comments mark padding bytes in the HW probability-buffer
 * layout that are left zeroed by the memsets above.
 */
static void
vp9_gen_default_probabilities(VADriverContextP ctx, struct gen9_hcpd_context *gen9_hcpd_context)
{
    int i = 0;
    uint32_t size = 0;

    size = sizeof(FRAME_CONTEXT);
    memset(&gen9_hcpd_context->vp9_fc_key_default, 0, size);
    memset(&gen9_hcpd_context->vp9_fc_inter_default, 0, size);
    memset(&gen9_hcpd_context->vp9_frame_ctx, 0, size * FRAME_CONTEXTS);
    //more code to come here below

    //1. key default
    gen9_hcpd_context->vp9_fc_key_default.tx_probs = default_tx_probs;
    //dummy 52
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs4x4, default_coef_probs_4x4);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs8x8, default_coef_probs_8x8);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs16x16, default_coef_probs_16x16);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs32x32, default_coef_probs_32x32);
    //dummy 16
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.skip_probs, default_skip_probs);
    /* Key frames use the key-frame-specific partition and UV-mode tables. */
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.partition_prob, vp9_kf_partition_probs);
    //dummy 47
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.uv_mode_prob, vp9_kf_uv_mode_prob);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.seg_tree_probs, default_seg_tree_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.seg_pred_probs, default_seg_pred_probs);

    //2. inter default
    gen9_hcpd_context->vp9_fc_inter_default.tx_probs = default_tx_probs;
    //dummy 52
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs4x4, default_coef_probs_4x4);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs8x8, default_coef_probs_8x8);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs16x16, default_coef_probs_16x16);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs32x32, default_coef_probs_32x32);
    //dummy 16
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.skip_probs, default_skip_probs);
    /* Inter frames additionally carry mode/reference/MV probabilities. */
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.inter_mode_probs, default_inter_mode_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.switchable_interp_prob, default_switchable_interp_prob);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.intra_inter_prob, default_intra_inter_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.comp_inter_prob, default_comp_inter_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.single_ref_prob, default_single_ref_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.comp_ref_prob, default_comp_ref_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.y_mode_prob, default_if_y_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.partition_prob, default_partition_probs);
    gen9_hcpd_context->vp9_fc_inter_default.nmvc = default_nmv_context;
    //dummy 47
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.uv_mode_prob, default_if_uv_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.seg_tree_probs, default_seg_tree_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.seg_pred_probs, default_seg_pred_probs);

    /* Start every working frame context from the inter defaults. */
    for (i = 0; i < FRAME_CONTEXTS; i++) {
        gen9_hcpd_context->vp9_frame_ctx[i] = gen9_hcpd_context->vp9_fc_inter_default;
    }
}
1189
1190 static void
1191 vp9_update_probabilities(VADriverContextP ctx,
1192                          struct decode_state *decode_state,
1193                          struct gen9_hcpd_context *gen9_hcpd_context)
1194 {
1195     VADecPictureParameterBufferVP9 *pic_param;
1196     int i = 0;
1197
1198     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1199     pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;
1200
1201     //first part buffer update: Case 1)Reset all 4 probablity buffers
1202     if ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) || pic_param->pic_fields.bits.intra_only || pic_param->pic_fields.bits.error_resilient_mode) {
1203         if ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||
1204             (pic_param->pic_fields.bits.reset_frame_context == 3) ||
1205             pic_param->pic_fields.bits.error_resilient_mode) {
1206             //perform full buffer update
1207             for (i = 0; i < FRAME_CONTEXTS; i++) {
1208                 memcpy(&gen9_hcpd_context->vp9_frame_ctx[i], &gen9_hcpd_context->vp9_fc_inter_default, VP9_PROB_BUFFER_FIRST_PART_SIZE);
1209
1210                 vp9_copy(gen9_hcpd_context->vp9_frame_ctx[i].seg_tree_probs, default_seg_tree_probs);
1211                 vp9_copy(gen9_hcpd_context->vp9_frame_ctx[i].seg_pred_probs, default_seg_pred_probs);
1212             }
1213         } else if (pic_param->pic_fields.bits.reset_frame_context == 2 && pic_param->pic_fields.bits.intra_only) {
1214             memcpy(&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx], &gen9_hcpd_context->vp9_fc_inter_default, VP9_PROB_BUFFER_FIRST_PART_SIZE);
1215         }
1216         pic_param->pic_fields.bits.frame_context_idx = 0;
1217     }
1218
1219     //Case 3) Update only segment probabilities
1220     if ((pic_param->pic_fields.bits.segmentation_enabled &&
1221          pic_param->pic_fields.bits.segmentation_update_map)) {
1222         //Update seg_tree_probs and seg_pred_probs accordingly
1223         for (i = 0; i < SEG_TREE_PROBS; i++) {
1224             gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx].seg_tree_probs[i] = pic_param->mb_segment_tree_probs[i];
1225         }
1226         for (i = 0; i < PREDICTION_PROBS; i++) {
1227             gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx].seg_pred_probs[i] = pic_param->segment_pred_probs[i];
1228         }
1229     }
1230
1231     //update vp9_fc according to frame_context_id
1232     {
1233         void *pfc = (void *)&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx];
1234         void *pprob = NULL;
1235
1236         dri_bo_map(gen9_hcpd_context->vp9_probability_buffer.bo, 1);
1237
1238         pprob = (void *)gen9_hcpd_context->vp9_probability_buffer.bo->virtual;
1239         memcpy(pprob, pfc, 2048);
1240         //only update 343bytes for key or intra_only frame
1241         if (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME ||
1242             pic_param->pic_fields.bits.intra_only) {
1243             memcpy(pprob + VP9_PROB_BUFFER_FIRST_PART_SIZE - VP9_PROB_BUFFER_KEY_INTER_SIZE
1244                    , gen9_hcpd_context->vp9_fc_key_default.inter_mode_probs
1245                    , VP9_PROB_BUFFER_KEY_INTER_SIZE);
1246         }
1247
1248         dri_bo_unmap(gen9_hcpd_context->vp9_probability_buffer.bo);
1249     }
1250 }
1251
1252 static void
1253 gen9_hcpd_init_vp9_surface(VADriverContextP ctx,
1254                            VADecPictureParameterBufferVP9 *pic_param,
1255                            struct object_surface *obj_surface,
1256                            struct gen9_hcpd_context *gen9_hcpd_context)
1257 {
1258     GenVP9Surface *gen9_vp9_surface;
1259
1260     if (!obj_surface)
1261         return;
1262
1263     obj_surface->free_private_data = gen_free_vp9_surface;
1264     gen9_vp9_surface = obj_surface->private_data;
1265
1266     if (!gen9_vp9_surface) {
1267         gen9_vp9_surface = calloc(sizeof(GenVP9Surface), 1);
1268         assert(gen9_vp9_surface);
1269         gen9_vp9_surface->base.frame_store_id = -1;
1270         obj_surface->private_data = gen9_vp9_surface;
1271     }
1272
1273     gen9_vp9_surface->frame_width  = pic_param->frame_width;
1274     gen9_vp9_surface->frame_height = pic_param->frame_height;
1275
1276 }
1277
/*
 * Per-picture initialization for VP9 decoding: validates profile and
 * bit-depth support, updates the reference frame store, ensures the render
 * surface BO exists, derives picture dimensions in superblocks, and
 * (re)allocates the HCP row-store, metadata and probability buffers.
 * Returns VA_STATUS_SUCCESS or an unsupported-profile/format error.
 */
static VAStatus
gen9_hcpd_vp9_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VADecPictureParameterBufferVP9 *pic_param;
    struct object_surface *obj_surface;
    uint32_t size;
    int width_in_mbs = 0, height_in_mbs = 0;
    int bit_depth_minus8 = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    width_in_mbs = (pic_param->frame_width + 15) / 16;
    height_in_mbs = (pic_param->frame_height + 15) / 16;

    //For BXT, we support only till 4K
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    if (!(i965->codec_info->vp9_dec_profiles & (1U << pic_param->profile)))
        return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;

    /* Profiles 2/3 are 10/12-bit; only 10-bit (bit_depth_minus8 == 2) is
     * accepted here, and only when the HW advertises YUV420 10bpp. */
    if (pic_param->profile >= 2) {
        if (pic_param->bit_depth >= 8)
            bit_depth_minus8 = pic_param->bit_depth - 8;

        if (bit_depth_minus8 == 2) {
            if (!(i965->codec_info->vp9_dec_chroma_formats & VA_RT_FORMAT_YUV420_10BPP))
                return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
        } else if ((bit_depth_minus8 > 2) || (bit_depth_minus8 == 1) || (bit_depth_minus8 < 0))
            return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
    }

    //Update the frame store buffers with the reference frames information
    intel_update_vp9_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen9_hcpd_context->reference_surfaces);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    //Ensure there is a tiled render surface in NV12 format. If not, create one.
    vp9_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);


    //Super block in VP9 is 64x64
    gen9_hcpd_context->ctb_size = 64;
    gen9_hcpd_context->min_cb_size = 8; //Min block size is 4 or 8?

    //If picture width/height is not multiple of 64, needs to upsize it to the next 64 pixels
    //before calculation below.
    gen9_hcpd_context->picture_width_in_ctbs  = ALIGN(pic_param->frame_width, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->picture_height_in_ctbs = ALIGN(pic_param->frame_height, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;

    gen9_hcpd_context->picture_width_in_min_cb_minus1  = ALIGN(pic_param->frame_width, gen9_hcpd_context->min_cb_size) / gen9_hcpd_context->min_cb_size - 1;
    gen9_hcpd_context->picture_height_in_min_cb_minus1 = ALIGN(pic_param->frame_height, gen9_hcpd_context->min_cb_size) / gen9_hcpd_context->min_cb_size - 1;

    gen9_hcpd_context->picture_width_in_pixels  = (gen9_hcpd_context->picture_width_in_min_cb_minus1  + 1) * gen9_hcpd_context->min_cb_size ;
    gen9_hcpd_context->picture_height_in_pixels = (gen9_hcpd_context->picture_height_in_min_cb_minus1 + 1) * gen9_hcpd_context->min_cb_size ;

    gen9_hcpd_init_vp9_surface(ctx, pic_param, obj_surface, gen9_hcpd_context);

    /* Deblocking line buffers: 10-bit profiles need twice the space. */
    if (pic_param->profile >= 2)
        size = gen9_hcpd_context->picture_width_in_ctbs * 36; //num_width_in_SB * 36
    else
        size = gen9_hcpd_context->picture_width_in_ctbs * 18; //num_width_in_SB * 18
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer), "line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);

    if (pic_param->profile >= 2)
        size = gen9_hcpd_context->picture_height_in_ctbs * 34; //num_height_in_SB * 34
    else
        size = gen9_hcpd_context->picture_height_in_ctbs * 17; //num_height_in_SB * 17
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);

    size = gen9_hcpd_context->picture_width_in_ctbs * 5; //num_width_in_SB * 5
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer), "metadata line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer), "metadata tile line buffer", size);

    size = gen9_hcpd_context->picture_height_in_ctbs * 5; //num_height_in_SB * 5
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer), "metadata tile column buffer", size);

    size = gen9_hcpd_context->picture_width_in_ctbs * 1; //num_width_in_SB * 1
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->hvd_line_rowstore_buffer), "hvd line rowstore buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->hvd_tile_rowstore_buffer), "hvd tile rowstore buffer", size);

    /* 32 cache lines (2048 bytes) for the probability buffer. */
    size = 32;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->vp9_probability_buffer), "vp9 probability buffer", size);

    /* Reset the HEVC-style first-inter-slice bookkeeping. */
    gen9_hcpd_context->first_inter_slice_collocated_ref_idx = 0;
    gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = 0;
    gen9_hcpd_context->first_inter_slice_valid = 0;

    return VA_STATUS_SUCCESS;
}
1382
1383 static void
1384 gen9_hcpd_vp9_surface_state(VADriverContextP ctx,
1385                             struct decode_state *decode_state,
1386                             struct gen9_hcpd_context *gen9_hcpd_context)
1387 {
1388     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
1389     struct object_surface *obj_surface = decode_state->render_object;
1390     struct object_surface *tmp_obj_surface = NULL;
1391     unsigned int y_cb_offset;
1392     int i = 0;
1393
1394     assert(obj_surface);
1395
1396     y_cb_offset = obj_surface->y_cb_offset;
1397
1398     BEGIN_BCS_BATCH(batch, 3);
1399
1400     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1401     OUT_BCS_BATCH(batch,
1402                   (0 << 28) |                   /* surface id */
1403                   (obj_surface->width - 1));    /* pitch - 1 */
1404     OUT_BCS_BATCH(batch,
1405                   (((obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010 : SURFACE_FORMAT_PLANAR_420_8) << 28) |
1406                   y_cb_offset);
1407     ADVANCE_BCS_BATCH(batch);
1408
1409     tmp_obj_surface = obj_surface;
1410
1411     for (i = 0; i < 3; i++) {
1412         obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
1413         if (obj_surface && obj_surface->private_data) {
1414             BEGIN_BCS_BATCH(batch, 3);
1415
1416             OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1417             OUT_BCS_BATCH(batch,
1418                           ((i + 2) << 28) |                   /* surface id */
1419                           (obj_surface->width - 1));    /* pitch - 1 */
1420             OUT_BCS_BATCH(batch,
1421                           (((obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010 : SURFACE_FORMAT_PLANAR_420_8) << 28) |
1422                           obj_surface->y_cb_offset);
1423             ADVANCE_BCS_BATCH(batch);
1424         } else {
1425             BEGIN_BCS_BATCH(batch, 3);
1426
1427             OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1428             OUT_BCS_BATCH(batch,
1429                           ((i + 2) << 28) |                   /* surface id */
1430                           (tmp_obj_surface->width - 1));    /* pitch - 1 */
1431             OUT_BCS_BATCH(batch,
1432                           (((tmp_obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010 : SURFACE_FORMAT_PLANAR_420_8) << 28) |
1433                           tmp_obj_surface->y_cb_offset);
1434             ADVANCE_BCS_BATCH(batch);
1435         }
1436     }
1437 }
1438
/*
 * Emit HCP_PIPE_BUF_ADDR_STATE for VP9 decoding.
 *
 * Programs every buffer address the HCP pipe needs for this frame in
 * one fixed 95-DW command: the decoded render target, the deblocking
 * and metadata line/tile buffers, the current and last-frame motion
 * vector temporal buffers, the three VP9 reference pictures, and the
 * probability / segment-id / HVD rowstore buffers.  Unused slots are
 * emitted as NULL; the DW positions are fixed by the hardware layout,
 * so the emission order below must not change.
 */
static void
gen9_hcpd_vp9_pipe_buf_addr_state(VADriverContextP ctx,
                                  struct decode_state *decode_state,
                                  struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    struct object_surface *obj_surface;
    int i = 0;

    BEGIN_BCS_BATCH(batch, 95);

    OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));

    obj_surface = decode_state->render_object;
    assert(obj_surface && obj_surface->bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 22..24, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 25..27, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 28..30, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo); /* DW 31..33 */

    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* DW 37..52 - Reference picture address (only the first 3 slots are
     * real VP9 references; the rest of the 8-entry table is NULL) */
    for (i = 0; i < 3; i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;

        if (obj_surface) {
            OUT_BUFFER_NMA_REFERENCE(obj_surface->bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    for (; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_REFERENCE(NULL); /* DW 54..56, ignore for decoding mode */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, StreamOutEnable - used for transcoding */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, DecodedPictureStatusError, ignored */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, Ignored */

    /* DW 66..81 - for 8 Collocated motion vectors; only slot 0 (the
     * last frame's MV buffer) is programmed, remaining slots are NULL.
     * NOTE(review): the loop bound reuses ARRAY_ELEMS(reference_surfaces),
     * which presumably is 8 to match the 8 collocated-MV slots — confirm. */
    for (i = 0; i < 1; i++) {
        OUT_BUFFER_NMA_REFERENCE(gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo);
    }
    for (; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        OUT_BUFFER_NMA_REFERENCE(NULL);
    }

    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_probability_buffer.bo); /* DW 83..85, VP9 Probability buffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_segment_id_buffer.bo);  /* DW 86..88, VP9 Segment ID buffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->hvd_line_rowstore_buffer.bo);/* DW 89..91, VP9 HVD Line Rowstore buffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->hvd_tile_rowstore_buffer.bo);/* DW 92..94, VP9 HVD Tile Rowstore buffer */

    ADVANCE_BCS_BATCH(batch);
}
1506
/*
 * Check whether a reference frame's dimensions may be used to predict
 * the current frame.  VP9 allows a reference to be at most 2x larger
 * and at most 16x smaller than the current frame in each dimension.
 * Returns non-zero when the reference is usable.
 */
static inline int
gen9_hcpd_vp9_valid_ref_frame_size(int ref_width, int ref_height,
                                   int cur_width, int cur_height)
{
    /* Reference more than twice the current size: invalid. */
    if (ref_width > 2 * cur_width || ref_height > 2 * cur_height)
        return 0;

    /* Current frame more than 16x the reference size: invalid. */
    if (16 * ref_width < cur_width || 16 * ref_height < cur_height)
        return 0;

    return 1;
}
/*
 * Emit HCP_VP9_PIC_STATE for the current frame.
 *
 * Derives the per-frame control bits from the VA picture parameters
 * (segmentation stream-in/out, temporal update, previous-frame MV use,
 * probability adaptation) and packs them into the fixed 12-DW command
 * together with frame geometry, reference scaling factors and the
 * uncompressed-header / first-partition sizes.
 */
static void
gen9_hcpd_vp9_pic_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VADecPictureParameterBufferVP9 *pic_param;
    struct object_surface *obj_surface;
    GenVP9Surface *gen9_vp9_surface;
    uint16_t scale_h = 0;
    uint16_t scale_w = 0;
    uint16_t frame_width_in_pixel = 0;
    uint16_t frame_height_in_pixel = 0;
    uint16_t fwidth = 64;
    uint16_t fheight = 64;
    int i;
    int bit_depth_minus8 = 0;

#define LEN_COMMAND_OWN 12
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    uint8_t segmentIDStreamInEnable = 0;
    /* Stream out the segment-id map when the frame updates it. */
    uint8_t segmentIDStreamOutEnable = (pic_param->pic_fields.bits.segmentation_enabled && pic_param->pic_fields.bits.segmentation_update_map);

    // For KEY_FRAME or INTRA_ONLY frame, this bit should be set to "0".
    uint8_t segmentation_temporal_update =
        ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) || (pic_param->pic_fields.bits.intra_only)) ? 0 : pic_param->pic_fields.bits.segmentation_temporal_update;

    /* Stream in the previous segment-id map when this frame cannot (or
     * does not) rebuild it from scratch. */
    if (pic_param->pic_fields.bits.intra_only || (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME)) {
        segmentIDStreamInEnable = 1;
    } else if (pic_param->pic_fields.bits.segmentation_enabled) {
        if (!pic_param->pic_fields.bits.segmentation_update_map) {
            segmentIDStreamInEnable = 1;

        } else if (pic_param->pic_fields.bits.segmentation_temporal_update) {
            segmentIDStreamInEnable = 1;
        }
    }

    if (pic_param->pic_fields.bits.error_resilient_mode) {
        segmentIDStreamInEnable = 1;
    }

    //frame type of previous frame (Key or Non-Key Frame).
    uint8_t last_frame_type = gen9_hcpd_context->last_frame.frame_type;

    /* Previous-frame MVs are usable only when the last frame was a
     * shown inter frame of identical size and this frame is not a
     * key/intra-only/error-resilient frame. */
    uint8_t use_pre_frame_mvs = 0;
    use_pre_frame_mvs = !((pic_param->pic_fields.bits.error_resilient_mode) ||
                          (pic_param->frame_width != gen9_hcpd_context->last_frame.frame_width) ||
                          (pic_param->frame_height != gen9_hcpd_context->last_frame.frame_height) ||
                          (pic_param->pic_fields.bits.intra_only) ||
                          (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||
                          (gen9_hcpd_context->last_frame.intra_only) ||
                          (last_frame_type == HCP_VP9_KEY_FRAME) ||
                          (!gen9_hcpd_context->last_frame.show_frame));

    uint8_t adapt_probabilities_flag = 0;
    if ((pic_param->pic_fields.bits.error_resilient_mode || pic_param->pic_fields.bits.frame_parallel_decoding_mode))
        adapt_probabilities_flag = 0; //0: Do not adapt (error resilient or frame_parallel_mode are set)
    else if (!(pic_param->pic_fields.bits.error_resilient_mode) && !(pic_param->pic_fields.bits.frame_parallel_decoding_mode))
        adapt_probabilities_flag = 1; //1: Adapt (not error resilient and not frame_ parallel_mode)

    frame_width_in_pixel  = (gen9_hcpd_context->picture_width_in_min_cb_minus1  + 1) * gen9_hcpd_context->min_cb_size ;
    frame_height_in_pixel = (gen9_hcpd_context->picture_height_in_min_cb_minus1 + 1) * gen9_hcpd_context->min_cb_size ;

    /* NOTE(review): fwidth/fheight are clamped here but never emitted
     * below — this computation looks vestigial; confirm before removing. */
    fwidth = (fwidth > frame_width_in_pixel) ? frame_width_in_pixel : fwidth;
    fheight = (fheight > frame_height_in_pixel) ? frame_height_in_pixel : fheight;

    /* Profiles 2/3 carry 10/12-bit content; DW3 wants bit depth - 8. */
    if (pic_param->profile >= 2) {
        if (pic_param->bit_depth >= 8)
            bit_depth_minus8 = pic_param->bit_depth - 8;
    }

    BEGIN_BCS_BATCH(batch, LEN_COMMAND_OWN);

    OUT_BCS_BATCH(batch, HCP_VP9_PIC_STATE | (LEN_COMMAND_OWN - 2));

    OUT_BCS_BATCH(batch,
                  (frame_height_in_pixel - 1) << 16 |
                  (frame_width_in_pixel - 1));         /* DW 1 */
    OUT_BCS_BATCH(batch,
                  segmentIDStreamInEnable << 31 |
                  segmentIDStreamOutEnable << 30 |
                  pic_param->pic_fields.bits.lossless_flag << 29 |
                  segmentation_temporal_update << 28 |
                  (pic_param->pic_fields.bits.segmentation_enabled &&
                   pic_param->pic_fields.bits.segmentation_update_map) << 27 |
                  pic_param->pic_fields.bits.segmentation_enabled << 26   |
                  pic_param->sharpness_level << 23 |
                  pic_param->filter_level << 17 |
                  pic_param->pic_fields.bits.frame_parallel_decoding_mode << 16 |
                  pic_param->pic_fields.bits.error_resilient_mode << 15 |
                  pic_param->pic_fields.bits.refresh_frame_context << 14 |
                  last_frame_type << 13 |
                  0 << 12 |   /* tx select mode */
                  0 << 11 |   /* Hybrid Prediction Mode */
                  use_pre_frame_mvs << 10 |
                  pic_param->pic_fields.bits.alt_ref_frame_sign_bias << 9 |
                  pic_param->pic_fields.bits.golden_ref_frame_sign_bias << 8 |
                  pic_param->pic_fields.bits.last_ref_frame_sign_bias << 7 |
                  pic_param->pic_fields.bits.mcomp_filter_type << 4 |
                  pic_param->pic_fields.bits.allow_high_precision_mv << 3 |
                  pic_param->pic_fields.bits.intra_only << 2 |
                  adapt_probabilities_flag << 1 |
                  pic_param->pic_fields.bits.frame_type << 0);              /* DW 2 */
    OUT_BCS_BATCH(batch,
                  pic_param->profile << 28 |
                  bit_depth_minus8 << 24 |
                  0 << 22 | /* only support 4:2:0 */
                  pic_param->log2_tile_rows << 8 |
                  pic_param->log2_tile_columns << 0);                      /* DW 3 */
    // resolution change case

    // DW4-DW6: 2.14 fixed-point horizontal/vertical scale factors per
    // reference; an invalid-size reference gets -1, which wraps to
    // 0xffff in the uint16_t fields (all-ones sentinel).
    for (i = 0; i < 3; i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_vp9_surface = NULL;
        scale_w = 0;
        scale_h = 0;
        if (obj_surface && obj_surface->private_data) {
            gen9_vp9_surface = obj_surface->private_data;
            if (!gen9_hcpd_vp9_valid_ref_frame_size(gen9_vp9_surface->frame_width, gen9_vp9_surface->frame_height, pic_param->frame_width, pic_param->frame_height)) {
                scale_w = -1;
                scale_h = -1;
            } else {
                scale_w = (gen9_vp9_surface->frame_width  << 14) / pic_param->frame_width ;
                scale_h = (gen9_vp9_surface->frame_height << 14) / pic_param->frame_height ;
            }
            OUT_BCS_BATCH(batch,
                          scale_w << 16 |
                          scale_h);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    // DW7-DW9: reference frame dimensions (minus one, 14 bits each)
    for (i = 0; i < 3; i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_vp9_surface = NULL;

        if (obj_surface && obj_surface->private_data) {
            gen9_vp9_surface = obj_surface->private_data;
            OUT_BCS_BATCH(batch,
                          ((gen9_vp9_surface->frame_height - 1) & 0x3fff) << 16 |
                          ((gen9_vp9_surface->frame_width - 1) & 0x3fff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch,
                  pic_param->first_partition_size << 16 |
                  pic_param->frame_header_length_in_bytes << 0); /* DW 10 */
    OUT_BCS_BATCH(batch,
                  (0 << 3) |
                  (0 << 2) |
                  (1 << 1) |
                  (0 << 0)); /* DW 11, ignored */
    //Rest of the DWs are not valid for BXT
    for (i = 12; i < LEN_COMMAND_OWN; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);

}
1685
/*
 * Emit one HCP_VP9_SEGMENT_STATE command for segment `seg_id`.
 *
 * Programs the per-segment reference/skip controls, the four
 * (ref, mode) loop-filter levels and the luma/chroma AC/DC quantizer
 * scales from the VA segment parameters.  Called once per segment
 * (1 or 8 times per frame depending on segmentation_enabled).
 */
static void
gen9_hcpd_vp9_segment_state(VADriverContextP ctx,
                            VADecPictureParameterBufferVP9 *pic_param,
                            VASegmentParameterVP9 *seg_param, uint8_t seg_id,
                            struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;

    int segment_ref = seg_param->segment_flags.fields.segment_reference;

    /* Key/intra-only frames have no reference to force per segment. */
    if ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME)
        || (pic_param->pic_fields.bits.intra_only))
        segment_ref = 0;

    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (7 - 2));
    OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
    OUT_BCS_BATCH(batch,
                  seg_param->segment_flags.fields.segment_reference_enabled << 3 |
                  segment_ref << 1 |
                  seg_param->segment_flags.fields.segment_reference_skipped << 0); /* DW 2 */
    /* Filter levels are only meaningful when loop filtering is on. */
    if (pic_param->filter_level) {
        OUT_BCS_BATCH(batch,
                      seg_param->filter_level[1][1] << 24    | //FilterLevelRef1Mode1
                      seg_param->filter_level[1][0] << 16    | //FilterLevelRef1Mode0
                      seg_param->filter_level[0][1] << 8     | //FilterLevelRef0Mode1
                      seg_param->filter_level[0][0] << 0);      //FilterLevelRef0Mode0 /* DW 3 */
        OUT_BCS_BATCH(batch,
                      seg_param->filter_level[3][1] << 24    | //FilterLevelRef3Mode1
                      seg_param->filter_level[3][0] << 16    | //FilterLevelRef3Mode0
                      seg_param->filter_level[2][1] << 8     | //FilterLevelRef2Mode1
                      seg_param->filter_level[2][0] << 0);     //FilterLevelRef2Mode0 /* DW 4 */
    } else {
        OUT_BCS_BATCH(batch,
                      0);       /* DW 3 */
        OUT_BCS_BATCH(batch,
                      0);       /* DW 4 */
    }
    OUT_BCS_BATCH(batch,
                  seg_param->luma_ac_quant_scale << 16   |
                  seg_param->luma_dc_quant_scale << 0);     /* DW 5 */
    OUT_BCS_BATCH(batch,
                  seg_param->chroma_ac_quant_scale << 16 |
                  seg_param->chroma_dc_quant_scale << 0);   /* DW 6 */

    ADVANCE_BCS_BATCH(batch);

}
1735
1736 static void
1737 gen9_hcpd_vp9_bsd_object(VADriverContextP ctx,
1738                          VADecPictureParameterBufferVP9 *pic_param,
1739                          VASliceParameterBufferVP9 *slice_param,
1740                          struct gen9_hcpd_context *gen9_hcpd_context)
1741 {
1742     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
1743     int slice_data_size   = slice_param->slice_data_size - pic_param->frame_header_length_in_bytes;
1744     int slice_data_offset = slice_param->slice_data_offset + pic_param->frame_header_length_in_bytes;
1745
1746     BEGIN_BCS_BATCH(batch, 3);
1747
1748     OUT_BCS_BATCH(batch, HCP_BSD_OBJECT | (3 - 2));
1749
1750     OUT_BCS_BATCH(batch, slice_data_size);
1751     OUT_BCS_BATCH(batch, slice_data_offset);
1752
1753     ADVANCE_BCS_BATCH(batch);
1754
1755 }
1756
/*
 * Decode one VP9 frame.
 *
 * Orchestrates the full per-frame sequence: (re)initialize sizing and
 * scratch buffers, refresh the segment-id / MV-temporal / probability
 * buffers, then emit the HCP command sequence (pipe mode, surfaces,
 * buffer addresses, indirect object base, segment states, pic state,
 * BSD object) into an atomic BCS batch and flush it.  Afterwards it
 * records this frame's parameters as "last frame" state, swaps the
 * current/last MV temporal buffers, and saves the adapted probability
 * context back into vp9_frame_ctx when the stream requests it.
 *
 * Returns VA_STATUS_SUCCESS or the first failing sub-step's status.
 */
static VAStatus
gen9_hcpd_vp9_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen9_hcpd_context *gen9_hcpd_context)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VADecPictureParameterBufferVP9 *pic_param;
    VASliceParameterBufferVP9 *slice_param;
    dri_bo *slice_data_bo;
    dri_bo *tmp_bo;
    uint16_t tmp;
    int i = 0, num_segments = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
    slice_param = (VASliceParameterBufferVP9 *)decode_state->slice_params[0]->buffer;
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    /* NOTE(review): a 1-byte slice appears to be treated as an empty
     * frame and skipped successfully — confirm against the callers. */
    if (slice_param->slice_data_size == 1) {
        goto out;
    }

    vaStatus = gen9_hcpd_vp9_decode_init(ctx, decode_state, gen9_hcpd_context);

    if (vaStatus != VA_STATUS_SUCCESS)
        goto out;

    //Update segment id buffer if needed
    vp9_update_segmentId_buffer(ctx, decode_state, gen9_hcpd_context);
    //Update mv buffer if needed
    vp9_update_mv_temporal_buffer(ctx, decode_state, gen9_hcpd_context);
    //Update probability buffer if needed
    vp9_update_probabilities(ctx, decode_state, gen9_hcpd_context);

    /* All HCP commands for this frame go into one atomic BCS batch. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    gen9_hcpd_pipe_mode_select(ctx, decode_state, HCP_CODEC_VP9, gen9_hcpd_context);
    //Not sure what the surface id value should be: Gold? ALtRef? PrevRef? or Just RefPic?
    gen9_hcpd_vp9_surface_state(ctx, decode_state, gen9_hcpd_context);

    //Only one VASliceParameterBufferVP9 should be sent per frame
    slice_data_bo = decode_state->slice_datas[0]->bo;

    gen9_hcpd_vp9_pipe_buf_addr_state(ctx, decode_state, gen9_hcpd_context);

    if (IS_GEN10(i965->intel.device_info))
        gen10_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);
    else
        gen9_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);

    //If segmentation is disabled, only SegParam[0] is valid,
    //all others should be populated with 0
    if (!pic_param->pic_fields.bits.segmentation_enabled)
        num_segments = 1;
    else  //If segmentation is enabled, all 8 entries should be valid.
        num_segments = 8;

    for (i = 0; i < num_segments; i++) {
        VASegmentParameterVP9 seg_param = slice_param->seg_param[i];
        gen9_hcpd_vp9_segment_state(ctx, pic_param, &seg_param, i, gen9_hcpd_context);
    }

    gen9_hcpd_vp9_pic_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_vp9_bsd_object(ctx, pic_param, slice_param, gen9_hcpd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    // keep track of the last frame status
    gen9_hcpd_context->last_frame.frame_width = pic_param->frame_width;
    gen9_hcpd_context->last_frame.frame_height = pic_param->frame_height;
    gen9_hcpd_context->last_frame.show_frame = pic_param->pic_fields.bits.show_frame;
    gen9_hcpd_context->last_frame.frame_type = pic_param->pic_fields.bits.frame_type;
    gen9_hcpd_context->last_frame.refresh_frame_context = pic_param->pic_fields.bits.refresh_frame_context;
    gen9_hcpd_context->last_frame.frame_context_idx = pic_param->pic_fields.bits.frame_context_idx;
    gen9_hcpd_context->last_frame.intra_only = pic_param->pic_fields.bits.intra_only;

    // switch mv buffer: this frame's MV buffer becomes the "last" one
    // for the next inter frame (curr/last bo and dimensions are swapped)
    if (pic_param->pic_fields.bits.frame_type != HCP_VP9_KEY_FRAME) {
        tmp_bo = gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo = tmp_bo;

        tmp = gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_width;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_width = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width = tmp;

        tmp = gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_height;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_height = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height = tmp;


    }
    //update vp9_frame_ctx according to frame_context_id
    if (pic_param->pic_fields.bits.refresh_frame_context) {
        void *pfc = (void *)&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx];
        void *pprob = NULL;

        //update vp9_fc to frame_context
        /* Key/intra-only frames refresh only the first part of the
         * context (the inter-specific tail is excluded). */
        dri_bo_map(gen9_hcpd_context->vp9_probability_buffer.bo, 1);
        pprob = (void *)gen9_hcpd_context->vp9_probability_buffer.bo->virtual;
        if (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME ||
            pic_param->pic_fields.bits.intra_only)
            memcpy(pfc, pprob, VP9_PROB_BUFFER_FIRST_PART_SIZE - VP9_PROB_BUFFER_KEY_INTER_SIZE);
        else
            memcpy(pfc, pprob, VP9_PROB_BUFFER_FIRST_PART_SIZE);

        dri_bo_unmap(gen9_hcpd_context->vp9_probability_buffer.bo);
    }

out:
    return vaStatus;
}
1876
1877
1878 static VAStatus
1879 gen9_hcpd_decode_picture(VADriverContextP ctx,
1880                          VAProfile profile,
1881                          union codec_state *codec_state,
1882                          struct hw_context *hw_context)
1883 {
1884     struct gen9_hcpd_context *gen9_hcpd_context = (struct gen9_hcpd_context *)hw_context;
1885     struct decode_state *decode_state = &codec_state->decode;
1886     VAStatus vaStatus;
1887
1888     assert(gen9_hcpd_context);
1889
1890     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
1891
1892     if (vaStatus != VA_STATUS_SUCCESS)
1893         goto out;
1894
1895     switch (profile) {
1896     case VAProfileHEVCMain:
1897     case VAProfileHEVCMain10:
1898         vaStatus = gen9_hcpd_hevc_decode_picture(ctx, decode_state, gen9_hcpd_context);
1899         break;
1900     case VAProfileVP9Profile0:
1901     case VAProfileVP9Profile2:
1902         vaStatus = gen9_hcpd_vp9_decode_picture(ctx, decode_state, gen9_hcpd_context);
1903         break;
1904
1905     default:
1906         /* should never get here 1!! */
1907         assert(0);
1908         break;
1909     }
1910
1911 out:
1912     return vaStatus;
1913 }
1914
/*
 * Tear down a gen9 HCP decoder context (hw_context.destroy).
 *
 * Releases every scratch/rowstore buffer allocated during decode init,
 * drops the references on both VP9 MV temporal buffers, frees the batch
 * buffer, and finally the context itself.
 */
static void
gen9_hcpd_context_destroy(void *hw_context)
{
    struct gen9_hcpd_context *gen9_hcpd_context = (struct gen9_hcpd_context *)hw_context;

    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->hvd_line_rowstore_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->hvd_tile_rowstore_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->vp9_probability_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->vp9_segment_id_buffer));
    dri_bo_unreference(gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo);
    dri_bo_unreference(gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo);

    intel_batchbuffer_free(gen9_hcpd_context->base.batch);
    free(gen9_hcpd_context);
}
1939
/*
 * HEVC-specific context setup: seed the default inverse-quantization
 * matrix used when the app does not supply one.
 */
static void
gen9_hcpd_hevc_context_init(VADriverContextP ctx,
                            struct gen9_hcpd_context *gen9_hcpd_context)
{
    hevc_gen_default_iq_matrix(&gen9_hcpd_context->iq_matrix_hevc);
}
1946
1947 static void
1948 gen9_hcpd_vp9_context_init(VADriverContextP ctx,
1949                            struct gen9_hcpd_context *gen9_hcpd_context)
1950 {
1951
1952     gen9_hcpd_context->last_frame.frame_height  = 0;
1953     gen9_hcpd_context->last_frame.show_frame    = 0;
1954     gen9_hcpd_context->last_frame.frame_type    = 0;
1955     gen9_hcpd_context->last_frame.refresh_frame_context = 0;
1956     gen9_hcpd_context->last_frame.frame_context_idx = 0;
1957     gen9_hcpd_context->last_frame.intra_only = 0;
1958     gen9_hcpd_context->last_frame.prob_buffer_saved_flag = 0;
1959     gen9_hcpd_context->last_frame.prob_buffer_restored_flag = 0;
1960
1961     //Super block in VP9 is 64x64
1962     gen9_hcpd_context->ctb_size = 64;
1963     gen9_hcpd_context->min_cb_size = 8; //Min block size is 8
1964
1965     vp9_gen_default_probabilities(ctx, gen9_hcpd_context);
1966 }
1967
1968 static struct hw_context *
1969 gen9_hcpd_context_init(VADriverContextP ctx, struct object_config *object_config)
1970 {
1971     struct intel_driver_data *intel = intel_driver_data(ctx);
1972     struct gen9_hcpd_context *gen9_hcpd_context = calloc(1, sizeof(struct gen9_hcpd_context));
1973     int i;
1974
1975     if (!gen9_hcpd_context)
1976         return NULL;
1977
1978     gen9_hcpd_context->base.destroy = gen9_hcpd_context_destroy;
1979     gen9_hcpd_context->base.run = gen9_hcpd_decode_picture;
1980     gen9_hcpd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_VEBOX, 0);
1981
1982     for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
1983         gen9_hcpd_context->reference_surfaces[i].surface_id = VA_INVALID_ID;
1984         gen9_hcpd_context->reference_surfaces[i].frame_store_id = -1;
1985         gen9_hcpd_context->reference_surfaces[i].obj_surface = NULL;
1986     }
1987
1988     switch (object_config->profile) {
1989     case VAProfileHEVCMain:
1990     case VAProfileHEVCMain10:
1991         gen9_hcpd_hevc_context_init(ctx, gen9_hcpd_context);
1992         break;
1993     case VAProfileVP9Profile0:
1994     case VAProfileVP9Profile2:
1995         gen9_hcpd_vp9_context_init(ctx, gen9_hcpd_context);
1996         break;
1997
1998     default:
1999         break;
2000     }
2001
2002     return (struct hw_context *)gen9_hcpd_context;
2003 }
2004
2005 struct hw_context *
2006 gen9_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2007 {
2008     if (obj_config->profile == VAProfileHEVCMain ||
2009         obj_config->profile == VAProfileHEVCMain10 ||
2010         obj_config->profile == VAProfileVP9Profile0 ||
2011         obj_config->profile == VAProfileVP9Profile2) {
2012         return gen9_hcpd_context_init(ctx, obj_config);
2013     } else {
2014         return gen8_dec_hw_context_init(ctx, obj_config);
2015     }
2016 }
2017
2018 void gen9_max_resolution(struct i965_driver_data *i965,
2019                          struct object_config *obj_config,
2020                          int *w,                                /* out */
2021                          int *h)                                /* out */
2022 {
2023     if (obj_config->profile == VAProfileJPEGBaseline) {
2024         *w = 8192;
2025         *h = 8192;
2026     } else {
2027         *w = i965->codec_info->max_width;
2028         *h = i965->codec_info->max_height;
2029     }
2030 }