OSDN Git Service

fed1bc1ffda3c4cdd59eefb3df6fc6c8c984903e
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_mfd.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30
31 #include <va/va.h>
32 #include <va/va_dec_hevc.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_drv_video.h"
38 #include "i965_decoder_utils.h"
39
40 #include "gen9_mfd.h"
41 #include "intel_media.h"
42 #include "vp9_probs.h"
43
/*
 * Emit a 64-bit graphics address for buf_bo into the BCS batch: a
 * 2-dword relocation when buf_bo is non-NULL, otherwise two zero
 * dwords.  When `ma` is non-zero, a third dword carrying the memory
 * address attributes (MOCS) is appended.
 *
 * NOTE: relies on `batch` and `i965` being in scope at every
 * expansion site.
 *
 *   buf_bo:    buffer object, may be NULL for an unused slot
 *   is_target: non-zero marks the buffer as a render-domain write target
 *   ma:        non-zero appends the MOCS attributes dword
 */
#define OUT_BUFFER(buf_bo, is_target, ma)  do {                         \
        if (buf_bo) {                                                   \
            OUT_BCS_RELOC64(batch,                                        \
                          buf_bo,                                       \
                          I915_GEM_DOMAIN_RENDER,                       \
                          is_target ? I915_GEM_DOMAIN_RENDER : 0,       \
                          0);                                           \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        if (ma)                                                         \
            OUT_BCS_BATCH(batch, i965->intel.mocs_state);                                    \
    } while (0)

/* Convenience wrappers: MA = with memory-attributes dword, NMA = without;
 * TARGET = written by the engine, REFERENCE = read-only. */
#define OUT_BUFFER_MA_TARGET(buf_bo)       OUT_BUFFER(buf_bo, 1, 1)
#define OUT_BUFFER_MA_REFERENCE(buf_bo)    OUT_BUFFER(buf_bo, 0, 1)
#define OUT_BUFFER_NMA_TARGET(buf_bo)      OUT_BUFFER(buf_bo, 1, 0)
#define OUT_BUFFER_NMA_REFERENCE(buf_bo)   OUT_BUFFER(buf_bo, 0, 0)
63
64 static void
65 gen9_hcpd_init_hevc_surface(VADriverContextP ctx,
66                             VAPictureParameterBufferHEVC *pic_param,
67                             struct object_surface *obj_surface,
68                             struct gen9_hcpd_context *gen9_hcpd_context)
69 {
70     struct i965_driver_data *i965 = i965_driver_data(ctx);
71     GenHevcSurface *gen9_hevc_surface;
72
73     if (!obj_surface)
74         return;
75
76     obj_surface->free_private_data = gen_free_hevc_surface;
77     gen9_hevc_surface = obj_surface->private_data;
78
79     if (!gen9_hevc_surface) {
80         gen9_hevc_surface = calloc(sizeof(GenHevcSurface), 1);
81         assert(gen9_hevc_surface);
82         gen9_hevc_surface->base.frame_store_id = -1;
83         obj_surface->private_data = gen9_hevc_surface;
84     }
85
86     if (gen9_hevc_surface->motion_vector_temporal_bo == NULL) {
87         uint32_t size;
88
89         if (gen9_hcpd_context->ctb_size == 16)
90             size = ((gen9_hcpd_context->picture_width_in_pixels + 63) >> 6) *
91                 ((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4);
92         else
93             size = ((gen9_hcpd_context->picture_width_in_pixels + 31) >> 5) *
94                 ((gen9_hcpd_context->picture_height_in_pixels + 31) >> 5);
95
96         size <<= 6; /* in unit of 64bytes */
97         gen9_hevc_surface->motion_vector_temporal_bo = dri_bo_alloc(i965->intel.bufmgr,
98                                                                     "motion vector temporal buffer",
99                                                                     size,
100                                                                     0x1000);
101     }
102 }
103
/*
 * Per-picture setup for HEVC decode: refreshes the reference frame
 * store, caches picture geometry (CTB size, sizes in CTBs/min CBs),
 * ensures the render target's bo and private data exist, and
 * (re)allocates the HCP scratch buffers (deblocking, metadata, SAO)
 * via ALLOC_GEN_BUFFER.  Always returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): the buffer-size formulas below presumably come from
 * the HCP buffer sizing rules in the hardware spec — verify against
 * the PRM before changing any of them.
 */
static VAStatus
gen9_hcpd_hevc_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAPictureParameterBufferHEVC *pic_param;
    struct object_surface *obj_surface;
    uint32_t size;
    int size_shift = 3; /* 8bpp default; lowered to 2 for >8-bit content */

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
    intel_update_hevc_frame_store_index(ctx,
                                        decode_state,
                                        pic_param,
                                        gen9_hcpd_context->reference_surfaces,
                                        &gen9_hcpd_context->fs_ctx);

    /* Cache geometry derived from the PPS/SPS fields. */
    gen9_hcpd_context->picture_width_in_pixels = pic_param->pic_width_in_luma_samples;
    gen9_hcpd_context->picture_height_in_pixels = pic_param->pic_height_in_luma_samples;
    gen9_hcpd_context->ctb_size = (1 << (pic_param->log2_min_luma_coding_block_size_minus3 +
                                         3 +
                                         pic_param->log2_diff_max_min_luma_coding_block_size));
    gen9_hcpd_context->picture_width_in_ctbs = ALIGN(gen9_hcpd_context->picture_width_in_pixels, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->picture_height_in_ctbs = ALIGN(gen9_hcpd_context->picture_height_in_pixels, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->min_cb_size = (1 << (pic_param->log2_min_luma_coding_block_size_minus3 + 3));
    gen9_hcpd_context->picture_width_in_min_cb_minus1 = gen9_hcpd_context->picture_width_in_pixels / gen9_hcpd_context->min_cb_size - 1;
    gen9_hcpd_context->picture_height_in_min_cb_minus1 = gen9_hcpd_context->picture_height_in_pixels / gen9_hcpd_context->min_cb_size - 1;

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    hevc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen9_hcpd_init_hevc_surface(ctx, pic_param, obj_surface, gen9_hcpd_context);

    /* 10-bit content needs larger line buffers (smaller shift). */
    if((pic_param->bit_depth_luma_minus8 > 0)
        || (pic_param->bit_depth_chroma_minus8 > 0))
        size_shift = 2;

    /* All sizes below are computed in 64-byte units, then <<= 6 to bytes. */
    size = ALIGN(gen9_hcpd_context->picture_width_in_pixels, 32) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer), "line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);

    size = ALIGN(gen9_hcpd_context->picture_height_in_pixels + 6 * gen9_hcpd_context->picture_height_in_ctbs, 32) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);

    size = (((gen9_hcpd_context->picture_width_in_pixels + 15) >> 4) * 188 + 9 * gen9_hcpd_context->picture_width_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer), "metadata line buffer", size);

    size = (((gen9_hcpd_context->picture_width_in_pixels + 15) >> 4) * 172 + 9 * gen9_hcpd_context->picture_width_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer), "metadata tile line buffer", size);

    /* Cherryview needs a larger metadata tile column buffer than the
     * other gen9 parts. */
    if (IS_CHERRYVIEW(i965->intel.device_info))
        size = (((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4) * 256 + 9 * gen9_hcpd_context->picture_height_in_ctbs + 1023) >> 9;
    else
        size = (((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4) * 176 + 89 * gen9_hcpd_context->picture_height_in_ctbs + 1023) >> 9;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer), "metadata tile column buffer", size);

    size = ALIGN(((gen9_hcpd_context->picture_width_in_pixels >> 1) + 3 * gen9_hcpd_context->picture_width_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_line_buffer), "sao line buffer", size);

    size = ALIGN(((gen9_hcpd_context->picture_width_in_pixels >> 1) + 6 * gen9_hcpd_context->picture_width_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_tile_line_buffer), "sao tile line buffer", size);

    size = ALIGN(((gen9_hcpd_context->picture_height_in_pixels >> 1) + 6 * gen9_hcpd_context->picture_height_in_ctbs), 16) >> size_shift;
    size <<= 6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_tile_column_buffer), "sao tile column buffer", size);

    /* Collocated-ref bookkeeping is re-derived per slice; reset it here. */
    gen9_hcpd_context->first_inter_slice_collocated_ref_idx = 0;
    gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = 0;
    gen9_hcpd_context->first_inter_slice_valid = 0;

    return VA_STATUS_SUCCESS;
}
185
186 static void
187 gen9_hcpd_pipe_mode_select(VADriverContextP ctx,
188                            struct decode_state *decode_state,
189                            int codec,
190                            struct gen9_hcpd_context *gen9_hcpd_context)
191 {
192     struct i965_driver_data *i965 = i965_driver_data(ctx);
193     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
194
195     assert((codec == HCP_CODEC_HEVC) || (codec == HCP_CODEC_VP9));
196
197     if(IS_KBL(i965->intel.device_info))
198     {
199         BEGIN_BCS_BATCH(batch, 6);
200
201         OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
202     }
203     else
204     {
205         BEGIN_BCS_BATCH(batch, 4);
206
207         OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
208     }
209     OUT_BCS_BATCH(batch,
210                   (codec << 5) |
211                   (0 << 3) | /* disable Pic Status / Error Report */
212                   HCP_CODEC_SELECT_DECODE);
213     OUT_BCS_BATCH(batch, 0);
214     OUT_BCS_BATCH(batch, 0);
215
216     if(IS_KBL(i965->intel.device_info))
217     {
218         if(codec == HCP_CODEC_VP9)
219             OUT_BCS_BATCH(batch, 1<<6);
220         else
221             OUT_BCS_BATCH(batch, 0);
222
223         OUT_BCS_BATCH(batch, 0);
224     }
225
226     ADVANCE_BCS_BATCH(batch);
227 }
228
229 static void
230 gen9_hcpd_surface_state(VADriverContextP ctx,
231                         struct decode_state *decode_state,
232                         struct gen9_hcpd_context *gen9_hcpd_context)
233 {
234     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
235     struct object_surface *obj_surface = decode_state->render_object;
236     unsigned int y_cb_offset;
237     VAPictureParameterBufferHEVC *pic_param;
238
239     assert(obj_surface);
240
241     pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
242     y_cb_offset = obj_surface->y_cb_offset;
243
244     BEGIN_BCS_BATCH(batch, 3);
245
246     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
247     OUT_BCS_BATCH(batch,
248                   (0 << 28) |                   /* surface id */
249                   (obj_surface->width - 1));    /* pitch - 1 */
250     if((pic_param->bit_depth_luma_minus8 > 0)
251         || (pic_param->bit_depth_chroma_minus8 > 0))
252     {
253         OUT_BCS_BATCH(batch,
254                   (SURFACE_FORMAT_P010 << 28) |
255                   y_cb_offset);
256     }
257     else
258     {
259         OUT_BCS_BATCH(batch,
260                   (SURFACE_FORMAT_PLANAR_420_8 << 28) |
261                   y_cb_offset);
262     }
263
264     ADVANCE_BCS_BATCH(batch);
265 }
266
/*
 * Emit HCP_PIPE_BUF_ADDR_STATE (95 dwords): the decoded picture, all
 * per-context scratch buffers, the reference pictures and their motion
 * vector temporal buffers.  Slots that are encode-only or unused in
 * decode mode are programmed as NULL.  The dword ordering is fixed by
 * the command layout — do not reorder the OUT_BUFFER_* calls.
 */
static void
gen9_hcpd_pipe_buf_addr_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    struct object_surface *obj_surface;
    GenHevcSurface *gen9_hevc_surface;
    int i;

    BEGIN_BCS_BATCH(batch, 95);

    OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));

    obj_surface = decode_state->render_object;
    assert(obj_surface && obj_surface->bo);
    gen9_hevc_surface = obj_surface->private_data;
    assert(gen9_hevc_surface && gen9_hevc_surface->motion_vector_temporal_bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_line_buffer.bo);              /* DW 22..24 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
    OUT_BUFFER_MA_TARGET(gen9_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* DW 37..52: up to 8 reference picture addresses (2 dwords each). */
    for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;

        if (obj_surface)
            OUT_BUFFER_NMA_REFERENCE(obj_surface->bo);
        else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_REFERENCE(NULL); /* DW 54..56, ignore for decoding mode */
    OUT_BUFFER_MA_TARGET(NULL);
    OUT_BUFFER_MA_TARGET(NULL);
    OUT_BUFFER_MA_TARGET(NULL);

    /* DW 66..81: motion vector temporal buffers of the references. */
    for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_hevc_surface = NULL;

        if (obj_surface && obj_surface->private_data)
            gen9_hevc_surface = obj_surface->private_data;

        if (gen9_hevc_surface)
            OUT_BUFFER_NMA_REFERENCE(gen9_hevc_surface->motion_vector_temporal_bo);
        else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(NULL);    /* DW 83..85, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 86..88, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 89..91, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 92..94, ignore for HEVC */

    ADVANCE_BCS_BATCH(batch);
}
336
/*
 * Emit HCP_IND_OBJ_BASE_ADDR_STATE (14 dwords): points the bitstream
 * fetcher at slice_data_bo.  The CU and PAK-BSE object slots are
 * encode-only and left NULL for decode.  Note: `i965` looks unused but
 * is referenced by the OUT_BUFFER_* macros (i965->intel.mocs_state).
 */
static void
gen9_hcpd_ind_obj_base_addr_state(VADriverContextP ctx,
                                  dri_bo *slice_data_bo,
                                  struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 14);

    OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
    OUT_BUFFER_MA_REFERENCE(slice_data_bo);        /* DW 1..3 */
    OUT_BUFFER_NMA_REFERENCE(NULL);                /* DW 4..5, Upper Bound */
    OUT_BUFFER_MA_REFERENCE(NULL);                 /* DW 6..8, CU, ignored */
    OUT_BUFFER_MA_TARGET(NULL);                    /* DW 9..11, PAK-BSE, ignored */
    OUT_BUFFER_NMA_TARGET(NULL);                   /* DW 12..13, Upper Bound  */

    ADVANCE_BCS_BATCH(batch);
}
356
357 static void
358 gen9_hcpd_qm_state(VADriverContextP ctx,
359                    int size_id,
360                    int color_component,
361                    int pred_type,
362                    int dc,
363                    unsigned char *qm,
364                    int qm_length,
365                    struct gen9_hcpd_context *gen9_hcpd_context)
366 {
367     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
368     unsigned char qm_buffer[64];
369
370     assert(qm_length <= 64);
371     memset(qm_buffer, 0, sizeof(qm_buffer));
372     memcpy(qm_buffer, qm, qm_length);
373
374     BEGIN_BCS_BATCH(batch, 18);
375
376     OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
377     OUT_BCS_BATCH(batch,
378                   dc << 5 |
379                   color_component << 3 |
380                   size_id << 1 |
381                   pred_type);
382     intel_batchbuffer_data(batch, qm_buffer, 64);
383
384     ADVANCE_BCS_BATCH(batch);
385 }
386
/*
 * Program all HEVC scaling lists.  Falls back to the context's default
 * matrices when the application supplied no IQ matrix buffer or when
 * scaling lists are disabled in the PPS.  Per gen9_hcpd_qm_state's
 * packing: size_id 0/1/2/3 selects 4x4/8x8/16x16/32x32, i%3 the color
 * component and i/3 the intra/inter prediction type; only 16x16 and
 * 32x32 carry an explicit DC coefficient.
 */
static void
gen9_hcpd_hevc_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen9_hcpd_context *gen9_hcpd_context)
{
    VAIQMatrixBufferHEVC *iq_matrix;
    VAPictureParameterBufferHEVC *pic_param;
    int i;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        iq_matrix = (VAIQMatrixBufferHEVC *)decode_state->iq_matrix->buffer;
    else
        iq_matrix = &gen9_hcpd_context->iq_matrix_hevc;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    /* SPS/PPS disabled scaling lists: override with the defaults. */
    if (!pic_param->pic_fields.bits.scaling_list_enabled_flag)
        iq_matrix = &gen9_hcpd_context->iq_matrix_hevc;

    /* 6 x 4x4 lists (16 bytes each). */
    for (i = 0; i < 6; i++) {
        gen9_hcpd_qm_state(ctx,
                           0, i % 3, i / 3, 0,
                           iq_matrix->ScalingList4x4[i], 16,
                           gen9_hcpd_context);
    }

    /* 6 x 8x8 lists. */
    for (i = 0; i < 6; i++) {
        gen9_hcpd_qm_state(ctx,
                           1, i % 3, i / 3, 0,
                           iq_matrix->ScalingList8x8[i], 64,
                           gen9_hcpd_context);
    }

    /* 6 x 16x16 lists, each with its DC coefficient. */
    for (i = 0; i < 6; i++) {
        gen9_hcpd_qm_state(ctx,
                           2, i % 3, i / 3, iq_matrix->ScalingListDC16x16[i],
                           iq_matrix->ScalingList16x16[i], 64,
                           gen9_hcpd_context);
    }

    /* 2 x 32x32 lists (luma only: intra and inter), with DC. */
    for (i = 0; i < 2; i++) {
        gen9_hcpd_qm_state(ctx,
                           3, 0, i % 2, iq_matrix->ScalingListDC32x32[i],
                           iq_matrix->ScalingList32x32[i], 64,
                           gen9_hcpd_context);
    }
}
435
/*
 * Emit HCP_PIC_STATE (19 dwords) from the picture parameter buffer:
 * picture geometry in min CBs, coding/transform block size ranges, PCM
 * parameters, the PPS/SPS control flags and the bit depths.  Bitfield
 * positions are fixed by the command layout — verify any change against
 * the PRM.
 */
static void
gen9_hcpd_pic_state(VADriverContextP ctx,
                    struct decode_state *decode_state,
                    struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
    int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
    /*
     * 7.4.3.1
     *
     * When not present, the value of loop_filter_across_tiles_enabled_flag
     * is inferred to be equal to 1.
     */
    int loop_filter_across_tiles_enabled_flag = 1;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    if (pic_param->pic_fields.bits.pcm_enabled_flag) {
        max_pcm_size_minus3 = pic_param->log2_min_pcm_luma_coding_block_size_minus3 +
            pic_param->log2_diff_max_min_pcm_luma_coding_block_size;
        min_pcm_size_minus3 = pic_param->log2_min_pcm_luma_coding_block_size_minus3;
        pcm_sample_bit_depth_luma_minus1 = (pic_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
        pcm_sample_bit_depth_chroma_minus1 = (pic_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
    } else {
        /* PCM disabled: derive a capped value from the CB size range. */
        max_pcm_size_minus3 = MIN(pic_param->log2_min_luma_coding_block_size_minus3 + pic_param->log2_diff_max_min_luma_coding_block_size, 2);
    }

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;

    BEGIN_BCS_BATCH(batch, 19);

    OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));

    /* DW 1: frame size in min coding blocks. */
    OUT_BCS_BATCH(batch,
                  gen9_hcpd_context->picture_height_in_min_cb_minus1 << 16 |
                  gen9_hcpd_context->picture_width_in_min_cb_minus1);
    /* DW 2: PCM / transform / coding block size ranges (log2). */
    OUT_BCS_BATCH(batch,
                  max_pcm_size_minus3 << 10 |
                  min_pcm_size_minus3 << 8 |
                  (pic_param->log2_min_transform_block_size_minus2 +
                   pic_param->log2_diff_max_min_transform_block_size) << 6 |
                  pic_param->log2_min_transform_block_size_minus2 << 4 |
                  (pic_param->log2_min_luma_coding_block_size_minus3 +
                   pic_param->log2_diff_max_min_luma_coding_block_size) << 2 |
                  pic_param->log2_min_luma_coding_block_size_minus3);
    OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
    /* DW 4: PPS/SPS control flags. */
    OUT_BCS_BATCH(batch,
                  0 << 27 |
                  pic_param->pic_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
                  pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
                  pic_param->pic_fields.bits.amp_enabled_flag << 23 |
                  pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
                  !(pic_param->CurrPic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD) << 21 |
                  !!(pic_param->CurrPic.flags & VA_PICTURE_HEVC_FIELD_PIC) << 20 |
                  pic_param->pic_fields.bits.weighted_pred_flag << 19 |
                  pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
                  pic_param->pic_fields.bits.tiles_enabled_flag << 17 |
                  pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
                  loop_filter_across_tiles_enabled_flag << 15 |
                  pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |
                  pic_param->log2_parallel_merge_level_minus2 << 10 |
                  pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |
                  pic_param->pic_fields.bits.pcm_loop_filter_disabled_flag << 8 |
                  (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |
                  pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |
                  pic_param->pic_fields.bits.pcm_enabled_flag << 4 |
                  pic_param->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag << 3 |
                  0);
    /* DW 5: bit depths, PCM sample depths, transform hierarchy, QP offsets. */
    OUT_BCS_BATCH(batch,
                  pic_param->bit_depth_luma_minus8 << 27 |
                  pic_param->bit_depth_chroma_minus8 << 24 |
                  pcm_sample_bit_depth_luma_minus1 << 20 |
                  pcm_sample_bit_depth_chroma_minus1 << 16 |
                  pic_param->max_transform_hierarchy_depth_inter << 13 |
                  pic_param->max_transform_hierarchy_depth_intra << 10 |
                  (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
                  (pic_param->pps_cb_qp_offset & 0x1f));
    OUT_BCS_BATCH(batch,
                  0 << 29 |
                  0);
    /* DW 7..18: unused for decode, zeroed. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* DW 10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* DW 15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
535
/*
 * Emit HCP_TILE_STATE: tile grid dimensions plus the cumulative tile
 * column/row boundary positions (in CTBs).  pos_col[0]/pos_row[0] are
 * 0 and entry i+1 is where tile i+1 starts.  The loop bounds are
 * clamped (MIN ... 18 / 20) so the writes stay within the fixed
 * 20-byte column / 24-byte row payloads.
 */
static void
gen9_hcpd_tile_state(VADriverContextP ctx,
                     struct decode_state *decode_state,
                     struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    uint8_t pos_col[20], pos_row[24];
    int i;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    memset(pos_col, 0, sizeof(pos_col));
    memset(pos_row, 0, sizeof(pos_row));

    for (i = 0; i <= MIN(pic_param->num_tile_columns_minus1, 18); i++)
        pos_col[i + 1] = pos_col[i] + pic_param->column_width_minus1[i] + 1;

    for (i = 0; i <= MIN(pic_param->num_tile_rows_minus1, 20); i++)
        pos_row[i + 1] = pos_row[i] + pic_param->row_height_minus1[i] + 1;

    BEGIN_BCS_BATCH(batch, 13);

    OUT_BCS_BATCH(batch, HCP_TILE_STATE | (13 - 2));

    OUT_BCS_BATCH(batch,
                  pic_param->num_tile_columns_minus1 << 5 |
                  pic_param->num_tile_rows_minus1);
    intel_batchbuffer_data(batch, pos_col, 20);
    intel_batchbuffer_data(batch, pos_row, 24);

    ADVANCE_BCS_BATCH(batch);
}
570
571 static int
572 gen9_hcpd_get_reference_picture_frame_id(VAPictureHEVC *ref_pic,
573                                          GenFrameStore frame_store[MAX_GEN_HCP_REFERENCE_FRAMES])
574 {
575     int i;
576
577     if (ref_pic->picture_id == VA_INVALID_ID ||
578         (ref_pic->flags & VA_PICTURE_HEVC_INVALID))
579         return 0;
580
581     for (i = 0; i < MAX_GEN_HCP_REFERENCE_FRAMES; i++) {
582         if (ref_pic->picture_id == frame_store[i].surface_id) {
583             assert(frame_store[i].frame_store_id < MAX_GEN_HCP_REFERENCE_FRAMES);
584             return frame_store[i].frame_store_id;
585         }
586     }
587
588     /* Should never get here !!! */
589     assert(0);
590     return 0;
591 }
592
/*
 * Emit one HCP_REF_IDX_STATE command (18 dwords) for reference list
 * `list` (0 = L0, 1 = L1).  Each of the up-to-15 active entries packs
 * the field/long-term flags, the frame store id, and the POC delta to
 * the current picture clamped into a signed byte; remaining entries
 * are zeroed.  NOTE(review): CLAMP is defined elsewhere — presumably
 * CLAMP(min, max, value); verify if touching this.
 */
static void
gen9_hcpd_ref_idx_state_1(struct intel_batchbuffer *batch,
                          int list,
                          VAPictureParameterBufferHEVC *pic_param,
                          VASliceParameterBufferHEVC *slice_param,
                          GenFrameStore frame_store[MAX_GEN_HCP_REFERENCE_FRAMES])
{
    int i;
    uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
    uint8_t *ref_list = slice_param->RefPicList[list];

    BEGIN_BCS_BATCH(batch, 18);

    OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
    OUT_BCS_BATCH(batch,
                  num_ref_minus1 << 1 |
                  list);

    for (i = 0; i < 16; i++) {
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            VAPictureHEVC *ref_pic = &pic_param->ReferenceFrames[ref_list[i]];
            VAPictureHEVC *curr_pic = &pic_param->CurrPic;

            OUT_BCS_BATCH(batch,
                          !(ref_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD) << 15 |
                          !!(ref_pic->flags & VA_PICTURE_HEVC_FIELD_PIC) << 14 |
                          !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |
                          0 << 12 |
                          0 << 11 |
                          gen9_hcpd_get_reference_picture_frame_id(ref_pic, frame_store) << 8 |
                          (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
        } else {
            /* Inactive entry: zero. */
            OUT_BCS_BATCH(batch, 0);
        }
    }

    ADVANCE_BCS_BATCH(batch);
}
631
632 static void
633 gen9_hcpd_ref_idx_state(VADriverContextP ctx,
634                         VAPictureParameterBufferHEVC *pic_param,
635                         VASliceParameterBufferHEVC *slice_param,
636                         struct gen9_hcpd_context *gen9_hcpd_context)
637 {
638     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
639
640     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
641         return;
642
643     gen9_hcpd_ref_idx_state_1(batch, 0, pic_param, slice_param, gen9_hcpd_context->reference_surfaces);
644
645     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
646         return;
647
648     gen9_hcpd_ref_idx_state_1(batch, 1, pic_param, slice_param, gen9_hcpd_context->reference_surfaces);
649 }
650
/*
 * Emit one HCP_WEIGHTOFFSET command (34 dwords) for reference list
 * `list`: 16 dwords of luma offset/weight-delta pairs followed by 16
 * dwords of Cb/Cr offset/weight-delta quads, one per reference entry;
 * entries beyond the active count (capped at 15) are zeroed.
 */
static void
gen9_hcpd_weightoffset_state_1(struct intel_batchbuffer *batch,
                               int list,
                               VASliceParameterBufferHEVC *slice_param)
{
    int i;
    uint8_t num_ref_minus1 = (list == 1) ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1;
    int8_t *luma_offset = (list == 1) ? slice_param->luma_offset_l1 : slice_param->luma_offset_l0;
    int8_t *delta_luma_weight = (list == 1) ? slice_param->delta_luma_weight_l1 : slice_param->delta_luma_weight_l0;
    int8_t (* chroma_offset)[2] = (list == 1) ? slice_param->ChromaOffsetL1 : slice_param->ChromaOffsetL0;
    int8_t (* delta_chroma_weight)[2] = (list == 1) ? slice_param->delta_chroma_weight_l1 : slice_param->delta_chroma_weight_l0;

    BEGIN_BCS_BATCH(batch, 34);

    OUT_BCS_BATCH(batch, HCP_WEIGHTOFFSET | (34 - 2));
    OUT_BCS_BATCH(batch, list);

    /* Luma: offset in bits 15..8, delta weight in bits 7..0. */
    for (i = 0; i < 16; i++) {
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            OUT_BCS_BATCH(batch,
                          (luma_offset[i] & 0xff) << 8 |
                          (delta_luma_weight[i] & 0xff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }
    /* Chroma: Cr pair in the high half-word, Cb pair in the low. */
    for (i = 0; i < 16; i++) {
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            OUT_BCS_BATCH(batch,
                          (chroma_offset[i][1] & 0xff) << 24 |
                          (delta_chroma_weight[i][1] & 0xff) << 16 |
                          (chroma_offset[i][0] & 0xff) << 8 |
                          (delta_chroma_weight[i][0] & 0xff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    ADVANCE_BCS_BATCH(batch);
}
691
692 static void
693 gen9_hcpd_weightoffset_state(VADriverContextP ctx,
694                              VAPictureParameterBufferHEVC *pic_param,
695                              VASliceParameterBufferHEVC *slice_param,
696                              struct gen9_hcpd_context *gen9_hcpd_context)
697 {
698     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
699
700     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
701         return;
702
703     if ((slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P &&
704          !pic_param->pic_fields.bits.weighted_pred_flag) ||
705         (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B &&
706          !pic_param->pic_fields.bits.weighted_bipred_flag))
707         return;
708
709     gen9_hcpd_weightoffset_state_1(batch, 0, slice_param);
710
711     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
712         return;
713
714     gen9_hcpd_weightoffset_state_1(batch, 1, slice_param);
715 }
716
717 static int
718 gen9_hcpd_get_collocated_ref_idx(VADriverContextP ctx,
719                                  VAPictureParameterBufferHEVC *pic_param,
720                                  VASliceParameterBufferHEVC *slice_param,
721                                  struct gen9_hcpd_context *gen9_hcpd_context)
722 {
723     uint8_t *ref_list;
724     VAPictureHEVC *ref_pic;
725
726     if (slice_param->collocated_ref_idx > 14)
727         return 0;
728
729     if (!slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag)
730         return 0;
731
732     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
733         return 0;
734
735     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P ||
736         (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B &&
737          slice_param->LongSliceFlags.fields.collocated_from_l0_flag))
738         ref_list = slice_param->RefPicList[0];
739     else {
740         assert(slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B);
741         ref_list = slice_param->RefPicList[1];
742     }
743
744     ref_pic = &pic_param->ReferenceFrames[ref_list[slice_param->collocated_ref_idx]];
745
746     return gen9_hcpd_get_reference_picture_frame_id(ref_pic, gen9_hcpd_context->reference_surfaces);
747 }
748
749 static int
750 gen9_hcpd_is_list_low_delay(uint8_t ref_list_count,
751                             uint8_t ref_list[15],
752                             VAPictureHEVC *curr_pic,
753                             VAPictureHEVC ref_surfaces[15])
754 {
755     int i;
756
757     for (i = 0; i < MIN(ref_list_count, 15); i++) {
758         VAPictureHEVC *ref_pic;
759
760         if (ref_list[i] > 14)
761             continue;
762
763         ref_pic = &ref_surfaces[ref_list[i]];
764
765         if (ref_pic->pic_order_cnt > curr_pic->pic_order_cnt)
766             return 0;
767     }
768
769     return 1;
770 }
771
772 static int
773 gen9_hcpd_is_low_delay(VADriverContextP ctx,
774                        VAPictureParameterBufferHEVC *pic_param,
775                        VASliceParameterBufferHEVC *slice_param)
776 {
777     if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
778         return 0;
779     else if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
780         return gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l0_active_minus1 + 1,
781                                            slice_param->RefPicList[0],
782                                            &pic_param->CurrPic,
783                                            pic_param->ReferenceFrames);
784     else
785         return gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l0_active_minus1 + 1,
786                                            slice_param->RefPicList[0],
787                                            &pic_param->CurrPic,
788                                            pic_param->ReferenceFrames) &&
789             gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l1_active_minus1 + 1,
790                                         slice_param->RefPicList[1],
791                                         &pic_param->CurrPic,
792                                         pic_param->ReferenceFrames);
793 }
794
/*
 * Emit the 9-dword HCP_SLICE_STATE command for one HEVC slice.
 *
 * Converts the VA slice parameters into the hardware layout: slice
 * start/end CTB coordinates, QP (magnitude + sign for >8-bit content),
 * deblocking/SAO flags, weighted-prediction denominators, and the
 * collocated-reference info for temporal MV prediction.
 * next_slice_param may be NULL for the last slice of the picture.
 */
static void
gen9_hcpd_slice_state(VADriverContextP ctx,
                      VAPictureParameterBufferHEVC *pic_param,
                      VASliceParameterBufferHEVC *slice_param,
                      VASliceParameterBufferHEVC *next_slice_param,
                      struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    unsigned short collocated_ref_idx, collocated_from_l0_flag;
    int sliceqp_sign_flag = 0, sliceqp = 0;

    /* Convert the linear CTB address into (x, y) CTB coordinates. */
    slice_hor_pos = slice_param->slice_segment_address % gen9_hcpd_context->picture_width_in_ctbs;
    slice_ver_pos = slice_param->slice_segment_address / gen9_hcpd_context->picture_width_in_ctbs;

    /* The end position of this slice is the start of the next one;
     * (0, 0) is used for the final slice of the picture. */
    if (next_slice_param) {
        next_slice_hor_pos = next_slice_param->slice_segment_address % gen9_hcpd_context->picture_width_in_ctbs;
        next_slice_ver_pos = next_slice_param->slice_segment_address / gen9_hcpd_context->picture_width_in_ctbs;
    } else {
        next_slice_hor_pos = 0;
        next_slice_ver_pos = 0;
    }

    collocated_ref_idx = gen9_hcpd_get_collocated_ref_idx(ctx, pic_param, slice_param, gen9_hcpd_context);
    collocated_from_l0_flag = slice_param->LongSliceFlags.fields.collocated_from_l0_flag;

    /* Remember the collocated info from the first inter slice that has
     * temporal MVP enabled, so it can be reused below. */
    if ((!gen9_hcpd_context->first_inter_slice_valid) &&
        (slice_param->LongSliceFlags.fields.slice_type != HEVC_SLICE_I) &&
        slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag) {
        gen9_hcpd_context->first_inter_slice_collocated_ref_idx = collocated_ref_idx;
        gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = collocated_from_l0_flag;
        gen9_hcpd_context->first_inter_slice_valid = 1;
    }

    /* HW requirement: slices without usable collocated info (intra, or
     * temporal MVP disabled) must still program the values captured from
     * the first valid inter slice. */
    if (gen9_hcpd_context->first_inter_slice_valid &&
        ((slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I) ||
         (!slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag))) {
        collocated_ref_idx = gen9_hcpd_context->first_inter_slice_collocated_ref_idx;
        collocated_from_l0_flag = gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag;
    }

    /* SliceQpY = 26 + init_qp_minus26 + slice_qp_delta.  For >8-bit
     * content the hardware takes the magnitude plus a separate sign
     * flag, so split a negative QP here. */
    sliceqp = pic_param->init_qp_minus26 + 26 + slice_param->slice_qp_delta;
    if((pic_param->bit_depth_luma_minus8 > 0)
        || (pic_param->bit_depth_chroma_minus8 > 0))
    {
        if(sliceqp < 0)
        {
            sliceqp_sign_flag = 1;
            sliceqp = -sliceqp;
        }
    }

    BEGIN_BCS_BATCH(batch, 9);

    OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));

    /* DW 1: slice start CTB position */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 16 |
                  slice_hor_pos);
    /* DW 2: next slice start CTB position (0/0 = last slice) */
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);
    /* DW 3: chroma QP offsets, slice QP, flags, slice type */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
                  (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
                  sliceqp << 6 |
                  slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag << 5 |
                  slice_param->LongSliceFlags.fields.dependent_slice_segment_flag << 4 |
                  sliceqp_sign_flag << 3 |
                  !next_slice_param << 2 |      /* last-slice-of-picture flag */
                  slice_param->LongSliceFlags.fields.slice_type);
    /* DW 4: merge candidates (MaxNumMergeCand - 1), CABAC init, weight
     * denominators, collocated ref, low-delay, SAO/deblocking flags */
    OUT_BCS_BATCH(batch,
                  collocated_ref_idx << 26 |
                  (5 - slice_param->five_minus_max_num_merge_cand - 1) << 23 |
                  slice_param->LongSliceFlags.fields.cabac_init_flag << 22 |
                  slice_param->luma_log2_weight_denom << 19 |
                  ((slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) & 0x7) << 16 |
                  collocated_from_l0_flag << 15 |
                  gen9_hcpd_is_low_delay(ctx, pic_param, slice_param) << 14 |
                  slice_param->LongSliceFlags.fields.mvd_l1_zero_flag << 13 |
                  slice_param->LongSliceFlags.fields.slice_sao_luma_flag << 12 |
                  slice_param->LongSliceFlags.fields.slice_sao_chroma_flag << 11 |
                  slice_param->LongSliceFlags.fields.slice_loop_filter_across_slices_enabled_flag << 10 |
                  (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
                  (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
                  slice_param->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_byte_offset); /* DW 5 */
    OUT_BCS_BATCH(batch,
                  0 << 26 |
                  0 << 20 |
                  0);
    OUT_BCS_BATCH(batch, 0);    /* Ignored for decoding */
    OUT_BCS_BATCH(batch, 0);    /* Ignored for decoding */

    ADVANCE_BCS_BATCH(batch);
}
893
894 static void
895 gen9_hcpd_bsd_object(VADriverContextP ctx,
896                      VASliceParameterBufferHEVC *slice_param,
897                      struct gen9_hcpd_context *gen9_hcpd_context)
898 {
899     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
900
901     BEGIN_BCS_BATCH(batch, 3);
902
903     OUT_BCS_BATCH(batch, HCP_BSD_OBJECT | (3 - 2));
904
905     OUT_BCS_BATCH(batch, slice_param->slice_data_size);
906     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
907
908     ADVANCE_BCS_BATCH(batch);
909 }
910
/*
 * Top-level HEVC decode entry point for one picture: initializes the
 * decode context, then builds and flushes a BCS batch containing the
 * per-picture HCP state followed by per-slice state/BSD commands.
 * Returns VA_STATUS_SUCCESS or the error from decode init.
 */
static VAStatus
gen9_hcpd_hevc_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen9_hcpd_context *gen9_hcpd_context)
{
    VAStatus vaStatus;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VAPictureParameterBufferHEVC *pic_param;
    VASliceParameterBufferHEVC *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    vaStatus = gen9_hcpd_hevc_decode_init(ctx, decode_state, gen9_hcpd_context);

    if (vaStatus != VA_STATUS_SUCCESS)
        goto out;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;

    /* HEVC decode runs on the BSD (video) ring; pin it to ring 0 when
     * the platform exposes two BSD rings. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    /* Per-picture state. */
    gen9_hcpd_pipe_mode_select(ctx, decode_state, HCP_CODEC_HEVC, gen9_hcpd_context);
    gen9_hcpd_surface_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_pipe_buf_addr_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_hevc_qm_state(ctx, decode_state, gen9_hcpd_context);
    gen9_hcpd_pic_state(ctx, decode_state, gen9_hcpd_context);

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        gen9_hcpd_tile_state(ctx, decode_state, gen9_hcpd_context);

    /* TODO: double-check whether this works when two slice groups have
     * different slice data buffers. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferHEVC *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;

        gen9_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);

        /* Each slice needs to know where the next one starts; the next
         * slice may live in the following slice-parameter group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferHEVC *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen9_hcpd_slice_state(ctx, pic_param, slice_param, next_slice_param, gen9_hcpd_context);
            gen9_hcpd_ref_idx_state(ctx, pic_param, slice_param, gen9_hcpd_context);
            gen9_hcpd_weightoffset_state(ctx, pic_param, slice_param, gen9_hcpd_context);
            gen9_hcpd_bsd_object(ctx, slice_param, gen9_hcpd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

out:
    return vaStatus;
}
980
981 /*********************************************************/
982 /*                  VP9 Code                             */
983 /*********************************************************/
984
985
986 #define VP9_PROB_BUFFER_FIRST_PART_SIZE 2010
987 #define VP9_PROB_BUFFER_SECOND_PART_SIZE 10
988 #define VP9_PROB_BUFFER_KEY_INTER_OFFSET 1667
989 #define VP9_PROB_BUFFER_KEY_INTER_SIZE   343
990
991 #define VP9_PROB_BUFFER_UPDATE_NO   0
992 #define VP9_PROB_BUFFER_UPDATE_SECNE_1    1
993 #define VP9_PROB_BUFFER_UPDATE_SECNE_2    2
994 #define VP9_PROB_BUFFER_UPDATE_SECNE_3    3
995 #define VP9_PROB_BUFFER_UPDATE_SECNE_4    4
996 #define VP9_PROB_BUFFER_UPDATE_SECNE_5    5
997
998 #define VP9_PROB_BUFFER_SAVED_NO   0
999 #define VP9_PROB_BUFFER_SAVED_SECNE_1    1
1000 #define VP9_PROB_BUFFER_SAVED_SECNE_2    2
1001
1002 #define VP9_PROB_BUFFER_RESTORED_NO   0
1003 #define VP9_PROB_BUFFER_RESTORED_SECNE_1    1
1004 #define VP9_PROB_BUFFER_RESTORED_SECNE_2    2
1005 #define VP9_PROB_BUFFER_RESTORED_SECNE_MAX    (VP9_PROB_BUFFER_RESTORED_SECNE_2 + 1)
1006
/*
 * (Re)allocate a VP9 motion-vector temporal buffer and record the frame
 * dimensions it was sized for, so later frames can decide whether a
 * larger buffer is needed.  Drops any previous bo reference first.
 */
#define ALLOC_MV_BUFFER(gen_buffer, string, size,width,height)  \
    do {                                                        \
        dri_bo_unreference(gen_buffer->bo);                     \
        gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr,       \
                                      string,                   \
                                      size,                     \
                                      0x1000);                  \
        assert(gen_buffer->bo);                                 \
        gen_buffer->frame_width  = width ;                      \
        gen_buffer->frame_height = height;                      \
    } while (0)
1018
/*
 * Ensure the VP9 segment-id buffer exists and is large enough for the
 * current frame, and zero it whenever the spec requires the segment map
 * to be reset (key frame, error-resilient mode, intra-only frame, or a
 * resolution change relative to the previous frame).
 */
static void
vp9_update_segmentId_buffer(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VADecPictureParameterBufferVP9 *pic_param;

    int size = 0;
    int is_scaling = 0;
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    /* One cacheline (64 bytes) per superblock. */
    size = gen9_hcpd_context->picture_width_in_ctbs * gen9_hcpd_context->picture_height_in_ctbs * 1 ;
    size <<= 6;
    /* Reallocate only when there is no buffer yet or the frame grew
     * beyond what the previous allocation covered. */
    if (gen9_hcpd_context->vp9_segment_id_buffer.bo == NULL || pic_param->frame_width > gen9_hcpd_context->last_frame.frame_width || pic_param->frame_height > gen9_hcpd_context->last_frame.frame_height)
    {
        ALLOC_GEN_BUFFER((&gen9_hcpd_context->vp9_segment_id_buffer), "vp9 segment id buffer", size);
    }

    is_scaling = (pic_param->frame_width != gen9_hcpd_context->last_frame.frame_width) || (pic_param->frame_height != gen9_hcpd_context->last_frame.frame_height);

    if((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||
        pic_param->pic_fields.bits.error_resilient_mode ||
        pic_param->pic_fields.bits.intra_only || is_scaling) {

        //VP9 Segment ID buffer needs to be zero
        dri_bo_map(gen9_hcpd_context->vp9_segment_id_buffer.bo,1);
        memset((unsigned char *)gen9_hcpd_context->vp9_segment_id_buffer.bo->virtual,0, size);
        dri_bo_unmap(gen9_hcpd_context->vp9_segment_id_buffer.bo);
    }
}
1051
/*
 * Ensure the current and last-frame VP9 MV temporal buffers exist and
 * are large enough: 9 cachelines (64 bytes each) per superblock.
 *
 * NOTE(review): only the "curr" buffer is reallocated on frame growth;
 * "last" is allocated once when NULL — presumably the two buffers are
 * swapped after each frame elsewhere, so "last" inherits a correctly
 * sized bo.  TODO: confirm against the buffer-swap code.
 */
static void
vp9_update_mv_temporal_buffer(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VADecPictureParameterBufferVP9 *pic_param;
    int size = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    size = gen9_hcpd_context->picture_width_in_ctbs * gen9_hcpd_context->picture_height_in_ctbs * 9 ;
    size <<= 6; //CL aligned
    if (gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo == NULL || pic_param->frame_width > gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width || pic_param->frame_height > gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height)
    {
        ALLOC_MV_BUFFER((&gen9_hcpd_context->vp9_mv_temporal_buffer_curr), "vp9 curr mv temporal buffer", size,pic_param->frame_width,pic_param->frame_height);
    }
    if (gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo == NULL)
    {
        ALLOC_MV_BUFFER((&gen9_hcpd_context->vp9_mv_temporal_buffer_last), "vp9 last mv temporal buffer", size,pic_param->frame_width,pic_param->frame_height);
    }

}
1076
/*
 * Build the default VP9 probability tables used to (re)initialize frame
 * contexts: one layout for key/intra-only frames and one for inter
 * frames, both matching the byte layout the HCP hardware expects (the
 * "dummy N" comments mark padding regions in that layout).  All
 * FRAME_CONTEXTS slots start out as copies of the inter defaults.
 */
static void
vp9_gen_default_probabilities(VADriverContextP ctx, struct gen9_hcpd_context *gen9_hcpd_context)
{
    int i = 0;
    uint32_t size = 0;

    size = sizeof(FRAME_CONTEXT);
    memset(&gen9_hcpd_context->vp9_fc_key_default,0,size);
    memset(&gen9_hcpd_context->vp9_fc_inter_default,0,size);
    memset(&gen9_hcpd_context->vp9_frame_ctx,0,size*FRAME_CONTEXTS);
    //more code to come here below

    //1. key default
    gen9_hcpd_context->vp9_fc_key_default.tx_probs = default_tx_probs;
    //dummy 52
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs4x4, default_coef_probs_4x4);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs8x8, default_coef_probs_8x8);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs16x16, default_coef_probs_16x16);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.coeff_probs32x32, default_coef_probs_32x32);
    //dummy 16
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.skip_probs, default_skip_probs);
    /* Key frames use the key-frame-specific partition and UV-mode tables. */
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.partition_prob, vp9_kf_partition_probs);
    //dummy 47
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.uv_mode_prob, vp9_kf_uv_mode_prob);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.seg_tree_probs, default_seg_tree_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_key_default.seg_pred_probs, default_seg_pred_probs);

    //2. inter default
    gen9_hcpd_context->vp9_fc_inter_default.tx_probs = default_tx_probs;
    //dummy 52
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs4x4, default_coef_probs_4x4);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs8x8, default_coef_probs_8x8);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs16x16, default_coef_probs_16x16);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.coeff_probs32x32, default_coef_probs_32x32);
    //dummy 16
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.skip_probs, default_skip_probs);
    /* Inter frames additionally carry inter-mode, interp-filter,
     * reference and MV probability tables. */
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.inter_mode_probs, default_inter_mode_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.switchable_interp_prob, default_switchable_interp_prob);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.intra_inter_prob, default_intra_inter_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.comp_inter_prob, default_comp_inter_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.single_ref_prob, default_single_ref_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.comp_ref_prob, default_comp_ref_p);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.y_mode_prob, default_if_y_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.partition_prob, default_partition_probs);
    gen9_hcpd_context->vp9_fc_inter_default.nmvc = default_nmv_context;
    //dummy 47
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.uv_mode_prob, default_if_uv_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.seg_tree_probs, default_seg_tree_probs);
    vp9_copy(gen9_hcpd_context->vp9_fc_inter_default.seg_pred_probs, default_seg_pred_probs);

    /* Every frame-context slot starts from the inter defaults. */
    for(i = 0; i < FRAME_CONTEXTS; i++)
    {
        gen9_hcpd_context->vp9_frame_ctx[i] = gen9_hcpd_context->vp9_fc_inter_default;
    }
}
1132
/*
 * Refresh the VP9 probability tables for the current frame and upload
 * the selected frame context into the hardware probability buffer.
 *
 * Handles the spec-mandated context resets (key frame / intra-only /
 * error-resilient with the various reset_frame_context values), applies
 * segmentation probability updates from the picture parameters, then
 * copies the chosen context into vp9_probability_buffer, overlaying the
 * key-frame defaults for the inter-only region on key/intra frames.
 *
 * NOTE(review): this writes pic_param->pic_fields.bits.frame_context_idx
 * back into the caller-supplied VA buffer (forced to 0 on resets).
 */
static void
vp9_update_probabilities(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen9_hcpd_context *gen9_hcpd_context)
{
    VADecPictureParameterBufferVP9 *pic_param;
    int i = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    //first part buffer update: Case 1)Reset all 4 probablity buffers
   if((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||pic_param->pic_fields.bits.intra_only||pic_param->pic_fields.bits.error_resilient_mode)
    {
        if((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) ||
            (pic_param->pic_fields.bits.reset_frame_context == 3)||
            pic_param->pic_fields.bits.error_resilient_mode)
        {
            //perform full buffer update
            for(i = 0; i < FRAME_CONTEXTS; i++)
            {
                /* Reset the first part (everything before the segment
                 * tables) to inter defaults... */
                memcpy(&gen9_hcpd_context->vp9_frame_ctx[i],&gen9_hcpd_context->vp9_fc_inter_default,VP9_PROB_BUFFER_FIRST_PART_SIZE);

                /* ...and the segment tables to their own defaults. */
                vp9_copy(gen9_hcpd_context->vp9_frame_ctx[i].seg_tree_probs, default_seg_tree_probs);
                vp9_copy(gen9_hcpd_context->vp9_frame_ctx[i].seg_pred_probs, default_seg_pred_probs);
            }
        }else if(pic_param->pic_fields.bits.reset_frame_context == 2&&pic_param->pic_fields.bits.intra_only)
        {
            /* reset_frame_context == 2: reset only the current context. */
            memcpy(&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx],&gen9_hcpd_context->vp9_fc_inter_default,VP9_PROB_BUFFER_FIRST_PART_SIZE);
        }
        pic_param->pic_fields.bits.frame_context_idx = 0;
    }

    //Case 3) Update only segment probabilities
    if((pic_param->pic_fields.bits.segmentation_enabled &&
        pic_param->pic_fields.bits.segmentation_update_map))
    {
        //Update seg_tree_probs and seg_pred_probs accordingly
        for (i=0; i<SEG_TREE_PROBS; i++)
        {
            gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx].seg_tree_probs[i] = pic_param->mb_segment_tree_probs[i];
        }
        for (i=0; i<PREDICTION_PROBS; i++)
        {
            gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx].seg_pred_probs[i] = pic_param->segment_pred_probs[i];
        }
    }

    //update vp9_fc according to frame_context_id
    {
        void *pfc = (void *)&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx];
        void *pprob = NULL;

        dri_bo_map(gen9_hcpd_context->vp9_probability_buffer.bo,1);

        /* Copy the whole context into the HW probability buffer. */
        pprob = (void *)gen9_hcpd_context->vp9_probability_buffer.bo->virtual;
        memcpy(pprob,pfc,2048);
        //only update 343bytes for key or intra_only frame
        if(pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME ||
            pic_param->pic_fields.bits.intra_only)
        {
            memcpy(pprob + VP9_PROB_BUFFER_FIRST_PART_SIZE - VP9_PROB_BUFFER_KEY_INTER_SIZE
                    , gen9_hcpd_context->vp9_fc_key_default.inter_mode_probs
                    , VP9_PROB_BUFFER_KEY_INTER_SIZE);
        }

        dri_bo_unmap(gen9_hcpd_context->vp9_probability_buffer.bo);
    }
}
1202
1203 static void
1204 gen9_hcpd_init_vp9_surface(VADriverContextP ctx,
1205                             VADecPictureParameterBufferVP9 *pic_param,
1206                             struct object_surface *obj_surface,
1207                             struct gen9_hcpd_context *gen9_hcpd_context)
1208 {
1209     GenVP9Surface *gen9_vp9_surface;
1210
1211     if (!obj_surface)
1212         return;
1213
1214     obj_surface->free_private_data = gen_free_vp9_surface;
1215     gen9_vp9_surface = obj_surface->private_data;
1216
1217     if (!gen9_vp9_surface) {
1218         gen9_vp9_surface = calloc(sizeof(GenVP9Surface), 1);
1219         assert(gen9_vp9_surface);
1220         gen9_vp9_surface->base.frame_store_id = -1;
1221         obj_surface->private_data = gen9_vp9_surface;
1222     }
1223
1224     gen9_vp9_surface->frame_width  = pic_param->frame_width;
1225     gen9_vp9_surface->frame_height = pic_param->frame_height;
1226
1227 }
1228
/*
 * Per-picture VP9 decode initialization: validates the profile and bit
 * depth against the platform capabilities, refreshes the reference
 * frame-store table, (re)creates the render surface, computes the
 * superblock-based picture geometry, and allocates all the HCP
 * rowstore/metadata/probability scratch buffers.
 *
 * Returns VA_STATUS_SUCCESS, or an UNSUPPORTED_PROFILE /
 * UNSUPPORTED_RT_FORMAT error.
 */
static VAStatus
gen9_hcpd_vp9_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VADecPictureParameterBufferVP9 *pic_param;
    struct object_surface *obj_surface;
    uint32_t size;
    int width_in_mbs=0, height_in_mbs=0;
    int bit_depth_minus8 = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    width_in_mbs = (pic_param->frame_width + 15) / 16;
    height_in_mbs = (pic_param->frame_height + 15) / 16;

    //For BXT, we support only till 4K
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    if(!(i965->codec_info->vp9_dec_profiles & (1U<<pic_param->profile)))
        return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;

    /* Profiles 2/3 carry high bit depth; only 10-bit 4:2:0 is accepted
     * here, and only when the platform advertises it. */
    if(pic_param->profile >= 2)
    {
        if(pic_param->bit_depth >= 8)
            bit_depth_minus8 = pic_param->bit_depth - 8;

        if(bit_depth_minus8 == 2)
        {
            if(!(i965->codec_info->vp9_dec_chroma_formats & VA_RT_FORMAT_YUV420_10BPP))
                return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
        }
        else if((bit_depth_minus8 > 2) || (bit_depth_minus8 == 1) || (bit_depth_minus8 < 0))
            return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
    }

    //Update the frame store buffers with the reference frames information
    intel_update_vp9_frame_store_index(ctx,
                                        decode_state,
                                        pic_param,
                                        gen9_hcpd_context->reference_surfaces);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    //Ensure there is a tiled render surface in NV12 format. If not, create one.
    vp9_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);


    //Super block in VP9 is 64x64
    gen9_hcpd_context->ctb_size = 64;
    gen9_hcpd_context->min_cb_size = 8; //Min block size is 4 or 8?

    //If picture width/height is not multiple of 64, needs to upsize it to the next 64 pixels
    //before calculation below.
    gen9_hcpd_context->picture_width_in_ctbs  = ALIGN(pic_param->frame_width, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
    gen9_hcpd_context->picture_height_in_ctbs = ALIGN(pic_param->frame_height, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;

    gen9_hcpd_context->picture_width_in_min_cb_minus1  = ALIGN(pic_param->frame_width, gen9_hcpd_context->min_cb_size) / gen9_hcpd_context->min_cb_size - 1;
    gen9_hcpd_context->picture_height_in_min_cb_minus1 = ALIGN(pic_param->frame_height, gen9_hcpd_context->min_cb_size) / gen9_hcpd_context->min_cb_size - 1;

    gen9_hcpd_context->picture_width_in_pixels  = (gen9_hcpd_context->picture_width_in_min_cb_minus1  + 1) * gen9_hcpd_context->min_cb_size ;
    gen9_hcpd_context->picture_height_in_pixels = (gen9_hcpd_context->picture_height_in_min_cb_minus1 + 1) * gen9_hcpd_context->min_cb_size ;

    gen9_hcpd_init_vp9_surface(ctx, pic_param, obj_surface, gen9_hcpd_context);

    /* Deblocking line buffers: rows double in size for 10-bit content.
     * All sizes below are in cachelines (<<6 converts to bytes). */
    if(pic_param->profile >= 2)
        size = gen9_hcpd_context->picture_width_in_ctbs*36; //num_width_in_SB * 36
    else
        size = gen9_hcpd_context->picture_width_in_ctbs*18; //num_width_in_SB * 18
    size<<=6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer), "line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);

    if(pic_param->profile >= 2)
        size = gen9_hcpd_context->picture_height_in_ctbs*34; //num_height_in_SB * 34
    else
        size = gen9_hcpd_context->picture_height_in_ctbs*17; //num_height_in_SB * 17
    size<<=6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);

    size = gen9_hcpd_context->picture_width_in_ctbs*5; //num_width_in_SB * 5
    size<<=6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer), "metadata line buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer), "metadata tile line buffer", size);

    size = gen9_hcpd_context->picture_height_in_ctbs*5; //num_height_in_SB * 5
    size<<=6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer), "metadata tile column buffer", size);

    size =gen9_hcpd_context->picture_width_in_ctbs*1; //num_width_in_SB * 1
    size<<=6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->hvd_line_rowstore_buffer), "hvd line rowstore buffer", size);
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->hvd_tile_rowstore_buffer), "hvd tile rowstore buffer", size);

    /* Probability buffer: 32 cachelines (2 KiB). */
    size = 32;
    size<<=6;
    ALLOC_GEN_BUFFER((&gen9_hcpd_context->vp9_probability_buffer), "vp9 probability buffer", size);

    /* Reset the HEVC first-inter-slice workaround state (shared context). */
    gen9_hcpd_context->first_inter_slice_collocated_ref_idx = 0;
    gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = 0;
    gen9_hcpd_context->first_inter_slice_valid = 0;

    return VA_STATUS_SUCCESS;
}
1336
1337 static void
1338 gen9_hcpd_vp9_surface_state(VADriverContextP ctx,
1339                         struct decode_state *decode_state,
1340                         struct gen9_hcpd_context *gen9_hcpd_context)
1341 {
1342     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
1343     struct object_surface *obj_surface = decode_state->render_object;
1344     struct object_surface *tmp_obj_surface = NULL;
1345     unsigned int y_cb_offset;
1346     int i = 0;
1347
1348     assert(obj_surface);
1349
1350     y_cb_offset = obj_surface->y_cb_offset;
1351
1352     BEGIN_BCS_BATCH(batch, 3);
1353
1354     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1355     OUT_BCS_BATCH(batch,
1356                   (0 << 28) |                   /* surface id */
1357                   (obj_surface->width - 1));    /* pitch - 1 */
1358     OUT_BCS_BATCH(batch,
1359                   (((obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010: SURFACE_FORMAT_PLANAR_420_8) << 28) |
1360                   y_cb_offset);
1361     ADVANCE_BCS_BATCH(batch);
1362
1363     tmp_obj_surface = obj_surface;
1364
1365     for(i = 0; i < 3; i++)
1366     {
1367         obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
1368         if (obj_surface && obj_surface->private_data)
1369         {
1370             BEGIN_BCS_BATCH(batch, 3);
1371
1372             OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1373             OUT_BCS_BATCH(batch,
1374                 ((i + 2) << 28) |                   /* surface id */
1375                 (obj_surface->width - 1));    /* pitch - 1 */
1376             OUT_BCS_BATCH(batch,
1377                 (((obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010: SURFACE_FORMAT_PLANAR_420_8) << 28) |
1378                 obj_surface->y_cb_offset);
1379             ADVANCE_BCS_BATCH(batch);
1380         }else
1381         {
1382             BEGIN_BCS_BATCH(batch, 3);
1383
1384             OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
1385             OUT_BCS_BATCH(batch,
1386                 ((i + 2) << 28) |                   /* surface id */
1387                 (tmp_obj_surface->width - 1));    /* pitch - 1 */
1388             OUT_BCS_BATCH(batch,
1389                 (((tmp_obj_surface->fourcc == VA_FOURCC_P010) ? SURFACE_FORMAT_P010: SURFACE_FORMAT_PLANAR_420_8) << 28) |
1390                 tmp_obj_surface->y_cb_offset);
1391             ADVANCE_BCS_BATCH(batch);
1392         }
1393     }
1394 }
1395
/*
 * Program HCP_PIPE_BUF_ADDR_STATE (95 DWs) for VP9 decoding: the decoded
 * picture, the internal deblocking/metadata/row-store buffers allocated at
 * decode-init time, the reference pictures, the collocated-MV buffers, and
 * the VP9 probability/segment-id buffers.  The DW numbering in the trailing
 * comments follows the command layout; the emission order must not change.
 */
static void
gen9_hcpd_vp9_pipe_buf_addr_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen9_hcpd_context *gen9_hcpd_context)
{
    /* NOTE(review): i965 is presumably referenced by the OUT_BUFFER_* relocation
     * macros; it is not used directly in this function body — confirm. */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    struct object_surface *obj_surface;
    int i=0;

    BEGIN_BCS_BATCH(batch, 95);

    OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));

    obj_surface = decode_state->render_object;
    assert(obj_surface && obj_surface->bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 22..24, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 25..27, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 28..30, ignore for VP9 */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo); /* DW 31..33 */

    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* DW 37..52 - Reference picture address.  VP9 populates only the first
     * three slots (LAST/GOLDEN/ALTREF); the remaining entries of the
     * 8-deep reference array are programmed NULL. */
    for (i = 0; i < 3; i++)
    {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;

        if (obj_surface)
        {
            OUT_BUFFER_NMA_REFERENCE(obj_surface->bo);
        }
        else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    for (; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++)
    {
       OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_REFERENCE(NULL); /* DW 54..56, ignore for decoding mode */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, StreamOutEnable - used for transcoding */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, DecodedPictureStatusError, ignored */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, Ignored */

    /* DW 66..81 - for 8 Collocated motion vectors.  Only the first slot is
     * used: the MV buffer of the previously decoded frame. */
    for (i = 0; i < 1; i++)
    {
        OUT_BUFFER_NMA_REFERENCE(gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo);
    }
    for (; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++)
    {
        OUT_BUFFER_NMA_REFERENCE(NULL);
    }

    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_probability_buffer.bo); /* DW 83..85, VP9 Probability bufffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->vp9_segment_id_buffer.bo);  /* DW 86..88, VP9 Segment ID buffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->hvd_line_rowstore_buffer.bo);/* DW 89..91, VP9 HVD Line Rowstore buffer */
    OUT_BUFFER_MA_TARGET(gen9_hcpd_context->hvd_tile_rowstore_buffer.bo);/* DW 92..94, VP9 HVD Tile Rowstore buffer */

    ADVANCE_BCS_BATCH(batch);
}
1469
/*
 * Check the VP9 reference-frame scaling limits: a reference picture is
 * usable only when each of its dimensions is at most 2x and at least
 * 1/16 of the corresponding current-frame dimension.
 * Returns 1 when the reference size is valid, 0 otherwise.
 */
static inline int
gen9_hcpd_vp9_valid_ref_frame_size(int ref_width, int ref_height,
                                   int cur_width, int cur_height) {
    if (ref_width > 2 * cur_width)
        return 0;

    if (ref_height > 2 * cur_height)
        return 0;

    if (16 * ref_width < cur_width)
        return 0;

    if (16 * ref_height < cur_height)
        return 0;

    return 1;
}
/*
 * Emit HCP_VP9_PIC_STATE: derives the segment-id stream in/out enables,
 * the previous-frame-MV reuse flag and the probability-adaptation flag
 * from the picture parameters and the saved last-frame record, then
 * programs frame size, tiling, bit depth and per-reference scale factors.
 */
static void
gen9_hcpd_vp9_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen9_hcpd_context *gen9_hcpd_context)
{
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VADecPictureParameterBufferVP9 *pic_param;
    struct object_surface *obj_surface;
    GenVP9Surface *gen9_vp9_surface;
    uint16_t scale_h = 0;
    uint16_t scale_w = 0;
    uint16_t frame_width_in_pixel = 0;
    uint16_t frame_height_in_pixel = 0;
    /* NOTE(review): fwidth/fheight are clamped below but never read after
     * that — they appear to be dead locals. */
    uint16_t fwidth = 64;
    uint16_t fheight = 64;
    int i;
    int bit_depth_minus8 = 0;

/* Total DW length of the HCP_VP9_PIC_STATE command emitted below. */
#define LEN_COMMAND_OWN 12
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    uint8_t segmentIDStreamInEnable = 0;
    /* Stream out the updated segment map only when segmentation is on and
     * the map is being updated this frame. */
    uint8_t segmentIDStreamOutEnable = (pic_param->pic_fields.bits.segmentation_enabled && pic_param->pic_fields.bits.segmentation_update_map);

    // For KEY_FRAME or INTRA_ONLY frame, this bit should be set to "0".
    uint8_t segmentation_temporal_update =
    ((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME) || (pic_param->pic_fields.bits.intra_only)) ? 0 : pic_param->pic_fields.bits.segmentation_temporal_update;

    /* Decide whether the hardware must read the previous segment-id map:
     * key/intra-only frames, frames that keep or temporally update the
     * map, and error-resilient frames all stream it in. */
    if(pic_param->pic_fields.bits.intra_only || (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME))
    {
        segmentIDStreamInEnable = 1;
    }else if(pic_param->pic_fields.bits.segmentation_enabled)
    {
        if(!pic_param->pic_fields.bits.segmentation_update_map)
        {
            segmentIDStreamInEnable = 1;

        }else if( pic_param->pic_fields.bits.segmentation_temporal_update)
        {
            segmentIDStreamInEnable = 1;
        }
    }

    if(pic_param->pic_fields.bits.error_resilient_mode)
    {
        segmentIDStreamInEnable = 1;
    }

    //frame type of previous frame (Key or Non-Key Frame).
    uint8_t last_frame_type = gen9_hcpd_context->last_frame.frame_type;

    /* The previous frame's motion vectors may be reused only when nothing
     * invalidates them: no error resilience, same resolution as the last
     * frame, neither frame is key/intra-only, and the last frame was shown. */
    uint8_t use_pre_frame_mvs = 0;
    use_pre_frame_mvs = !((pic_param->pic_fields.bits.error_resilient_mode) ||
                                (pic_param->frame_width != gen9_hcpd_context->last_frame.frame_width) ||
                                (pic_param->frame_height != gen9_hcpd_context->last_frame.frame_height) ||
                                (pic_param->pic_fields.bits.intra_only) ||
                                (pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME)||
                                (gen9_hcpd_context->last_frame.intra_only)||
                                (last_frame_type == HCP_VP9_KEY_FRAME) ||
                                (!gen9_hcpd_context->last_frame.show_frame));

    /* NOTE(review): the else-if condition is the exact negation of the if,
     * so this is equivalent to a plain else. */
    uint8_t adapt_probabilities_flag = 0;
    if((pic_param->pic_fields.bits.error_resilient_mode || pic_param->pic_fields.bits.frame_parallel_decoding_mode))
        adapt_probabilities_flag = 0; //0: Do not adapt (error resilient or frame_parallel_mode are set)
    else if(!(pic_param->pic_fields.bits.error_resilient_mode) && !(pic_param->pic_fields.bits.frame_parallel_decoding_mode))
        adapt_probabilities_flag = 1; //1: Adapt (not error resilient and not frame_ parallel_mode)

    /* Frame size in pixels, reconstructed from the min-CB-aligned values
     * computed at decode-init time. */
    frame_width_in_pixel  = (gen9_hcpd_context->picture_width_in_min_cb_minus1  + 1) * gen9_hcpd_context->min_cb_size ;
    frame_height_in_pixel = (gen9_hcpd_context->picture_height_in_min_cb_minus1 + 1) * gen9_hcpd_context->min_cb_size ;

    fwidth = (fwidth > frame_width_in_pixel)?frame_width_in_pixel:fwidth;
    fheight = (fheight > frame_height_in_pixel)?frame_height_in_pixel:fheight;

    /* Bit depth above 8 is only meaningful for profile 2 and higher. */
    if(pic_param->profile >= 2)
    {
        if(pic_param->bit_depth >= 8)
            bit_depth_minus8 = pic_param->bit_depth - 8;
    }

    BEGIN_BCS_BATCH(batch, LEN_COMMAND_OWN);

    OUT_BCS_BATCH(batch, HCP_VP9_PIC_STATE | (LEN_COMMAND_OWN - 2));

    OUT_BCS_BATCH(batch,
                  (frame_height_in_pixel - 1) << 16 |
                  (frame_width_in_pixel - 1));         /* DW 1 */
    OUT_BCS_BATCH(batch,
                  segmentIDStreamInEnable << 31 |
                  segmentIDStreamOutEnable << 30 |
                  pic_param->pic_fields.bits.lossless_flag << 29 |
                  segmentation_temporal_update << 28 |
                  pic_param->pic_fields.bits.segmentation_update_map << 27 |
                  pic_param->pic_fields.bits.segmentation_enabled << 26   |
                  pic_param->sharpness_level << 23 |
                  pic_param->filter_level << 17 |
                  pic_param->pic_fields.bits.frame_parallel_decoding_mode << 16 |
                  pic_param->pic_fields.bits.error_resilient_mode << 15 |
                  pic_param->pic_fields.bits.refresh_frame_context << 14 |
                  last_frame_type << 13 |
                  0 << 12 |   /* tx select mode */
                  0 << 11 |   /* Hybrid Prediction Mode */
                  use_pre_frame_mvs << 10 |
                  pic_param->pic_fields.bits.alt_ref_frame_sign_bias << 9 |
                  pic_param->pic_fields.bits.golden_ref_frame_sign_bias << 8 |
                  pic_param->pic_fields.bits.last_ref_frame_sign_bias << 7 |
                  pic_param->pic_fields.bits.mcomp_filter_type << 4 |
                  pic_param->pic_fields.bits.allow_high_precision_mv << 3 |
                  pic_param->pic_fields.bits.intra_only <<2 |
                  adapt_probabilities_flag << 1 |
                  pic_param->pic_fields.bits.frame_type <<0);               /* DW 2 */
    OUT_BCS_BATCH(batch,
        pic_param->profile << 28 |
        bit_depth_minus8 << 24 |
        0 << 22 | /* only support 4:2:0 */
        pic_param->log2_tile_rows << 8 |
        pic_param->log2_tile_columns <<0);                       /* DW 3 */
    // resolution change case

    // DW4-DW6: horizontal/vertical scale factors (14-bit fixed point) for
    // each of the three references; all-ones when the reference size is
    // outside the valid scaling range.
    for(i = 0; i < 3; i++)
    {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_vp9_surface = NULL;
        scale_w = 0;
        scale_h = 0;
        if (obj_surface && obj_surface->private_data)
        {
            gen9_vp9_surface = obj_surface->private_data;
            if(!gen9_hcpd_vp9_valid_ref_frame_size(gen9_vp9_surface->frame_width,gen9_vp9_surface->frame_height,pic_param->frame_width,pic_param->frame_height))
            {
                /* -1 wraps to 0xffff in the uint16_t scale fields. */
                scale_w = -1;
                scale_h = -1;
            }else
            {
                scale_w = (gen9_vp9_surface->frame_width  << 14) /pic_param->frame_width ;
                scale_h = (gen9_vp9_surface->frame_height << 14) /pic_param->frame_height ;
            }
            OUT_BCS_BATCH(batch,
                scale_w<<16 |
                scale_h);
        }else
        {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    // DW7-DW9: (height-1, width-1) of each reference, 14 bits each.
    for(i = 0; i < 3; i++)
    {
        obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
        gen9_vp9_surface = NULL;

        if (obj_surface && obj_surface->private_data)
        {
            gen9_vp9_surface = obj_surface->private_data;
            OUT_BCS_BATCH(batch,
                ((gen9_vp9_surface->frame_height- 1)&0x3fff)<<16 |
                ((gen9_vp9_surface->frame_width - 1)&0x3fff));
        }else
        {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch,
                  pic_param->first_partition_size << 16 |
                  pic_param->frame_header_length_in_bytes <<0); /* DW 10 */
    OUT_BCS_BATCH(batch,
                  (0 << 3) |
                  (0 << 2) |
                  (1 << 1) |
                  (0 << 0)); /* DW 11, ignored */
    //Rest of the DWs are not valid for BXT
    for(i = 12; i < LEN_COMMAND_OWN; i++)
    {
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);

}
1661
1662 static void
1663 gen9_hcpd_vp9_segment_state(VADriverContextP ctx,
1664                             VADecPictureParameterBufferVP9 *pic_param,
1665                             VASegmentParameterVP9 *seg_param, uint8_t seg_id,
1666                             struct gen9_hcpd_context *gen9_hcpd_context)
1667 {
1668     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
1669
1670     int segment_ref = seg_param->segment_flags.fields.segment_reference;
1671
1672     if((pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME)
1673             || (pic_param->pic_fields.bits.intra_only))
1674         segment_ref = 0;
1675
1676     BEGIN_BCS_BATCH(batch, 7);
1677
1678     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (7 - 2));
1679     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
1680     OUT_BCS_BATCH(batch,
1681                   seg_param->segment_flags.fields.segment_reference_enabled << 3 |
1682                   segment_ref << 1 |
1683                   seg_param->segment_flags.fields.segment_reference_skipped <<0 ); /* DW 2 */
1684     if(pic_param->filter_level)
1685     {
1686         OUT_BCS_BATCH(batch,
1687             seg_param->filter_level[1][1] << 24    | //FilterLevelRef1Mode1
1688             seg_param->filter_level[1][0] << 16    | //FilterLevelRef1Mode0
1689             seg_param->filter_level[0][1] << 8     | //FilterLevelRef0Mode1
1690             seg_param->filter_level[0][0] << 0 );     //FilterLevelRef0Mode0 /* DW 3 */
1691         OUT_BCS_BATCH(batch,
1692             seg_param->filter_level[3][1] << 24    | //FilterLevelRef3Mode1
1693             seg_param->filter_level[3][0] << 16    | //FilterLevelRef3Mode0
1694             seg_param->filter_level[2][1] << 8     | //FilterLevelRef2Mode1
1695             seg_param->filter_level[2][0] << 0 );    //FilterLevelRef2Mode0 /* DW 4 */
1696     }else
1697     {
1698         OUT_BCS_BATCH(batch,
1699             0 );      /* DW 3 */
1700         OUT_BCS_BATCH(batch,
1701             0 );      /* DW 4 */
1702     }
1703     OUT_BCS_BATCH(batch,
1704                   seg_param->luma_ac_quant_scale << 16   |
1705                   seg_param->luma_dc_quant_scale << 0 );    /* DW 5 */
1706     OUT_BCS_BATCH(batch,
1707                   seg_param->chroma_ac_quant_scale << 16 |
1708                   seg_param->chroma_dc_quant_scale << 0 );  /* DW 6 */
1709
1710     ADVANCE_BCS_BATCH(batch);
1711
1712 }
1713
1714 static void
1715 gen9_hcpd_vp9_bsd_object(VADriverContextP ctx,
1716                      VADecPictureParameterBufferVP9 *pic_param,
1717                      VASliceParameterBufferVP9 *slice_param,
1718                      struct gen9_hcpd_context *gen9_hcpd_context)
1719 {
1720     struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
1721     int slice_data_size   = slice_param->slice_data_size - pic_param->frame_header_length_in_bytes;
1722     int slice_data_offset = slice_param->slice_data_offset + pic_param->frame_header_length_in_bytes;
1723
1724     BEGIN_BCS_BATCH(batch, 3);
1725
1726     OUT_BCS_BATCH(batch, HCP_BSD_OBJECT | (3 - 2));
1727
1728     OUT_BCS_BATCH(batch, slice_data_size );
1729     OUT_BCS_BATCH(batch, slice_data_offset);
1730
1731     ADVANCE_BCS_BATCH(batch);
1732
1733 }
1734
/*
 * Decode one VP9 frame: (re)initialize the per-resolution work buffers,
 * refresh the segment-id/MV/probability buffers, build and submit the HCP
 * command sequence on a BSD ring, then update the per-stream bookkeeping
 * (last-frame record, current/last MV buffer swap, frame-context refresh).
 * Returns VA_STATUS_SUCCESS or the failure code from decode init.
 */
static VAStatus
gen9_hcpd_vp9_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen9_hcpd_context *gen9_hcpd_context)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
    VADecPictureParameterBufferVP9 *pic_param;
    VASliceParameterBufferVP9 *slice_param;
    dri_bo *tmp_bo;
    dri_bo *slice_data_bo;
    uint16_t tmp;
    int i = 0, num_segments=0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
    slice_param = (VASliceParameterBufferVP9 *)decode_state->slice_params[0]->buffer;
    pic_param = (VADecPictureParameterBufferVP9 *)decode_state->pic_param->buffer;

    /* NOTE(review): a 1-byte slice is treated as carrying no compressed
     * frame data and the whole submission is skipped with success —
     * presumably a skipped/repeated-frame marker; confirm against callers. */
    if(slice_param->slice_data_size == 1)
    {
        goto out;
    }

    vaStatus = gen9_hcpd_vp9_decode_init(ctx, decode_state, gen9_hcpd_context);

    if (vaStatus != VA_STATUS_SUCCESS)
        goto out;

    //Update segment id buffer if needed
    vp9_update_segmentId_buffer(ctx, decode_state, gen9_hcpd_context);
    //Update mv buffer if needed
    vp9_update_mv_temporal_buffer(ctx, decode_state, gen9_hcpd_context);
    //Update probability buffer if needed
    vp9_update_probabilities(ctx, decode_state, gen9_hcpd_context);

    /* Submit on BSD ring 0 explicitly when two BSD rings exist. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);

    gen9_hcpd_pipe_mode_select(ctx, decode_state, HCP_CODEC_VP9, gen9_hcpd_context);
    //Not sure what the surface id value should be: Gold? ALtRef? PrevRef? or Just RefPic?
    gen9_hcpd_vp9_surface_state(ctx, decode_state, gen9_hcpd_context);

    //Only one VASliceParameterBufferVP9 should be sent per frame
    slice_data_bo = decode_state->slice_datas[0]->bo;

    gen9_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);

    gen9_hcpd_vp9_pipe_buf_addr_state(ctx, decode_state, gen9_hcpd_context);
    //If segmentation is disabled, only SegParam[0] is valid,
    //all others should be populated with 0
    if(!pic_param->pic_fields.bits.segmentation_enabled)
        num_segments = 1;
    else  //If segmentation is enabled, all 8 entries should be valid.
        num_segments = 8;

    for(i=0; i<num_segments; i++) {
        VASegmentParameterVP9 seg_param = slice_param->seg_param[i];
        gen9_hcpd_vp9_segment_state(ctx, pic_param, &seg_param, i, gen9_hcpd_context);
    }

    gen9_hcpd_vp9_pic_state(ctx, decode_state, gen9_hcpd_context);

    gen9_hcpd_vp9_bsd_object(ctx, pic_param, slice_param, gen9_hcpd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    // keep track of the last frame status
    gen9_hcpd_context->last_frame.frame_width = pic_param->frame_width;
    gen9_hcpd_context->last_frame.frame_height = pic_param->frame_height;
    gen9_hcpd_context->last_frame.show_frame = pic_param->pic_fields.bits.show_frame;
    gen9_hcpd_context->last_frame.frame_type = pic_param->pic_fields.bits.frame_type;
    gen9_hcpd_context->last_frame.refresh_frame_context = pic_param->pic_fields.bits.refresh_frame_context;
    gen9_hcpd_context->last_frame.frame_context_idx = pic_param->pic_fields.bits.frame_context_idx;
    gen9_hcpd_context->last_frame.intra_only = pic_param->pic_fields.bits.intra_only;

    // switch mv buffer: the MVs written for this frame become "last" for
    // the next inter frame; key frames write no usable MVs, so no swap.
    if(pic_param->pic_fields.bits.frame_type != HCP_VP9_KEY_FRAME)
    {
        tmp_bo = gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo = tmp_bo;

        tmp = gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_width;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_width = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_width = tmp;

        tmp = gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_height;
        gen9_hcpd_context->vp9_mv_temporal_buffer_last.frame_height = gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height;
        gen9_hcpd_context->vp9_mv_temporal_buffer_curr.frame_height = tmp;


    }
    //update vp9_frame_ctx according to frame_context_id
    if (pic_param->pic_fields.bits.refresh_frame_context)
    {
        void *pfc = (void *)&gen9_hcpd_context->vp9_frame_ctx[pic_param->pic_fields.bits.frame_context_idx];
        void *pprob = NULL;

        //update vp9_fc to frame_context: copy the probability buffer back
        //into the driver-side frame-context slot (key/intra-only frames
        //exclude the inter-specific tail of the buffer).
        dri_bo_map(gen9_hcpd_context->vp9_probability_buffer.bo,1);
        pprob = (void *)gen9_hcpd_context->vp9_probability_buffer.bo->virtual;
        if(pic_param->pic_fields.bits.frame_type == HCP_VP9_KEY_FRAME||
                pic_param->pic_fields.bits.intra_only)
            memcpy(pfc, pprob, VP9_PROB_BUFFER_FIRST_PART_SIZE - VP9_PROB_BUFFER_KEY_INTER_SIZE);
        else
            memcpy(pfc, pprob, VP9_PROB_BUFFER_FIRST_PART_SIZE);

        dri_bo_unmap(gen9_hcpd_context->vp9_probability_buffer.bo);
    }

out:
    return vaStatus;
}
1854
1855
1856 static VAStatus
1857 gen9_hcpd_decode_picture(VADriverContextP ctx,
1858                          VAProfile profile,
1859                          union codec_state *codec_state,
1860                          struct hw_context *hw_context)
1861 {
1862     struct gen9_hcpd_context *gen9_hcpd_context = (struct gen9_hcpd_context *)hw_context;
1863     struct decode_state *decode_state = &codec_state->decode;
1864     VAStatus vaStatus;
1865
1866     assert(gen9_hcpd_context);
1867
1868     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
1869
1870     if (vaStatus != VA_STATUS_SUCCESS)
1871         goto out;
1872
1873     switch (profile) {
1874     case VAProfileHEVCMain:
1875     case VAProfileHEVCMain10:
1876         vaStatus = gen9_hcpd_hevc_decode_picture(ctx, decode_state, gen9_hcpd_context);
1877         break;
1878     case VAProfileVP9Profile0:
1879     case VAProfileVP9Profile2:
1880         vaStatus = gen9_hcpd_vp9_decode_picture(ctx, decode_state, gen9_hcpd_context);
1881         break;
1882
1883     default:
1884         /* should never get here 1!! */
1885         assert(0);
1886         break;
1887     }
1888
1889 out:
1890     return vaStatus;
1891 }
1892
/*
 * Release every auxiliary buffer owned by the HCP decoder context, then
 * the batchbuffer and the context itself.  Buffers belonging to the codec
 * that was not used (HEVC vs VP9) are released unconditionally —
 * NOTE(review): assumes FREE_GEN_BUFFER() and dri_bo_unreference()
 * tolerate never-allocated (NULL) entries; confirm the macro definition.
 */
static void
gen9_hcpd_context_destroy(void *hw_context)
{
    struct gen9_hcpd_context *gen9_hcpd_context = (struct gen9_hcpd_context *)hw_context;

    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_tile_line_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->sao_tile_column_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->hvd_line_rowstore_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->hvd_tile_rowstore_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->vp9_probability_buffer));
    FREE_GEN_BUFFER((&gen9_hcpd_context->vp9_segment_id_buffer));
    dri_bo_unreference(gen9_hcpd_context->vp9_mv_temporal_buffer_curr.bo);
    dri_bo_unreference(gen9_hcpd_context->vp9_mv_temporal_buffer_last.bo);

    intel_batchbuffer_free(gen9_hcpd_context->base.batch);
    free(gen9_hcpd_context);
}
1917
/* HEVC-specific context setup: populate the context's default inverse
 * quantization matrix. */
static void
gen9_hcpd_hevc_context_init(VADriverContextP ctx,
                            struct gen9_hcpd_context *gen9_hcpd_context)
{
    hevc_gen_default_iq_matrix(&gen9_hcpd_context->iq_matrix_hevc);
}
1924
1925 static void
1926 gen9_hcpd_vp9_context_init(VADriverContextP ctx,
1927                             struct gen9_hcpd_context *gen9_hcpd_context)
1928 {
1929
1930     gen9_hcpd_context->last_frame.frame_height  = 0;
1931     gen9_hcpd_context->last_frame.show_frame    = 0;
1932     gen9_hcpd_context->last_frame.frame_type    = 0;
1933     gen9_hcpd_context->last_frame.refresh_frame_context = 0;
1934     gen9_hcpd_context->last_frame.frame_context_idx = 0;
1935     gen9_hcpd_context->last_frame.intra_only = 0;
1936     gen9_hcpd_context->last_frame.prob_buffer_saved_flag = 0;
1937     gen9_hcpd_context->last_frame.prob_buffer_restored_flag = 0;
1938
1939     //Super block in VP9 is 64x64
1940     gen9_hcpd_context->ctb_size = 64;
1941     gen9_hcpd_context->min_cb_size = 8; //Min block size is 8
1942
1943     vp9_gen_default_probabilities(ctx, gen9_hcpd_context);
1944 }
1945
/*
 * Allocate and initialize an HCP decoder context for the given config:
 * wires up the destroy/run vtable entries, creates the batchbuffer,
 * clears the reference-surface table and runs codec-specific init.
 * Returns NULL on allocation failure.
 */
static struct hw_context *
gen9_hcpd_context_init(VADriverContextP ctx, struct object_config *object_config)
{
    struct intel_driver_data *intel = intel_driver_data(ctx);
    struct gen9_hcpd_context *gen9_hcpd_context = calloc(1, sizeof(struct gen9_hcpd_context));
    int i;

    if (!gen9_hcpd_context)
        return NULL;

    gen9_hcpd_context->base.destroy = gen9_hcpd_context_destroy;
    gen9_hcpd_context->base.run = gen9_hcpd_decode_picture;
    /* NOTE(review): the batch is created with the VEBOX flag, but decoding
     * starts it via intel_batchbuffer_start_atomic_bcs*() (see the VP9
     * decode path) — confirm the initial ring flag is irrelevant here. */
    gen9_hcpd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_VEBOX, 0);

    /* Mark every reference slot empty/invalid. */
    for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
        gen9_hcpd_context->reference_surfaces[i].surface_id = VA_INVALID_ID;
        gen9_hcpd_context->reference_surfaces[i].frame_store_id = -1;
        gen9_hcpd_context->reference_surfaces[i].obj_surface = NULL;
    }

    switch (object_config->profile) {
    case VAProfileHEVCMain:
    case VAProfileHEVCMain10:
        gen9_hcpd_hevc_context_init(ctx, gen9_hcpd_context);
        break;
    case VAProfileVP9Profile0:
    case VAProfileVP9Profile2:
        gen9_hcpd_vp9_context_init(ctx, gen9_hcpd_context);
        break;

    default:
        break;
    }

    return (struct hw_context *)gen9_hcpd_context;
}
1982
1983 struct hw_context *
1984 gen9_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
1985 {
1986     if (obj_config->profile == VAProfileHEVCMain ||
1987         obj_config->profile == VAProfileHEVCMain10 ||
1988         obj_config->profile == VAProfileVP9Profile0 ||
1989         obj_config->profile == VAProfileVP9Profile2) {
1990         return gen9_hcpd_context_init(ctx, obj_config);
1991     } else {
1992         return gen8_dec_hw_context_init(ctx, obj_config);
1993     }
1994 }
1995
1996 void gen9_max_resolution(struct i965_driver_data *i965,
1997                          struct object_config *obj_config,
1998                          int *w,                                /* out */
1999                          int *h)                                /* out */
2000 {
2001     if (obj_config->profile == VAProfileJPEGBaseline) {
2002         *w = 8192;
2003         *h = 8192;
2004     } else {
2005         *w = i965->codec_info->max_width;
2006         *h = i965->codec_info->max_height;
2007     }
2008 }