2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
32 #include <va/va_dec_hevc.h>
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_drv_video.h"
38 #include "i965_decoder_utils.h"
41 #include "intel_media.h"
/*
 * OUT_BUFFER emits one buffer-address entry into the BCS batch; a variable
 * named `batch` must be in scope at the expansion site.  The relocation it
 * emits uses I915_GEM_DOMAIN_RENDER as the read domain and, only when
 * is_target is non-zero, also as the write domain.
 *
 * The four wrappers below fix the flags:
 *   MA  / NMA        -> ma = 1 / 0
 *   TARGET / REFERENCE -> is_target = 1 / 0
 *
 * NOTE(review): several continuation lines of OUT_BUFFER are missing from
 * this listing.  `ma` presumably selects whether a trailing memory-attribute
 * dword is written -- confirm against the complete macro.
 */
43 #define OUT_BUFFER(buf_bo, is_target, ma) do { \
45 OUT_BCS_RELOC(batch, \
47 I915_GEM_DOMAIN_RENDER, \
48 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
51 OUT_BCS_BATCH(batch, 0); \
53 OUT_BCS_BATCH(batch, 0); \
55 OUT_BCS_BATCH(batch, 0); \
58 #define OUT_BUFFER_MA_TARGET(buf_bo) OUT_BUFFER(buf_bo, 1, 1)
59 #define OUT_BUFFER_MA_REFERENCE(buf_bo) OUT_BUFFER(buf_bo, 0, 1)
60 #define OUT_BUFFER_NMA_TARGET(buf_bo) OUT_BUFFER(buf_bo, 1, 0)
61 #define OUT_BUFFER_NMA_REFERENCE(buf_bo) OUT_BUFFER(buf_bo, 0, 0)
/*
 * Attach HEVC private data to obj_surface and make sure it owns a
 * motion-vector temporal buffer.
 *
 * - Installs gen_free_hevc_surface as the surface's private-data destructor.
 * - Allocates a zeroed GenHevcSurface (frame_store_id = -1) the first time
 *   the surface has no private data.
 * - Allocates motion_vector_temporal_bo when it is still NULL.  The size is
 *   computed from picture_width_in_pixels / picture_height_in_pixels /
 *   ctb_size in gen9_hcpd_context, so those fields must already be set.
 *
 * NOTE(review): some lines of this function (braces, local declarations,
 * the trailing dri_bo_alloc arguments) are missing from this listing.
 */
64 gen9_hcpd_init_hevc_surface(VADriverContextP ctx,
65 VAPictureParameterBufferHEVC *pic_param,
66 struct object_surface *obj_surface,
67 struct gen9_hcpd_context *gen9_hcpd_context)
69 struct i965_driver_data *i965 = i965_driver_data(ctx);
70 GenHevcSurface *gen9_hevc_surface;
75 obj_surface->free_private_data = gen_free_hevc_surface;
76 gen9_hevc_surface = obj_surface->private_data;
78 if (!gen9_hevc_surface) {
/* calloc(nmemb, size): one zero-initialised GenHevcSurface. */
79 gen9_hevc_surface = calloc(1, sizeof(*gen9_hevc_surface));
/*
 * Catch allocation failure before the pointer is dereferenced below.  This
 * follows the file's assert-based precondition style; it is compiled out
 * under NDEBUG.
 */
assert(gen9_hevc_surface);
80 gen9_hevc_surface->base.frame_store_id = -1;
81 obj_surface->private_data = gen9_hevc_surface;
84 if (gen9_hevc_surface->motion_vector_temporal_bo == NULL) {
/*
 * Count the regions covering the picture (64x16 pixels when the CTB size is
 * 16, 32x32 pixels in the other formula), then scale by 64 bytes per region.
 */
87 if (gen9_hcpd_context->ctb_size == 16)
88 size = ((gen9_hcpd_context->picture_width_in_pixels + 63) >> 6) *
89 ((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4);
91 size = ((gen9_hcpd_context->picture_width_in_pixels + 31) >> 5) *
92 ((gen9_hcpd_context->picture_height_in_pixels + 31) >> 5);
94 size <<= 6; /* in unit of 64bytes */
95 gen9_hevc_surface->motion_vector_temporal_bo = dri_bo_alloc(i965->intel.bufmgr,
96 "motion vector temporal buffer",
/*
 * Per-picture initialisation for HEVC decoding.
 *
 * Calls intel_update_hevc_frame_store_index() for the context's
 * reference_surfaces / fs_ctx, caches picture geometry derived from the
 * picture parameter buffer, prepares the render target surface, and issues
 * ALLOC_GEN_BUFFER for each of the deblocking, metadata and SAO line/column
 * buffers.  Also resets the "first inter slice" tracking fields.
 *
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): some lines of this function are missing from this listing
 * (local declarations, two call arguments, and the statements between each
 * size computation and its ALLOC_GEN_BUFFER call), so the final unit of
 * `size` cannot be confirmed from here.
 */
103 gen9_hcpd_hevc_decode_init(VADriverContextP ctx,
104 struct decode_state *decode_state,
105 struct gen9_hcpd_context *gen9_hcpd_context)
107 struct i965_driver_data *i965 = i965_driver_data(ctx);
108 VAPictureParameterBufferHEVC *pic_param;
109 struct object_surface *obj_surface;
112 assert(decode_state->pic_param && decode_state->pic_param->buffer);
113 pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
114 intel_update_hevc_frame_store_index(ctx,
117 gen9_hcpd_context->reference_surfaces,
118 &gen9_hcpd_context->fs_ctx);
/* Cache the picture dimensions in luma samples. */
120 gen9_hcpd_context->picture_width_in_pixels = pic_param->pic_width_in_luma_samples;
121 gen9_hcpd_context->picture_height_in_pixels = pic_param->pic_height_in_luma_samples;
122 gen9_hcpd_context->ctb_size = (1 << (pic_param->log2_min_luma_coding_block_size_minus3 +
124 pic_param->log2_diff_max_min_luma_coding_block_size));
/* Picture size in CTBs, rounded up to whole CTBs. */
125 gen9_hcpd_context->picture_width_in_ctbs = ALIGN(gen9_hcpd_context->picture_width_in_pixels, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
126 gen9_hcpd_context->picture_height_in_ctbs = ALIGN(gen9_hcpd_context->picture_height_in_pixels, gen9_hcpd_context->ctb_size) / gen9_hcpd_context->ctb_size;
/* Minimum coding block size, and picture size in min-CB units minus one. */
127 gen9_hcpd_context->min_cb_size = (1 << (pic_param->log2_min_luma_coding_block_size_minus3 + 3));
128 gen9_hcpd_context->picture_width_in_min_cb_minus1 = gen9_hcpd_context->picture_width_in_pixels / gen9_hcpd_context->min_cb_size - 1;
129 gen9_hcpd_context->picture_height_in_min_cb_minus1 = gen9_hcpd_context->picture_height_in_pixels / gen9_hcpd_context->min_cb_size - 1;
131 /* Current decoded picture */
132 obj_surface = decode_state->render_object;
133 hevc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
134 gen9_hcpd_init_hevc_surface(ctx, pic_param, obj_surface, gen9_hcpd_context);
/* Deblocking filter line and tile-line buffers share one size (width based). */
136 size = ALIGN(gen9_hcpd_context->picture_width_in_pixels, 32) >> 3;
138 ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer), "line buffer", size);
139 ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);
/* Deblocking filter tile-column buffer (height based). */
141 size = ALIGN(gen9_hcpd_context->picture_height_in_pixels + 6 * gen9_hcpd_context->picture_height_in_ctbs, 32) >> 3;
143 ALLOC_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);
/* Metadata line buffer (width based). */
145 size = (((gen9_hcpd_context->picture_width_in_pixels + 15) >> 4) * 188 + 9 * gen9_hcpd_context->picture_width_in_ctbs + 1023) >> 9;
147 ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer), "metadata line buffer", size);
/* Metadata tile-line buffer (width based). */
149 size = (((gen9_hcpd_context->picture_width_in_pixels + 15) >> 4) * 172 + 9 * gen9_hcpd_context->picture_width_in_ctbs + 1023) >> 9;
151 ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
/* Metadata tile-column buffer: Cherryview uses its own sizing formula. */
153 if (IS_CHERRYVIEW(i965->intel.device_info))
154 size = (((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4) * 256 + 9 * gen9_hcpd_context->picture_height_in_ctbs + 1023) >> 9;
156 size = (((gen9_hcpd_context->picture_height_in_pixels + 15) >> 4) * 176 + 89 * gen9_hcpd_context->picture_height_in_ctbs + 1023) >> 9;
158 ALLOC_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
/* SAO line, tile-line and tile-column buffers. */
160 size = ALIGN(((gen9_hcpd_context->picture_width_in_pixels >> 1) + 3 * gen9_hcpd_context->picture_width_in_ctbs), 16) >> 3;
162 ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_line_buffer), "sao line buffer", size);
164 size = ALIGN(((gen9_hcpd_context->picture_width_in_pixels >> 1) + 6 * gen9_hcpd_context->picture_width_in_ctbs), 16) >> 3;
166 ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_tile_line_buffer), "sao tile line buffer", size);
168 size = ALIGN(((gen9_hcpd_context->picture_height_in_pixels >> 1) + 6 * gen9_hcpd_context->picture_height_in_ctbs), 16) >> 3;
170 ALLOC_GEN_BUFFER((&gen9_hcpd_context->sao_tile_column_buffer), "sao tile column buffer", size);
/* Forget the collocated-reference state remembered from the previous picture. */
172 gen9_hcpd_context->first_inter_slice_collocated_ref_idx = 0;
173 gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = 0;
174 gen9_hcpd_context->first_inter_slice_valid = 0;
176 return VA_STATUS_SUCCESS;
/*
 * Emit the 4-dword HCP_PIPE_MODE_SELECT command in decode mode
 * (HCP_CODEC_SELECT_DECODE, Pic Status / Error Report disabled).
 * Only HCP_CODEC_HEVC is accepted; anything else trips the assert.
 *
 * NOTE(review): the `codec` parameter declaration and the start of the
 * second dword are missing from this listing.
 */
180 gen9_hcpd_pipe_mode_select(VADriverContextP ctx,
181 struct decode_state *decode_state,
183 struct gen9_hcpd_context *gen9_hcpd_context)
185 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
187 assert(codec == HCP_CODEC_HEVC);
189 BEGIN_BCS_BATCH(batch, 4);
/* Command header; the length field is the dword count minus 2. */
191 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
194 (0 << 3) | /* disable Pic Status / Error Report */
195 HCP_CODEC_SELECT_DECODE);
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
199 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 3-dword HCP_SURFACE_STATE command for the render target
 * (decode_state->render_object): surface id 0, pitch taken from
 * obj_surface->width, format SURFACE_FORMAT_PLANAR_420_8.
 *
 * NOTE(review): y_cb_offset is read from the surface here; the line that
 * writes it into the command is missing from this listing.
 */
203 gen9_hcpd_surface_state(VADriverContextP ctx,
204 struct decode_state *decode_state,
205 struct gen9_hcpd_context *gen9_hcpd_context)
207 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
208 struct object_surface *obj_surface = decode_state->render_object;
209 unsigned int y_cb_offset;
213 y_cb_offset = obj_surface->y_cb_offset;
215 BEGIN_BCS_BATCH(batch, 3);
217 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
219 (0 << 28) | /* surface id */
220 (obj_surface->width - 1)); /* pitch - 1 */
222 (SURFACE_FORMAT_PLANAR_420_8 << 28) |
225 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 95-dword HCP_PIPE_BUF_ADDR_STATE command.
 *
 * In order it programs: the decoded picture, the deblocking / metadata / SAO
 * line and column buffers, the current picture's motion-vector temporal
 * buffer, one entry per reference surface, and one motion-vector temporal
 * buffer entry per reference surface.  Slots without a buffer are written
 * with NULL.
 *
 * The render target must have a bo and HEVC private data with a
 * motion_vector_temporal_bo (both asserted).
 *
 * NOTE(review): the `if`/`else` lines selecting between the buffer and NULL
 * inside the two loops are missing from this listing.
 */
229 gen9_hcpd_pipe_buf_addr_state(VADriverContextP ctx,
230 struct decode_state *decode_state,
231 struct gen9_hcpd_context *gen9_hcpd_context)
233 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
234 struct object_surface *obj_surface;
235 GenHevcSurface *gen9_hevc_surface;
238 BEGIN_BCS_BATCH(batch, 95);
240 OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));
242 obj_surface = decode_state->render_object;
243 assert(obj_surface && obj_surface->bo);
244 gen9_hevc_surface = obj_surface->private_data;
245 assert(gen9_hevc_surface && gen9_hevc_surface->motion_vector_temporal_bo);
247 OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
248 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
249 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
250 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
251 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_line_buffer.bo); /* DW 13..15 */
252 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_line_buffer.bo); /* DW 16..18 */
253 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->metadata_tile_column_buffer.bo); /* DW 19..21 */
254 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_line_buffer.bo); /* DW 22..24 */
255 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_tile_line_buffer.bo); /* DW 25..27 */
256 OUT_BUFFER_MA_TARGET(gen9_hcpd_context->sao_tile_column_buffer.bo); /* DW 28..30 */
257 OUT_BUFFER_MA_TARGET(gen9_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
258 OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */
/* Reference picture addresses, one entry per reference_surfaces slot. */
260 for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
261 obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
264 OUT_BUFFER_NMA_REFERENCE(obj_surface->bo);
266 OUT_BUFFER_NMA_REFERENCE(NULL);
268 OUT_BCS_BATCH(batch, 0); /* DW 53, memory address attributes */
270 OUT_BUFFER_MA_REFERENCE(NULL); /* DW 54..56, ignore for decoding mode */
271 OUT_BUFFER_MA_TARGET(NULL);
272 OUT_BUFFER_MA_TARGET(NULL);
273 OUT_BUFFER_MA_TARGET(NULL);
/* Motion-vector temporal buffers of the reference pictures, same slot order. */
275 for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
276 obj_surface = gen9_hcpd_context->reference_surfaces[i].obj_surface;
277 gen9_hevc_surface = NULL;
279 if (obj_surface && obj_surface->private_data)
280 gen9_hevc_surface = obj_surface->private_data;
282 if (gen9_hevc_surface)
283 OUT_BUFFER_NMA_REFERENCE(gen9_hevc_surface->motion_vector_temporal_bo);
285 OUT_BUFFER_NMA_REFERENCE(NULL);
287 OUT_BCS_BATCH(batch, 0); /* DW 82, memory address attributes */
289 OUT_BUFFER_MA_TARGET(NULL); /* DW 83..85, ignore for HEVC */
290 OUT_BUFFER_MA_TARGET(NULL); /* DW 86..88, ignore for HEVC */
291 OUT_BUFFER_MA_TARGET(NULL); /* DW 89..91, ignore for HEVC */
292 OUT_BUFFER_MA_TARGET(NULL); /* DW 92..94, ignore for HEVC */
294 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 14-dword HCP_IND_OBJ_BASE_ADDR_STATE command.  slice_data_bo is
 * programmed as the indirect object base (read-only reference); every other
 * address in the command is written as NULL.
 */
298 gen9_hcpd_ind_obj_base_addr_state(VADriverContextP ctx,
299 dri_bo *slice_data_bo,
300 struct gen9_hcpd_context *gen9_hcpd_context)
302 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
304 BEGIN_BCS_BATCH(batch, 14);
306 OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
307 OUT_BUFFER_MA_REFERENCE(slice_data_bo); /* DW 1..3 */
308 OUT_BUFFER_NMA_REFERENCE(NULL); /* DW 4..5, Upper Bound */
309 OUT_BUFFER_MA_REFERENCE(NULL); /* DW 6..8, CU, ignored */
310 OUT_BUFFER_MA_TARGET(NULL); /* DW 9..11, PAK-BSE, ignored */
311 OUT_BUFFER_NMA_TARGET(NULL); /* DW 12..13, Upper Bound */
313 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one 18-dword HCP_QM_STATE command carrying a single quantiser matrix.
 *
 * The first qm_length bytes of qm are copied into a zero-filled 64-byte
 * staging buffer, and all 64 bytes are always written to the batch.
 * qm_length must not exceed 64 (asserted).
 *
 * NOTE(review): most of the parameter list and part of the descriptor dword
 * are missing from this listing; color_component is the only descriptor
 * field visible here (shifted left by 3).
 */
317 gen9_hcpd_qm_state(VADriverContextP ctx,
324 struct gen9_hcpd_context *gen9_hcpd_context)
326 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
327 unsigned char qm_buffer[64];
/* Zero-pad matrices shorter than 64 bytes. */
329 assert(qm_length <= 64);
330 memset(qm_buffer, 0, sizeof(qm_buffer));
331 memcpy(qm_buffer, qm, qm_length);
333 BEGIN_BCS_BATCH(batch, 18);
335 OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
338 color_component << 3 |
341 intel_batchbuffer_data(batch, qm_buffer, 64);
343 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the HEVC quantiser matrices through gen9_hcpd_qm_state().
 *
 * The matrix source is the application's IQ matrix buffer when one is
 * attached to decode_state; the context's iq_matrix_hevc is used when
 * scaling_list_enabled_flag is 0.  Matrices are sent in four groups:
 * six 4x4 lists (16 bytes each), six 8x8, six 16x16 and two 32x32 lists
 * (64 bytes each).
 *
 * NOTE(review): the `else` line and some call arguments are missing from this
 * listing; the fallback at orig. line 358 is presumably the no-buffer case.
 */
347 gen9_hcpd_hevc_qm_state(VADriverContextP ctx,
348 struct decode_state *decode_state,
349 struct gen9_hcpd_context *gen9_hcpd_context)
351 VAIQMatrixBufferHEVC *iq_matrix;
352 VAPictureParameterBufferHEVC *pic_param;
355 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
356 iq_matrix = (VAIQMatrixBufferHEVC *)decode_state->iq_matrix->buffer;
358 iq_matrix = &gen9_hcpd_context->iq_matrix_hevc;
360 assert(decode_state->pic_param && decode_state->pic_param->buffer);
361 pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
/* Scaling lists disabled: ignore any application matrix. */
363 if (!pic_param->pic_fields.bits.scaling_list_enabled_flag)
364 iq_matrix = &gen9_hcpd_context->iq_matrix_hevc;
/* 4x4 lists. */
366 for (i = 0; i < 6; i++) {
367 gen9_hcpd_qm_state(ctx,
369 iq_matrix->ScalingList4x4[i], 16,
/* 8x8 lists. */
373 for (i = 0; i < 6; i++) {
374 gen9_hcpd_qm_state(ctx,
376 iq_matrix->ScalingList8x8[i], 64,
/* 16x16 lists, each with its DC value. */
380 for (i = 0; i < 6; i++) {
381 gen9_hcpd_qm_state(ctx,
382 2, i % 3, i / 3, iq_matrix->ScalingListDC16x16[i],
383 iq_matrix->ScalingList16x16[i], 64,
/* 32x32 lists, each with its DC value. */
387 for (i = 0; i < 2; i++) {
388 gen9_hcpd_qm_state(ctx,
389 3, 0, i % 2, iq_matrix->ScalingListDC32x32[i],
390 iq_matrix->ScalingList32x32[i], 64,
/*
 * Emit the 19-dword HCP_PIC_STATE command from the picture parameter buffer
 * and the geometry cached in gen9_hcpd_context.
 *
 * PCM block sizes and sample bit depths are read from the picture parameters
 * when pcm_enabled_flag is set; the bit depths otherwise stay at the
 * defaults declared below (7, i.e. 8-bit minus one).
 * loop_filter_across_tiles_enabled_flag is taken from the picture parameters
 * only when tiles are enabled and defaults to 1 otherwise.
 *
 * NOTE(review): the OUT_BCS_BATCH opener lines, the `else` line of the PCM
 * branch and the delimiters of one block comment are missing from this
 * listing.
 */
396 gen9_hcpd_pic_state(VADriverContextP ctx,
397 struct decode_state *decode_state,
398 struct gen9_hcpd_context *gen9_hcpd_context)
400 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
401 VAPictureParameterBufferHEVC *pic_param;
402 int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
403 int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
407 * When not present, the value of loop_filter_across_tiles_enabled_flag
408 * is inferred to be equal to 1.
410 int loop_filter_across_tiles_enabled_flag = 1;
412 assert(decode_state->pic_param && decode_state->pic_param->buffer);
413 pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
415 if (pic_param->pic_fields.bits.pcm_enabled_flag) {
416 max_pcm_size_minus3 = pic_param->log2_min_pcm_luma_coding_block_size_minus3 +
417 pic_param->log2_diff_max_min_pcm_luma_coding_block_size;
418 min_pcm_size_minus3 = pic_param->log2_min_pcm_luma_coding_block_size_minus3;
419 pcm_sample_bit_depth_luma_minus1 = (pic_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
420 pcm_sample_bit_depth_chroma_minus1 = (pic_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
/* Presumably the non-PCM branch: max PCM size follows the CTB size, capped at 2. */
422 max_pcm_size_minus3 = MIN(pic_param->log2_min_luma_coding_block_size_minus3 + pic_param->log2_diff_max_min_luma_coding_block_size, 2);
425 if (pic_param->pic_fields.bits.tiles_enabled_flag)
426 loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;
428 BEGIN_BCS_BATCH(batch, 19);
430 OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));
433 gen9_hcpd_context->picture_height_in_min_cb_minus1 << 16 |
434 gen9_hcpd_context->picture_width_in_min_cb_minus1);
436 max_pcm_size_minus3 << 10 |
437 min_pcm_size_minus3 << 8 |
438 (pic_param->log2_min_transform_block_size_minus2 +
439 pic_param->log2_diff_max_min_transform_block_size) << 6 |
440 pic_param->log2_min_transform_block_size_minus2 << 4 |
441 (pic_param->log2_min_luma_coding_block_size_minus3 +
442 pic_param->log2_diff_max_min_luma_coding_block_size) << 2 |
443 pic_param->log2_min_luma_coding_block_size_minus3);
444 OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
447 pic_param->pic_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
448 pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
449 pic_param->pic_fields.bits.amp_enabled_flag << 23 |
450 pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
451 !(pic_param->CurrPic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD) << 21 |
452 !!(pic_param->CurrPic.flags & VA_PICTURE_HEVC_FIELD_PIC) << 20 |
453 pic_param->pic_fields.bits.weighted_pred_flag << 19 |
454 pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
455 pic_param->pic_fields.bits.tiles_enabled_flag << 17 |
456 pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
457 loop_filter_across_tiles_enabled_flag << 15 |
458 pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |
459 pic_param->log2_parallel_merge_level_minus2 << 10 |
460 pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |
461 pic_param->pic_fields.bits.pcm_loop_filter_disabled_flag << 8 |
462 (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |
463 pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |
464 pic_param->pic_fields.bits.pcm_enabled_flag << 4 |
465 pic_param->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag << 3 |
468 pcm_sample_bit_depth_luma_minus1 << 20 |
469 pcm_sample_bit_depth_chroma_minus1 << 16 |
470 pic_param->max_transform_hierarchy_depth_inter << 13 |
471 pic_param->max_transform_hierarchy_depth_intra << 10 |
472 (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
473 (pic_param->pps_cb_qp_offset & 0x1f));
/* The remaining dwords of the command are written as zero. */
477 OUT_BCS_BATCH(batch, 0);
478 OUT_BCS_BATCH(batch, 0);
479 OUT_BCS_BATCH(batch, 0);
480 OUT_BCS_BATCH(batch, 0); /* DW 10 */
481 OUT_BCS_BATCH(batch, 0);
482 OUT_BCS_BATCH(batch, 0);
483 OUT_BCS_BATCH(batch, 0);
484 OUT_BCS_BATCH(batch, 0);
485 OUT_BCS_BATCH(batch, 0); /* DW 15 */
486 OUT_BCS_BATCH(batch, 0);
487 OUT_BCS_BATCH(batch, 0);
488 OUT_BCS_BATCH(batch, 0);
490 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 13-dword HCP_TILE_STATE command.
 *
 * pos_col / pos_row hold cumulative tile start positions: entry 0 is 0 and
 * each following entry adds the previous tile's width or height
 * (column_width_minus1[i] + 1, row_height_minus1[i] + 1).  At most 19 column
 * widths and 21 row heights are consumed, which keeps the writes inside the
 * 20- and 24-byte arrays.  Unused entries stay zero.  Both arrays are copied
 * into the command in full.
 */
494 gen9_hcpd_tile_state(VADriverContextP ctx,
495 struct decode_state *decode_state,
496 struct gen9_hcpd_context *gen9_hcpd_context)
498 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
499 VAPictureParameterBufferHEVC *pic_param;
500 uint8_t pos_col[20], pos_row[24];
503 assert(decode_state->pic_param && decode_state->pic_param->buffer);
504 pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
506 memset(pos_col, 0, sizeof(pos_col));
507 memset(pos_row, 0, sizeof(pos_row));
/* Prefix sums of tile widths; values are stored in uint8_t. */
509 for (i = 0; i <= MIN(pic_param->num_tile_columns_minus1, 18); i++)
510 pos_col[i + 1] = pos_col[i] + pic_param->column_width_minus1[i] + 1;
/* Prefix sums of tile heights. */
512 for (i = 0; i <= MIN(pic_param->num_tile_rows_minus1, 20); i++)
513 pos_row[i + 1] = pos_row[i] + pic_param->row_height_minus1[i] + 1;
515 BEGIN_BCS_BATCH(batch, 13);
517 OUT_BCS_BATCH(batch, HCP_TILE_STATE | (13 - 2));
520 pic_param->num_tile_columns_minus1 << 5 |
521 pic_param->num_tile_rows_minus1);
522 intel_batchbuffer_data(batch, pos_col, 20);
523 intel_batchbuffer_data(batch, pos_row, 24);
525 ADVANCE_BCS_BATCH(batch);
/*
 * Look up the frame-store id assigned to ref_pic.
 *
 * Scans frame_store for the entry whose surface_id equals
 * ref_pic->picture_id and returns that entry's frame_store_id, which is
 * asserted to be below MAX_GEN_HCP_REFERENCE_FRAMES.
 *
 * NOTE(review): the statements executed for an invalid picture
 * (VA_INVALID_ID or VA_PICTURE_HEVC_INVALID) and after an unsuccessful scan
 * are missing from this listing; their return values cannot be confirmed
 * from here.
 */
529 gen9_hcpd_get_reference_picture_frame_id(VAPictureHEVC *ref_pic,
530 GenFrameStore frame_store[MAX_GEN_HCP_REFERENCE_FRAMES])
534 if (ref_pic->picture_id == VA_INVALID_ID ||
535 (ref_pic->flags & VA_PICTURE_HEVC_INVALID))
538 for (i = 0; i < MAX_GEN_HCP_REFERENCE_FRAMES; i++) {
539 if (ref_pic->picture_id == frame_store[i].surface_id) {
540 assert(frame_store[i].frame_store_id < MAX_GEN_HCP_REFERENCE_FRAMES);
541 return frame_store[i].frame_store_id;
545 /* Should never get here !!! */
/*
 * Emit one 18-dword HCP_REF_IDX_STATE command for reference list `list`
 * (0 selects the L0 fields of slice_param, non-zero selects L1).
 *
 * The command carries 16 entry dwords.  The first
 * MIN(num_ref_idx_active_minus1 + 1, 15) entries describe the pictures named
 * by RefPicList[list]: field / long-term flags, the frame-store id, and the
 * POC distance from the current picture clamped to [-128, 127].
 *
 * NOTE(review): the `list` parameter declaration, the OUT_BCS_BATCH opener
 * lines and the `else` line are missing from this listing; the zero dword at
 * orig. line 582 is presumably the filler for unused entries.
 */
551 gen9_hcpd_ref_idx_state_1(struct intel_batchbuffer *batch,
553 VAPictureParameterBufferHEVC *pic_param,
554 VASliceParameterBufferHEVC *slice_param,
555 GenFrameStore frame_store[MAX_GEN_HCP_REFERENCE_FRAMES])
558 uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
559 uint8_t *ref_list = slice_param->RefPicList[list];
561 BEGIN_BCS_BATCH(batch, 18);
563 OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
565 num_ref_minus1 << 1 |
568 for (i = 0; i < 16; i++) {
569 if (i < MIN((num_ref_minus1 + 1), 15)) {
/* ref_list[i] indexes into the picture parameter's ReferenceFrames array. */
570 VAPictureHEVC *ref_pic = &pic_param->ReferenceFrames[ref_list[i]];
571 VAPictureHEVC *curr_pic = &pic_param->CurrPic;
574 !(ref_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD) << 15 |
575 !!(ref_pic->flags & VA_PICTURE_HEVC_FIELD_PIC) << 14 |
576 !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |
579 gen9_hcpd_get_reference_picture_frame_id(ref_pic, frame_store) << 8 |
580 (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
582 OUT_BCS_BATCH(batch, 0);
586 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the reference index state for a slice by calling
 * gen9_hcpd_ref_idx_state_1() for list 0 and then list 1.
 *
 * NOTE(review): the statements guarded by the two slice-type checks are
 * missing from this listing; presumably they return early, so that I slices
 * emit nothing and P slices emit list 0 only.
 */
590 gen9_hcpd_ref_idx_state(VADriverContextP ctx,
591 VAPictureParameterBufferHEVC *pic_param,
592 VASliceParameterBufferHEVC *slice_param,
593 struct gen9_hcpd_context *gen9_hcpd_context)
595 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
597 if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
600 gen9_hcpd_ref_idx_state_1(batch, 0, pic_param, slice_param, gen9_hcpd_context->reference_surfaces);
602 if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
605 gen9_hcpd_ref_idx_state_1(batch, 1, pic_param, slice_param, gen9_hcpd_context->reference_surfaces);
/*
 * Emit one 34-dword HCP_WEIGHTOFFSET command for reference list `list`
 * (1 selects the L1 tables of slice_param, any other value selects L0).
 *
 * After the header and the list selector come 16 luma dwords and 16 chroma
 * dwords.  The first MIN(num_ref_idx_active_minus1 + 1, 15) entries of each
 * group pack the signed 8-bit offset and delta-weight values as bytes.
 *
 * NOTE(review): the `list` parameter declaration, the OUT_BCS_BATCH opener
 * lines and the `else` lines are missing from this listing; the zero dwords
 * are presumably the filler for unused entries.
 */
609 gen9_hcpd_weightoffset_state_1(struct intel_batchbuffer *batch,
611 VASliceParameterBufferHEVC *slice_param)
614 uint8_t num_ref_minus1 = (list == 1) ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1;
615 int8_t *luma_offset = (list == 1) ? slice_param->luma_offset_l1 : slice_param->luma_offset_l0;
616 int8_t *delta_luma_weight = (list == 1) ? slice_param->delta_luma_weight_l1 : slice_param->delta_luma_weight_l0;
617 int8_t (* chroma_offset)[2] = (list == 1) ? slice_param->ChromaOffsetL1 : slice_param->ChromaOffsetL0;
618 int8_t (* delta_chroma_weight)[2] = (list == 1) ? slice_param->delta_chroma_weight_l1 : slice_param->delta_chroma_weight_l0;
620 BEGIN_BCS_BATCH(batch, 34);
622 OUT_BCS_BATCH(batch, HCP_WEIGHTOFFSET | (34 - 2));
623 OUT_BCS_BATCH(batch, list);
/* Luma: offset in bits 15..8, delta weight in bits 7..0. */
625 for (i = 0; i < 16; i++) {
626 if (i < MIN((num_ref_minus1 + 1), 15)) {
628 (luma_offset[i] & 0xff) << 8 |
629 (delta_luma_weight[i] & 0xff));
631 OUT_BCS_BATCH(batch, 0);
/* Chroma: component [1] in the upper half-word, component [0] in the lower. */
634 for (i = 0; i < 16; i++) {
635 if (i < MIN((num_ref_minus1 + 1), 15)) {
637 (chroma_offset[i][1] & 0xff) << 24 |
638 (delta_chroma_weight[i][1] & 0xff) << 16 |
639 (chroma_offset[i][0] & 0xff) << 8 |
640 (delta_chroma_weight[i][0] & 0xff));
642 OUT_BCS_BATCH(batch, 0);
646 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the weighted-prediction tables for a slice by calling
 * gen9_hcpd_weightoffset_state_1() for list 0 and then list 1.
 *
 * The visible checks test for: an I slice; a P slice with weighted_pred_flag
 * clear or a B slice with weighted_bipred_flag clear; and a P slice after
 * list 0 has been emitted.
 *
 * NOTE(review): the statements guarded by those checks are missing from this
 * listing; presumably each one returns early.
 */
650 gen9_hcpd_weightoffset_state(VADriverContextP ctx,
651 VAPictureParameterBufferHEVC *pic_param,
652 VASliceParameterBufferHEVC *slice_param,
653 struct gen9_hcpd_context *gen9_hcpd_context)
655 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
657 if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
660 if ((slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P &&
661 !pic_param->pic_fields.bits.weighted_pred_flag) ||
662 (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B &&
663 !pic_param->pic_fields.bits.weighted_bipred_flag))
666 gen9_hcpd_weightoffset_state_1(batch, 0, slice_param);
668 if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
671 gen9_hcpd_weightoffset_state_1(batch, 1, slice_param);
/*
 * Resolve the frame-store id of the slice's collocated reference picture.
 *
 * The reference list is RefPicList[0] for P slices and for B slices with
 * collocated_from_l0_flag set, and RefPicList[1] otherwise (asserted to be a
 * B slice).  slice_param->collocated_ref_idx selects the entry, which is then
 * mapped through gen9_hcpd_get_reference_picture_frame_id().
 *
 * NOTE(review): the statements guarded by the three leading checks
 * (collocated_ref_idx > 14, temporal MVP disabled, I slice) are missing from
 * this listing; presumably each returns a default index.
 */
675 gen9_hcpd_get_collocated_ref_idx(VADriverContextP ctx,
676 VAPictureParameterBufferHEVC *pic_param,
677 VASliceParameterBufferHEVC *slice_param,
678 struct gen9_hcpd_context *gen9_hcpd_context)
681 VAPictureHEVC *ref_pic;
683 if (slice_param->collocated_ref_idx > 14)
686 if (!slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag)
689 if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
692 if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P ||
693 (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B &&
694 slice_param->LongSliceFlags.fields.collocated_from_l0_flag))
695 ref_list = slice_param->RefPicList[0];
697 assert(slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_B);
698 ref_list = slice_param->RefPicList[1];
701 ref_pic = &pic_param->ReferenceFrames[ref_list[slice_param->collocated_ref_idx]];
703 return gen9_hcpd_get_reference_picture_frame_id(ref_pic, gen9_hcpd_context->reference_surfaces);
/*
 * Inspect up to MIN(ref_list_count, 15) entries of ref_list.  Each entry is
 * an index into ref_surfaces; the loop tests for indices above 14 and for
 * references whose pic_order_cnt is greater than the current picture's.
 *
 * NOTE(review): the statements taken on those two conditions and the final
 * return are missing from this listing.  Presumably out-of-range indices are
 * skipped, and the function returns 0 as soon as a reference follows the
 * current picture in output order and 1 otherwise -- confirm against the
 * full source.
 */
707 gen9_hcpd_is_list_low_delay(uint8_t ref_list_count,
708 uint8_t ref_list[15],
709 VAPictureHEVC *curr_pic,
710 VAPictureHEVC ref_surfaces[15])
714 for (i = 0; i < MIN(ref_list_count, 15); i++) {
715 VAPictureHEVC *ref_pic;
717 if (ref_list[i] > 14)
720 ref_pic = &ref_surfaces[ref_list[i]];
722 if (ref_pic->pic_order_cnt > curr_pic->pic_order_cnt)
/*
 * Compute the low-delay flag for a slice.
 *
 * P slices: the result of gen9_hcpd_is_list_low_delay() on list 0.
 * Remaining (B) slices: the logical AND of that check on list 0 and list 1.
 *
 * NOTE(review): the value returned for I slices and the current-picture
 * argument of each call are missing from this listing.
 */
730 gen9_hcpd_is_low_delay(VADriverContextP ctx,
731 VAPictureParameterBufferHEVC *pic_param,
732 VASliceParameterBufferHEVC *slice_param)
734 if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I)
736 else if (slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_P)
737 return gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l0_active_minus1 + 1,
738 slice_param->RefPicList[0],
740 pic_param->ReferenceFrames);
742 return gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l0_active_minus1 + 1,
743 slice_param->RefPicList[0],
745 pic_param->ReferenceFrames) &&
746 gen9_hcpd_is_list_low_delay(slice_param->num_ref_idx_l1_active_minus1 + 1,
747 slice_param->RefPicList[1],
749 pic_param->ReferenceFrames);
/*
 * Emit the 9-dword HCP_SLICE_STATE command for one slice.
 *
 * Slice positions are the slice_segment_address split into CTB column and
 * row using picture_width_in_ctbs.  next_slice_param may be NULL, in which
 * case `!next_slice_param` sets bit 2 of the dword that also carries
 * slice_type.
 *
 * The collocated reference index / from_l0 flag of the first inter slice
 * with temporal MVP enabled are remembered in the context.  Later slices
 * that are I slices or have temporal MVP disabled reuse those remembered
 * values instead of their own.
 *
 * NOTE(review): the OUT_BCS_BATCH opener lines, the `else` line of the
 * next-slice branch and a few operands are missing from this listing; the
 * zero positions at orig. lines 770-771 are presumably the no-next-slice
 * case.
 */
753 gen9_hcpd_slice_state(VADriverContextP ctx,
754 VAPictureParameterBufferHEVC *pic_param,
755 VASliceParameterBufferHEVC *slice_param,
756 VASliceParameterBufferHEVC *next_slice_param,
757 struct gen9_hcpd_context *gen9_hcpd_context)
759 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
760 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
761 unsigned short collocated_ref_idx, collocated_from_l0_flag;
/* Position of this slice's first CTB, as column and row. */
763 slice_hor_pos = slice_param->slice_segment_address % gen9_hcpd_context->picture_width_in_ctbs;
764 slice_ver_pos = slice_param->slice_segment_address / gen9_hcpd_context->picture_width_in_ctbs;
766 if (next_slice_param) {
767 next_slice_hor_pos = next_slice_param->slice_segment_address % gen9_hcpd_context->picture_width_in_ctbs;
768 next_slice_ver_pos = next_slice_param->slice_segment_address / gen9_hcpd_context->picture_width_in_ctbs;
770 next_slice_hor_pos = 0;
771 next_slice_ver_pos = 0;
774 collocated_ref_idx = gen9_hcpd_get_collocated_ref_idx(ctx, pic_param, slice_param, gen9_hcpd_context);
775 collocated_from_l0_flag = slice_param->LongSliceFlags.fields.collocated_from_l0_flag;
/* Remember the collocated settings of the first inter slice using temporal MVP. */
777 if ((!gen9_hcpd_context->first_inter_slice_valid) &&
778 (slice_param->LongSliceFlags.fields.slice_type != HEVC_SLICE_I) &&
779 slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag) {
780 gen9_hcpd_context->first_inter_slice_collocated_ref_idx = collocated_ref_idx;
781 gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag = collocated_from_l0_flag;
782 gen9_hcpd_context->first_inter_slice_valid = 1;
/* Slices without their own collocated picture reuse the remembered settings. */
786 if (gen9_hcpd_context->first_inter_slice_valid &&
787 ((slice_param->LongSliceFlags.fields.slice_type == HEVC_SLICE_I) ||
788 (!slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag))) {
789 collocated_ref_idx = gen9_hcpd_context->first_inter_slice_collocated_ref_idx;
790 collocated_from_l0_flag = gen9_hcpd_context->first_inter_slice_collocated_from_l0_flag;
793 BEGIN_BCS_BATCH(batch, 9);
795 OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));
798 slice_ver_pos << 16 |
801 next_slice_ver_pos << 16 |
804 (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
805 (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
806 (pic_param->init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 6 |
807 slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag << 5 |
808 slice_param->LongSliceFlags.fields.dependent_slice_segment_flag << 4 |
809 !next_slice_param << 2 |
810 slice_param->LongSliceFlags.fields.slice_type);
812 collocated_ref_idx << 26 |
813 (5 - slice_param->five_minus_max_num_merge_cand - 1) << 23 |
814 slice_param->LongSliceFlags.fields.cabac_init_flag << 22 |
815 slice_param->luma_log2_weight_denom << 19 |
816 ((slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) & 0x7) << 16 |
817 collocated_from_l0_flag << 15 |
818 gen9_hcpd_is_low_delay(ctx, pic_param, slice_param) << 14 |
819 slice_param->LongSliceFlags.fields.mvd_l1_zero_flag << 13 |
820 slice_param->LongSliceFlags.fields.slice_sao_luma_flag << 12 |
821 slice_param->LongSliceFlags.fields.slice_sao_chroma_flag << 11 |
822 slice_param->LongSliceFlags.fields.slice_loop_filter_across_slices_enabled_flag << 10 |
823 (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
824 (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
825 slice_param->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag);
827 slice_param->slice_data_byte_offset); /* DW 5 */
832 OUT_BCS_BATCH(batch, 0); /* Ignored for decoding */
833 OUT_BCS_BATCH(batch, 0); /* Ignored for decoding */
835 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 3-dword HCP_BSD_OBJECT command for one slice: the command header
 * followed by the slice's data size and data offset, both taken from
 * slice_param.
 */
839 gen9_hcpd_bsd_object(VADriverContextP ctx,
840 VASliceParameterBufferHEVC *slice_param,
841 struct gen9_hcpd_context *gen9_hcpd_context)
843 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
845 BEGIN_BCS_BATCH(batch, 3);
847 OUT_BCS_BATCH(batch, HCP_BSD_OBJECT | (3 - 2));
849 OUT_BCS_BATCH(batch, slice_param->slice_data_size);
850 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
852 ADVANCE_BCS_BATCH(batch);
/*
 * Build and submit the command batch that decodes one HEVC picture.
 *
 * After gen9_hcpd_hevc_decode_init() succeeds, the function starts an atomic
 * BCS batch and emits the per-picture commands (pipe mode select, surface
 * state, buffer addresses, quantiser matrices, picture state and, when tiles
 * are enabled, tile state).  For every slice parameter buffer it then
 * programs the matching slice data bo and emits slice state, reference index
 * state, weight/offset state and a BSD object per slice.  The batch is
 * finally ended and flushed.
 *
 * NOTE(review): several lines are missing from this listing, including the
 * local declarations, the error return after the init call, the statement
 * that advances slice_param inside the inner loop, and the final return.
 */
856 gen9_hcpd_hevc_decode_picture(VADriverContextP ctx,
857 struct decode_state *decode_state,
858 struct gen9_hcpd_context *gen9_hcpd_context)
861 struct i965_driver_data *i965 = i965_driver_data(ctx);
862 struct intel_batchbuffer *batch = gen9_hcpd_context->base.batch;
863 VAPictureParameterBufferHEVC *pic_param;
864 VASliceParameterBufferHEVC *slice_param, *next_slice_param, *next_slice_group_param;
865 dri_bo *slice_data_bo;
868 vaStatus = gen9_hcpd_hevc_decode_init(ctx, decode_state, gen9_hcpd_context);
870 if (vaStatus != VA_STATUS_SUCCESS)
873 assert(decode_state->pic_param && decode_state->pic_param->buffer);
874 pic_param = (VAPictureParameterBufferHEVC *)decode_state->pic_param->buffer;
/* With a second BSD ring present, the batch is explicitly started on BSD_RING0. */
876 if (i965->intel.has_bsd2)
877 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
879 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
880 intel_batchbuffer_emit_mi_flush(batch);
/* Per-picture state. */
882 gen9_hcpd_pipe_mode_select(ctx, decode_state, HCP_CODEC_HEVC, gen9_hcpd_context);
883 gen9_hcpd_surface_state(ctx, decode_state, gen9_hcpd_context);
884 gen9_hcpd_pipe_buf_addr_state(ctx, decode_state, gen9_hcpd_context);
885 gen9_hcpd_hevc_qm_state(ctx, decode_state, gen9_hcpd_context);
886 gen9_hcpd_pic_state(ctx, decode_state, gen9_hcpd_context);
888 if (pic_param->pic_fields.bits.tiles_enabled_flag)
889 gen9_hcpd_tile_state(ctx, decode_state, gen9_hcpd_context);
891 /* TODO: double-check whether this works when two slice groups have different slice data buffers */
892 for (j = 0; j < decode_state->num_slice_params; j++) {
893 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
894 slice_param = (VASliceParameterBufferHEVC *)decode_state->slice_params[j]->buffer;
895 slice_data_bo = decode_state->slice_datas[j]->bo;
897 gen9_hcpd_ind_obj_base_addr_state(ctx, slice_data_bo, gen9_hcpd_context);
/* First slice of the following parameter buffer, or NULL after the last buffer. */
899 if (j == decode_state->num_slice_params - 1)
900 next_slice_group_param = NULL;
902 next_slice_group_param = (VASliceParameterBufferHEVC *)decode_state->slice_params[j + 1]->buffer;
904 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
/* The successor is the next element of this buffer, or the next buffer's first slice. */
905 if (i < decode_state->slice_params[j]->num_elements - 1)
906 next_slice_param = slice_param + 1;
908 next_slice_param = next_slice_group_param;
910 gen9_hcpd_slice_state(ctx, pic_param, slice_param, next_slice_param, gen9_hcpd_context);
911 gen9_hcpd_ref_idx_state(ctx, pic_param, slice_param, gen9_hcpd_context);
912 gen9_hcpd_weightoffset_state(ctx, pic_param, slice_param, gen9_hcpd_context);
913 gen9_hcpd_bsd_object(ctx, slice_param, gen9_hcpd_context);
918 intel_batchbuffer_end_atomic(batch);
919 intel_batchbuffer_flush(batch);
/*
 * Decode entry point; installed as base.run by gen9_hcpd_context_init().
 *
 * Validates the input with intel_decoder_sanity_check_input() and, for
 * VAProfileHEVCMain / VAProfileHEVCMain10, forwards to
 * gen9_hcpd_hevc_decode_picture().
 *
 * NOTE(review): the `profile` parameter declaration, the switch statement
 * lines, the default case body and the return statements are missing from
 * this listing.
 */
926 gen9_hcpd_decode_picture(VADriverContextP ctx,
928 union codec_state *codec_state,
929 struct hw_context *hw_context)
931 struct gen9_hcpd_context *gen9_hcpd_context = (struct gen9_hcpd_context *)hw_context;
932 struct decode_state *decode_state = &codec_state->decode;
935 assert(gen9_hcpd_context);
937 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
939 if (vaStatus != VA_STATUS_SUCCESS)
943 case VAProfileHEVCMain:
944 case VAProfileHEVCMain10:
945 vaStatus = gen9_hcpd_hevc_decode_picture(ctx, decode_state, gen9_hcpd_context);
949 /* should never get here !!! */
/*
 * Destroy a decoder context; installed as base.destroy by
 * gen9_hcpd_context_init().  Releases the nine scratch buffers requested in
 * gen9_hcpd_hevc_decode_init(), frees the batch buffer and then frees the
 * context itself.  hw_context must not be used afterwards.
 */
959 gen9_hcpd_context_destroy(void *hw_context)
961 struct gen9_hcpd_context *gen9_hcpd_context = (struct gen9_hcpd_context *)hw_context;
963 FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_line_buffer));
964 FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_line_buffer));
965 FREE_GEN_BUFFER((&gen9_hcpd_context->deblocking_filter_tile_column_buffer));
966 FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_line_buffer));
967 FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_line_buffer));
968 FREE_GEN_BUFFER((&gen9_hcpd_context->metadata_tile_column_buffer));
969 FREE_GEN_BUFFER((&gen9_hcpd_context->sao_line_buffer));
970 FREE_GEN_BUFFER((&gen9_hcpd_context->sao_tile_line_buffer));
971 FREE_GEN_BUFFER((&gen9_hcpd_context->sao_tile_column_buffer));
973 intel_batchbuffer_free(gen9_hcpd_context->base.batch);
974 free(gen9_hcpd_context);
/*
 * HEVC-specific context setup: passes the context's iq_matrix_hevc to
 * hevc_gen_default_iq_matrix().  gen9_hcpd_hevc_qm_state() uses that matrix
 * when scaling lists are disabled.
 */
978 gen9_hcpd_hevc_context_init(VADriverContextP ctx,
979 struct gen9_hcpd_context *gen9_hcpd_context)
981 hevc_gen_default_iq_matrix(&gen9_hcpd_context->iq_matrix_hevc);
/*
 * Allocate and initialise a Gen9 HCP decoder context for object_config.
 *
 * The context is zero-allocated, its destroy/run callbacks are installed, a
 * batch buffer is created, every reference_surfaces slot is marked empty
 * (VA_INVALID_ID, frame_store_id -1, no surface), and profile-specific setup
 * runs for the HEVC Main and Main10 profiles.
 *
 * Returns the new context cast to struct hw_context *.
 *
 * NOTE(review): the statement executed when calloc fails, the local
 * declarations and the remainder of the switch are missing from this
 * listing.
 */
984 static struct hw_context *
985 gen9_hcpd_context_init(VADriverContextP ctx, struct object_config *object_config)
987 struct intel_driver_data *intel = intel_driver_data(ctx);
988 struct gen9_hcpd_context *gen9_hcpd_context = calloc(1, sizeof(struct gen9_hcpd_context));
991 if (!gen9_hcpd_context)
994 gen9_hcpd_context->base.destroy = gen9_hcpd_context_destroy;
995 gen9_hcpd_context->base.run = gen9_hcpd_decode_picture;
/*
 * Every command in this file is emitted with the BCS batch macros and the
 * batch is started with intel_batchbuffer_start_atomic_bcs*(), so the batch
 * must be created for the BSD ring rather than the VEBOX ring.
 */
996 gen9_hcpd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_BSD, 0);
/* Mark every reference slot as unused. */
998 for (i = 0; i < ARRAY_ELEMS(gen9_hcpd_context->reference_surfaces); i++) {
999 gen9_hcpd_context->reference_surfaces[i].surface_id = VA_INVALID_ID;
1000 gen9_hcpd_context->reference_surfaces[i].frame_store_id = -1;
1001 gen9_hcpd_context->reference_surfaces[i].obj_surface = NULL;
1004 switch (object_config->profile) {
1005 case VAProfileHEVCMain:
1006 case VAProfileHEVCMain10:
1007 gen9_hcpd_hevc_context_init(ctx, gen9_hcpd_context);
1014 return (struct hw_context *)gen9_hcpd_context;
/*
 * Gen9 decoder context factory.  The HEVC Main and Main10 profiles get a
 * context from gen9_hcpd_context_init(); every other profile is delegated to
 * gen8_dec_hw_context_init().
 */
1018 gen9_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
1020 if (obj_config->profile == VAProfileHEVCMain ||
1021 obj_config->profile == VAProfileHEVCMain10) {
1022 return gen9_hcpd_context_init(ctx, obj_config);
1024 return gen8_dec_hw_context_init(ctx, obj_config);