/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Zhao Yakui <yakui.zhao@intel.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 */
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
45 #include "vp8_probs.h"
/* Size of one padded SURFACE_STATE entry on Gen8+ hardware. */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
/* Byte offset of surface state #index inside the surface-state heap.
 * The argument is parenthesized so expressions like (base + i) expand
 * correctly (the original multiplied only the first term). */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Binding-table entries follow the last padded surface-state slot. */
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/* Build per-MB PAK batch buffers in software (see #ifdef blocks below). */
#define MFC_SOFTWARE_HASWELL                    1

/* True when the device stepping is B0 or newer. */
#define IS_STEPPING_BPLUS(i965)                 ((i965->intel.revision) >= B0_STEP_REV)
/* Gen9 media-kernel binary that builds PAK commands for intra MBs; the
 * .g9b file is generated from the shader sources at build time. */
static const uint32_t gen9_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g9b"
/* Gen9 media-kernel binary that builds PAK commands for inter MBs. */
static const uint32_t gen9_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g9b"
/* Kernel table handed to the GPE context: one kernel per MB class
 * (intra/inter), each entry pairing a name, an enum id, the binary blob
 * above and its size. */
static struct i965_kernel gen9_mfc_kernels[] = {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen9_mfc_batchbuffer_avc_intra,
        sizeof(gen9_mfc_batchbuffer_avc_intra),
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen9_mfc_batchbuffer_avc_inter,
        sizeof(gen9_mfc_batchbuffer_avc_inter),
/* Inter-MB partition type lives in the low two bits of VME msg[0]
 * (see the mv_ptr fix-ups in gen9_mfc_avc_pak_object_inter). */
#define INTER_MODE_MASK 0x03
#define INTER_8X8 0x03
#define INTER_16X8 0x01
#define INTER_8X16 0x02
/* Non-zero when any 8x8 sub-block is further split (bits of msg[1]). */
#define SUBMB_SHAPE_MASK 0x00FF00

/* Motion-vector count field for the PAK object descriptor DWord. */
#define INTER_MV8 (4 << 20)
#define INTER_MV32 (6 << 20)
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWords) configuring the MFX engine for
 * encoding: long command format, VLD mode, and pre/post deblocking
 * output enabled only when the corresponding bo exists on the context.
 * 'standard_select' (MPEG2/AVC/VP8) is a parameter of this function;
 * its declaration line is not visible in this view.
 */
gen9_mfc_pipe_mode_select(VADriverContextP ctx,
                          struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    /* Only the codecs handled by this file are legal here. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    /* DW1: pipeline mode flags. */
    (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
    (MFD_MODE_VLD << 15) | /* VLD mode */
    (0 << 10) | /* Stream-Out Enable */
    ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
    ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
    (0 << 6) | /* frame statistics stream-out enable*/
    (0 << 5) | /* not in stitch mode */
    (1 << 4) | /* encoding mode */
    (standard_select << 0)); /* standard select: avc or mpeg2 */
    /* DW2: debug/error-handling controls, all disabled. */
    (0 << 7) | /* expand NOA bus flag */
    (0 << 6) | /* disable slice-level clock gating */
    (0 << 5) | /* disable clock gating for NOA */
    (0 << 4) | /* terminate if AVC motion and POC table error occurs */
    (0 << 3) | /* terminate if AVC mbdata error occurs */
    (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
    /* DW3-4: reserved/unused here. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 DWords) describing the reconstructed /
 * source picture: NV12 (planar 4:2:0, interleaved U/V), Y-major tiling,
 * geometry taken from mfc_context->surface_state.
 */
gen9_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    /* DW2: surface width/height, both stored minus one. */
    ((mfc_context->surface_state.height - 1) << 18) |
    ((mfc_context->surface_state.width - 1) << 4));
    /* DW3: format, pitch and tiling. */
    (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
    (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
    (0 << 22) | /* surface object control state, FIXME??? */
    ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
    (0 << 2) | /* must be 0 for interleave U/V */
    (1 << 1) | /* must be tiled */
    (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
    /* DW4: X/Y offset of the chroma (U) plane. */
    (0 << 16) | /* must be 0 for interleave U/V */
    (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWords on gen9).
 * Points the MFX engine at the VME output buffer (indirect MV object)
 * and the PAK-BSE object that receives the coded bitstream.  For VP8
 * the MV object is left unset and the bitstream upper bound is taken
 * from the PAK-BSE object instead.
 */
gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* the DW1-3 is for the MFX indirect bistream offset */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Total bytes the VME stage produced (blocks * block size). */
    vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
    /* the DW4-5 is the MFX upper bound */
    if (encoder_context->codec == CODEC_VP8) {
        mfc_context->mfc_indirect_pak_bse_object.bo,
        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        mfc_context->mfc_indirect_pak_bse_object.end_offset);
        OUT_BCS_BATCH(batch, 0);
        /* the DW6-10 is for MFX Indirect MV Object Base Address */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* non-VP8 path: no upper bound, MV object = VME output buffer. */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* the DW6-10 is for MFX Indirect MV Object Base Address */
        OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
        OUT_BCS_BATCH(batch, 0);

    /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
    mfc_context->mfc_indirect_pak_bse_object.bo,
    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* Upper bound of the PAK-BSE object (same bo, end offset). */
    mfc_context->mfc_indirect_pak_bse_object.bo,
    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    mfc_context->mfc_indirect_pak_bse_object.end_offset);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (16 DWords): per-picture AVC encode settings.
 * Frame geometry comes from surface_state; entropy mode, weighted
 * prediction and transform flags come from the VA picture parameters.
 */
gen9_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    /* Frame size rounded up to whole macroblocks. */
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);

    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /*DW1. MB setting of frame */
    ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
    /* DW2: frame dimensions in MBs, stored minus one. */
    ((height_in_mbs - 1) << 16) |
    ((width_in_mbs - 1) << 0));
    /* DW3: QP offsets and conformance flags. */
    (0 << 24) | /* Second Chroma QP Offset */
    (0 << 16) | /* Chroma QP Offset */
    (0 << 14) | /* Max-bit conformance Intra flag */
    (0 << 13) | /* Max Macroblock size conformance Inter flag */
    (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
    (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
    (0 << 8) | /* FIXME: Image Structure */
    (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
    /* DW4: stream/entropy controls. */
    (0 << 16) | /* Mininum Frame size */
    (0 << 15) | /* Disable reading of Macroblock Status Buffer */
    (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
    (0 << 13) | /* CABAC 0 word insertion test enable */
    (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
    (1 << 10) | /* Chroma Format IDC, 4:2:0 */
    (0 << 8) | /* FIXME: MbMvFormatFlag */
    (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
    (0 << 6) | /* Only valid for VLD decoding mode */
    (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
    (0 << 4) | /* Direct 8x8 inference flag */
    (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
    (1 << 2) | /* Frame MB only flag */
    (0 << 1) | /* MBAFF mode is in active */
    (0 << 0)); /* Field picture flag */
    /* DW5 Trellis quantization */
    OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) | /* InterMbMaxSz */
                  (0xEE8) ); /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0); /* Reserved */
    /* DW8-9: slice QP deltas (unused, rate control handles QP). */
    OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
    /* DW10. Bit setting for MB */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    /* DW12-13: magic values carried over from earlier gens. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x02010100);
    /* DW14. For short format */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
303 gen9_mfc_qm_state(VADriverContextP ctx,
307 struct intel_encoder_context *encoder_context)
309 struct intel_batchbuffer *batch = encoder_context->base.batch;
310 unsigned int qm_buffer[16];
312 assert(qm_length <= 16);
313 assert(sizeof(*qm) == 4);
314 memcpy(qm_buffer, qm, qm_length * 4);
316 BEGIN_BCS_BATCH(batch, 18);
317 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
318 OUT_BCS_BATCH(batch, qm_type << 0);
319 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
320 ADVANCE_BCS_BATCH(batch);
324 gen9_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
326 unsigned int qm[16] = {
327 0x10101010, 0x10101010, 0x10101010, 0x10101010,
328 0x10101010, 0x10101010, 0x10101010, 0x10101010,
329 0x10101010, 0x10101010, 0x10101010, 0x10101010,
330 0x10101010, 0x10101010, 0x10101010, 0x10101010
333 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
334 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
335 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
336 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
340 gen9_mfc_fqm_state(VADriverContextP ctx,
344 struct intel_encoder_context *encoder_context)
346 struct intel_batchbuffer *batch = encoder_context->base.batch;
347 unsigned int fqm_buffer[32];
349 assert(fqm_length <= 32);
350 assert(sizeof(*fqm) == 4);
351 memcpy(fqm_buffer, fqm, fqm_length * 4);
353 BEGIN_BCS_BATCH(batch, 34);
354 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
355 OUT_BCS_BATCH(batch, fqm_type << 0);
356 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
357 ADVANCE_BCS_BATCH(batch);
361 gen9_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
363 unsigned int qm[32] = {
364 0x10001000, 0x10001000, 0x10001000, 0x10001000,
365 0x10001000, 0x10001000, 0x10001000, 0x10001000,
366 0x10001000, 0x10001000, 0x10001000, 0x10001000,
367 0x10001000, 0x10001000, 0x10001000, 0x10001000,
368 0x10001000, 0x10001000, 0x10001000, 0x10001000,
369 0x10001000, 0x10001000, 0x10001000, 0x10001000,
370 0x10001000, 0x10001000, 0x10001000, 0x10001000,
371 0x10001000, 0x10001000, 0x10001000, 0x10001000
374 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
375 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
376 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
377 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
381 gen9_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
382 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
383 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
384 struct intel_batchbuffer *batch)
387 batch = encoder_context->base.batch;
389 if (data_bits_in_last_dw == 0)
390 data_bits_in_last_dw = 32;
392 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
394 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
396 (0 << 16) | /* always start at offset 0 */
397 (data_bits_in_last_dw << 8) |
398 (skip_emul_byte_count << 4) |
399 (!!emulation_flag << 3) |
400 ((!!is_last_header) << 2) |
401 ((!!is_end_of_slice) << 1) |
402 (0 << 0)); /* FIXME: ??? */
403 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
405 ADVANCE_BCS_BATCH(batch);
409 static void gen9_mfc_init(VADriverContextP ctx,
410 struct encode_state *encode_state,
411 struct intel_encoder_context *encoder_context)
413 struct i965_driver_data *i965 = i965_driver_data(ctx);
414 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
417 int width_in_mbs = 0;
418 int height_in_mbs = 0;
419 int slice_batchbuffer_size;
421 if (encoder_context->codec == CODEC_H264 ||
422 encoder_context->codec == CODEC_H264_MVC) {
423 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
424 width_in_mbs = pSequenceParameter->picture_width_in_mbs;
425 height_in_mbs = pSequenceParameter->picture_height_in_mbs;
427 VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
429 assert(encoder_context->codec == CODEC_MPEG2);
431 width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
432 height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
435 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
436 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
438 /*Encode common setup for MFC*/
439 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
440 mfc_context->post_deblocking_output.bo = NULL;
442 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
443 mfc_context->pre_deblocking_output.bo = NULL;
445 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
446 mfc_context->uncompressed_picture_source.bo = NULL;
448 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
449 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
451 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
452 if ( mfc_context->direct_mv_buffers[i].bo != NULL);
453 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
454 mfc_context->direct_mv_buffers[i].bo = NULL;
457 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
458 if (mfc_context->reference_surfaces[i].bo != NULL)
459 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
460 mfc_context->reference_surfaces[i].bo = NULL;
463 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
464 bo = dri_bo_alloc(i965->intel.bufmgr,
469 mfc_context->intra_row_store_scratch_buffer.bo = bo;
471 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
472 bo = dri_bo_alloc(i965->intel.bufmgr,
474 width_in_mbs * height_in_mbs * 16,
477 mfc_context->macroblock_status_buffer.bo = bo;
479 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
480 bo = dri_bo_alloc(i965->intel.bufmgr,
482 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
485 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
487 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
488 bo = dri_bo_alloc(i965->intel.bufmgr,
490 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
493 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
495 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
496 mfc_context->mfc_batchbuffer_surface.bo = NULL;
498 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
499 mfc_context->aux_batchbuffer_surface.bo = NULL;
501 if (mfc_context->aux_batchbuffer)
502 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
504 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
505 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
506 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
507 mfc_context->aux_batchbuffer_surface.pitch = 16;
508 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
509 mfc_context->aux_batchbuffer_surface.size_block = 16;
511 i965_gpe_context_init(ctx, &mfc_context->gpe_context);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWords): base addresses of every
 * buffer the MFX pipeline touches — deblocking outputs, source picture,
 * MB status, row-store scratch buffers and up to 16 reference surfaces.
 * Optional buffers are programmed as 0 when their bo is absent.
 */
gen9_mfc_pipe_buf_addr_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* the DW1-3 is for pre_deblocking */
    if (mfc_context->pre_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0); /* pre output addr */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW4-6 is for the post_deblocking */

    /* post output addr */
    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW10-12 is for the mb status */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* StreamOut data*/

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-18 is for the deblocking filter */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
            OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);

    /* The DW 52-54 is for the MB status buffer */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWords): direct-MV buffers for the
 * reference frames, the write buffer for the current frame's MVs, and
 * the POC list (the final loop emits i/2 per entry — placeholder POCs,
 * two entries per frame).
 */
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
            OUT_BCS_BATCH(batch, 0);
            /* missing bo: program NULL addresses. */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);

    /* the DW34-36 is the MV for the current reference */
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list: one pair of entries per frame. */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWords): only the BSD/MPC
 * row-store scratch buffer is programmed; the MPR row store and the
 * bitplane read buffer are unused on the encoder path.
 */
gen9_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    /* DW1-3: BSD/MPC row store scratch buffer. */
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for Bitplane Read Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Program all per-picture MFX state for AVC encoding.  The command
 * ordering is mandated by the hardware (pipe mode select first, then
 * surface/base-address state, then AVC-specific state); do not reorder.
 * Some states go through mfc_context function pointers so other codecs
 * can override them.
 */
static void gen9_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
                                                      struct encode_state *encode_state,
                                                      struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    mfc_context->avc_img_state(ctx, encode_state, encoder_context);
    mfc_context->avc_qm_state(ctx, encoder_context);
    mfc_context->avc_fqm_state(ctx, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);
    intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
/*
 * Submit the accumulated BCS batch to the kernel, kicking off the
 * hardware encode.  Always reports success; actual completion is
 * observed later via the coded buffer.
 */
static VAStatus gen9_mfc_run(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    intel_batchbuffer_flush(batch); //run the pipeline

    return VA_STATUS_SUCCESS;
/*
 * Finish the encode: map the coded buffer, report the produced size in
 * bits through *encoded_bits_size, and unmap again.  Mapping the buffer
 * implicitly waits for the hardware to finish writing it.
 */
gen9_mfc_stop(VADriverContextP ctx,
              struct encode_state *encode_state,
              struct intel_encoder_context *encoder_context,
              int *encoded_bits_size)
    VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VACodedBufferSegment *coded_buffer_segment;

    vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
    assert(vaStatus == VA_STATUS_SUCCESS);
    /* size is in bytes; callers expect bits. */
    *encoded_bits_size = coded_buffer_segment->size * 8;
    i965_UnmapBuffer(ctx, pPicParameter->coded_buf);

    return VA_STATUS_SUCCESS;
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWords) for one slice: slice type,
 * geometry (first/next MB coordinates), QP/deblocking parameters and
 * the bit-rate-control grow/shrink thresholds from the per-type BRC
 * context.  Weighted-prediction denominators follow H.264 8.4.3 for
 * implicit bipred (idc == 2).
 */
gen9_mfc_avc_slice_state(VADriverContextP ctx,
                         VAEncPictureParameterBufferH264 *pic_param,
                         VAEncSliceParameterBufferH264 *slice_param,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         int rate_control_enable,
                         struct intel_batchbuffer *batch)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    /* Slice extent in raster MB order, converted to x/y coordinates. */
    int beginmb = slice_param->macroblock_address;
    int endmb = beginmb + slice_param->num_macroblocks;
    int beginx = beginmb % width_in_mbs;
    int beginy = beginmb / width_in_mbs;
    int nextx =  endmb % width_in_mbs;
    int nexty = endmb / width_in_mbs;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int last_slice = (endmb == (width_in_mbs * height_in_mbs));
    unsigned char correct[6], grow, shrink;
    int weighted_pred_idc = 0;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    int num_ref_l0 = 0, num_ref_l1 = 0;

    batch = encoder_context->base.batch;

    /* Weighted-prediction setup depends on the slice type. */
    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;

    /* Bit-rate-control parameters for this slice type. */
    maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
    maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;

    for (i = 0; i < 6; i++)
        correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];

    grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
           (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
    shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
             (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);

    /* NOTE(review): stray double ';' below is harmless but should be cleaned up. */
    BEGIN_BCS_BATCH(batch, 11);;

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
    OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
    /* DW2: reference counts and weight denominators. */
    (chroma_log2_weight_denom << 8) |
    (luma_log2_weight_denom << 0));
    /* DW3: prediction/deblocking controls and slice QP. */
    (weighted_pred_idc << 30) |
    (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
    (slice_param->disable_deblocking_filter_idc << 27) |
    (slice_param->cabac_init_idc << 24) |
    (qp<<16) | /*Slice Quantization Parameter*/
    ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
    ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    /* DW4: first MB of this slice. */
    (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
    slice_param->macroblock_address );
    OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
    /* DW6: per-slice control flags. */
    (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
    (1 << 30) | /*ResetRateControlCounter*/
    (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
    (4 << 24) | /*RC Stable Tolerance, middle level*/
    (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
    (0 << 22) | /*QP mode, don't modfiy CBP*/
    (0 << 21) | /*MB Type Direct Conversion Enabled*/
    (0 << 20) | /*MB Type Skip Conversion Enabled*/
    (last_slice << 19) | /*IsLastSlice*/
    (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
    (1 << 17) | /*HeaderPresentFlag*/
    (1 << 16) | /*SliceData PresentFlag*/
    (1 << 15) | /*TailPresentFlag*/
    (1 << 13) | /*RBSP NAL TYPE*/
    (0 << 12) ); /*CabacZeroWordInsertionEnable*/
    OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
    /* DW8: BRC QP clamps and grow/shrink thresholds. */
    (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
    (maxQpP << 16) | /*Target QP + 20 is highest QP*/
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
873 #ifdef MFC_SOFTWARE_HASWELL
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 DWords) for an intra macroblock at
 * MB coordinates (x, y).  The MB descriptor is rebuilt from the VME
 * output message 'msg': low bits plus the MB type field shifted into
 * the position the PAK command expects.  Returns the command length in
 * DWords so callers can track batch usage.
 */
gen9_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
                              int qp,unsigned int *msg,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size, unsigned char max_mb_size,
                              struct intel_batchbuffer *batch)
    int len_in_dwords = 12;
    unsigned int intra_msg;
#define         INTRA_MSG_FLAG          (1 << 13)
#define         INTRA_MBTYPE_MASK       (0x1F0000)

    batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    /* Rebuild the descriptor: keep low/flag bits, mark as intra, and
     * move the MB type field down to the PAK position. */
    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW3: no MVs for intra; DC coefficient flags set. */
    (0 << 24) |     /* PackedMvNum, Debug*/
    (0 << 20) |     /* No motion vector */
    (1 << 19) |     /* CbpDcY */
    (1 << 18) |     /* CbpDcU */
    (1 << 17) |     /* CbpDcV */
    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);        /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                           /* Code Block Pattern */
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /*Stuff for Intra MB*/
    OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/
    OUT_BCS_BATCH(batch, msg[2]);
    OUT_BCS_BATCH(batch, msg[3]&0xFF);

    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 DWords) for an inter macroblock.
 * The VME output stores one MV per 4x4 sub-block; for 16x8 / 8x16 / 8x8
 * partitions the MVs are first compacted/replicated in place into the
 * 8-MV layout the PAK command expects, then the descriptor is rebuilt
 * with the proper MV-count field (8 or 32 MVs).  Returns the command
 * length in DWords.
 */
gen9_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
                              unsigned int *msg, unsigned int offset,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
                              struct intel_batchbuffer *batch)
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int len_in_dwords = 12;
    unsigned int inter_msg = 0;

    batch = encoder_context->base.batch;

#define MSG_MV_OFFSET   4
    unsigned int *mv_ptr;
    mv_ptr = msg + MSG_MV_OFFSET;
    /* MV of VME output is based on 16 sub-blocks. So it is necessary
     * to convert them to be compatible with the format of AVC_PAK
     */
    if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
        /* MV[0] and MV[2] are replicated */
        mv_ptr[4] = mv_ptr[0];
        mv_ptr[5] = mv_ptr[1];
        mv_ptr[2] = mv_ptr[8];
        mv_ptr[3] = mv_ptr[9];
        mv_ptr[6] = mv_ptr[8];
        mv_ptr[7] = mv_ptr[9];
    } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
        /* MV[0] and MV[1] are replicated */
        mv_ptr[2] = mv_ptr[0];
        mv_ptr[3] = mv_ptr[1];
        mv_ptr[4] = mv_ptr[16];
        mv_ptr[5] = mv_ptr[17];
        mv_ptr[6] = mv_ptr[24];
        mv_ptr[7] = mv_ptr[25];
    } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
               !(msg[1] & SUBMB_SHAPE_MASK)) {
        /* Don't touch MV[0] or MV[1] */
        mv_ptr[2] = mv_ptr[8];
        mv_ptr[3] = mv_ptr[9];
        mv_ptr[4] = mv_ptr[16];
        mv_ptr[5] = mv_ptr[17];
        mv_ptr[6] = mv_ptr[24];
        mv_ptr[7] = mv_ptr[25];

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));

    /* DW1: indirect MV count/size; 32 MVs only for fully split 8x8. */
    if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
        if (msg[1] & SUBMB_SHAPE_MASK)
    OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
    OUT_BCS_BATCH(batch, offset);
    /* DW3: rebuilt descriptor with MV-count and DC-coefficient flags. */
    inter_msg = msg[0] & (0x1F00FFFF);
    inter_msg |= INTER_MV8;
    inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
    if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
        (msg[1] & SUBMB_SHAPE_MASK)) {
        inter_msg |= INTER_MV32;

    OUT_BCS_BATCH(batch, inter_msg);

    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
    /* B slices flag all four DC blocks in the high nibble. */
    if ( slice_type == SLICE_TYPE_B) {
        OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);	/* Last MB */
        OUT_BCS_BATCH(batch, (end_mb << 26) | qp);	/* Last MB */

        OUT_BCS_BATCH(batch, (end_mb << 26) | qp);	/* Last MB */

    /* Drop the sub-MB shape byte for the inter payload DWord. */
    inter_msg = msg[1] >> 8;
    /*Stuff for Inter MB*/
    OUT_BCS_BATCH(batch, inter_msg);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);

    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0x0);
    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
/* Offsets into the per-MB VME output record. The *_RDO and *_MSG offsets are
 * dword indices (used as msg[OFFSET] / msg += OFFSET below); AVC_INTER_MV_OFFSET
 * is a byte offset (added to i * size_block when computing the indirect-data
 * offset). AVC_RDO_MASK extracts the 16-bit RDO cost from its dword. */
1024 #define AVC_INTRA_RDO_OFFSET 4
1025 #define AVC_INTER_RDO_OFFSET 10
1026 #define AVC_INTER_MSG_OFFSET 8
1027 #define AVC_INTER_MV_OFFSET 48
1028 #define AVC_RDO_MASK 0xFFFF
/*
 * Program one AVC slice into the software-built PAK batchbuffer:
 * slice state, headers/packed data (first slice only), then one PAK object
 * per macroblock read back from the mapped VME output, then the tail data.
 * NOTE(review): partial extract — some original lines are elided here.
 */
1031 gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1032 struct encode_state *encode_state,
1033 struct intel_encoder_context *encoder_context,
1035 struct intel_batchbuffer *slice_batch)
1037 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1038 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1039 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1040 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1041 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1042 unsigned int *msg = NULL, offset = 0;
1043 unsigned char *msg_ptr = NULL;
1044 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1045 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1046 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1048 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1049 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1050 unsigned int tail_data[] = { 0x0, 0x0 };
1051 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1052 int is_intra = slice_type == SLICE_TYPE_I;
/* Under CBR the BRC-chosen QP overrides the parameter-derived one */
1056 if (rate_control_mode == VA_RC_CBR) {
1057 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1058 if (encode_state->slice_header_index[slice_index] == 0) {
1059 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1064 /* only support for 8-bit pixel bit-depth */
1065 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1066 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1067 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1068 assert(qp >= 0 && qp < 52);
1070 gen9_mfc_avc_slice_state(ctx,
1073 encode_state, encoder_context,
1074 (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1076 if ( slice_index == 0)
1077 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1079 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
/* Map VME output (writable) and walk the per-MB records of this slice */
1081 dri_bo_map(vme_context->vme_output.bo , 1);
1082 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1085 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1087 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1090 for (i = pSliceParameter->macroblock_address;
1091 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1092 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1093 x = i % width_in_mbs;
1094 y = i / width_in_mbs;
1095 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1099 gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
/* Inter slice: pick intra vs inter per MB by comparing RDO costs */
1101 int inter_rdo, intra_rdo;
1102 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1103 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1104 offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1105 if (intra_rdo < inter_rdo) {
1106 gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1108 msg += AVC_INTER_MSG_OFFSET;
1109 gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1114 dri_bo_unmap(vme_context->vme_output.bo);
/* Slice tail: flush/padding data; counts differ for the last slice */
1117 mfc_context->insert_object(ctx, encoder_context,
1119 2, 1, 1, 0, slice_batch);
1121 mfc_context->insert_object(ctx, encoder_context,
1123 1, 1, 1, 0, slice_batch);
/*
 * Build the per-slice PAK command buffer on the CPU ("software" path):
 * program every slice into the aux batchbuffer, terminate it with
 * MI_BATCH_BUFFER_END, and hand its bo back to the caller.
 * The returned bo carries a reference taken here; the aux batchbuffer
 * itself is freed and detached from the context.
 * NOTE(review): partial extract — some original lines are elided here.
 */
1130 gen9_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1131 struct encode_state *encode_state,
1132 struct intel_encoder_context *encoder_context)
1134 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1135 struct intel_batchbuffer *batch;
1139 batch = mfc_context->aux_batchbuffer;
1140 batch_bo = batch->buffer;
1141 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1142 gen9_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1145 intel_batchbuffer_align(batch, 8);
1147 BEGIN_BCS_BATCH(batch, 2);
1148 OUT_BCS_BATCH(batch, 0);
1149 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1150 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past the batchbuffer free; caller unreferences it */
1152 dri_bo_reference(batch_bo);
1153 intel_batchbuffer_free(batch);
1154 mfc_context->aux_batchbuffer = NULL;
/*
 * Bind the input surfaces for the hardware batchbuffer-generation kernels:
 * the VME output buffer and the slice-header (aux batchbuffer) surface.
 */
1162 gen9_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1163 struct encode_state *encode_state,
1164 struct intel_encoder_context *encoder_context)
1167 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1168 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1170 assert(vme_context->vme_output.bo);
1171 mfc_context->buffer_suface_setup(ctx,
1172 &mfc_context->gpe_context,
1173 &vme_context->vme_output,
1174 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1175 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1176 assert(mfc_context->aux_batchbuffer_surface.bo);
1177 mfc_context->buffer_suface_setup(ctx,
1178 &mfc_context->gpe_context,
1179 &mfc_context->aux_batchbuffer_surface,
1180 BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1181 SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
/*
 * Allocate and bind the output surface into which the GPU kernels write the
 * generated MFC batchbuffer: one block per macroblock plus per-slice
 * head/tail blocks (8 per slice) plus a terminator block.
 * NOTE(review): partial extract — the dri_bo_alloc name/alignment arguments
 * are elided here.
 */
1185 gen9_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1186 struct encode_state *encode_state,
1187 struct intel_encoder_context *encoder_context)
1190 struct i965_driver_data *i965 = i965_driver_data(ctx);
1191 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1192 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1193 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1194 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1195 mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1196 mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1197 mfc_context->mfc_batchbuffer_surface.pitch = 16;
1198 mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1200 mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1202 mfc_context->buffer_suface_setup(ctx,
1203 &mfc_context->gpe_context,
1204 &mfc_context->mfc_batchbuffer_surface,
1205 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1206 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/* Bind both the input (VME output, slice headers) and output (generated
 * batchbuffer) surfaces for the batchbuffer-generation kernels. */
1210 gen9_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1211 struct encode_state *encode_state,
1212 struct intel_encoder_context *encoder_context)
1214 gen9_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1215 gen9_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * Fill the Interface Descriptor Remap Table: one 32-byte descriptor per
 * media kernel, pointing at the kernel binary and the shared binding table,
 * with a relocation so desc0 tracks the kernel bo's GPU address.
 * NOTE(review): partial extract — the bo map/unmap and desc increment lines
 * are elided here.
 */
1219 gen9_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1220 struct encode_state *encode_state,
1221 struct intel_encoder_context *encoder_context)
1223 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1224 struct gen6_interface_descriptor_data *desc;
1228 bo = mfc_context->gpe_context.idrt.bo;
1230 assert(bo->virtual);
1233 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1234 struct i965_kernel *kernel;
1236 kernel = &mfc_context->gpe_context.kernels[i];
1237 assert(sizeof(*desc) == 32);
1239 /* Set up the descriptor table entry for this kernel */
1240 memset(desc, 0, sizeof(*desc));
1241 desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1242 desc->desc2.sampler_count = 0;
1243 desc->desc2.sampler_state_pointer = 0;
1244 desc->desc3.binding_table_entry_count = 2;
1245 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1246 desc->desc4.constant_urb_entry_read_offset = 0;
1247 desc->desc4.constant_urb_entry_read_length = 4;
/* Patch desc0 with the kernel bo's relocated address at exec time */
1250 dri_bo_emit_reloc(bo,
1251 I915_GEM_DOMAIN_INSTRUCTION, 0,
1253 i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
/*
 * Set up the constant (CURBE) data for the batchbuffer-generation kernels.
 * NOTE(review): the body is truncated in this extract — only the context
 * lookup is visible.
 */
1262 gen9_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1263 struct encode_state *encode_state,
1264 struct intel_encoder_context *encoder_context)
1266 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/*
 * Emit one MEDIA_OBJECT command (12 dwords) that dispatches a
 * batchbuffer-generation kernel over a run of macroblocks. Inline data
 * carries the head offset, destination offset inside the generated
 * batchbuffer, and the MB-run description.
 * NOTE(review): partial extract — several parameters and inline-data dwords
 * are elided here.
 */
1272 gen9_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1275 int batchbuffer_offset,
1287 BEGIN_BATCH(batch, 12);
1289 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1290 OUT_BATCH(batch, index);
1291 OUT_BATCH(batch, 0);
1292 OUT_BATCH(batch, 0);
1293 OUT_BATCH(batch, 0);
1294 OUT_BATCH(batch, 0);
/* inline data */
1297 OUT_BATCH(batch, head_offset);
1298 OUT_BATCH(batch, batchbuffer_offset);
1303 number_mb_cmds << 16 |
1314 ADVANCE_BATCH(batch);
/*
 * Dispatch the MEDIA_OBJECT commands for one slice: the MBs are processed in
 * chunks of 128 per command, with a final command for the remainder. The
 * intra or inter generation kernel is selected from the slice type.
 * NOTE(review): partial extract — the trailing arguments of both
 * gen9_mfc_batchbuffer_emit_object_command calls are elided here.
 */
1318 gen9_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1319 struct intel_encoder_context *encoder_context,
1320 VAEncSliceParameterBufferH264 *slice_param,
1322 unsigned short head_size,
1323 unsigned short tail_size,
1324 int batchbuffer_offset,
1328 struct intel_batchbuffer *batch = encoder_context->base.batch;
1329 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1330 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1331 int total_mbs = slice_param->num_macroblocks;
1332 int number_mb_cmds = 128;
1333 int starting_mb = 0;
1334 int last_object = 0;
1335 int first_object = 1;
1338 int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
/* Full chunks of 128 MB commands */
1340 for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1341 last_object = (total_mbs - starting_mb) == number_mb_cmds;
1342 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1343 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1344 assert(mb_x <= 255 && mb_y <= 255);
1346 starting_mb += number_mb_cmds;
1348 gen9_mfc_batchbuffer_emit_object_command(batch,
/* Only the first object consumes the slice head; only the last adds the tail */
1364 head_offset += head_size;
1365 batchbuffer_offset += head_size;
1369 head_offset += tail_size;
1370 batchbuffer_offset += tail_size;
1373 batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
/* Remainder chunk (fewer than 128 MBs), if any */
1380 number_mb_cmds = total_mbs % number_mb_cmds;
1381 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1382 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1383 assert(mb_x <= 255 && mb_y <= 255);
1384 starting_mb += number_mb_cmds;
1386 gen9_mfc_batchbuffer_emit_object_command(batch,
/*
 * Build one slice for the hardware batchbuffer path: write the slice state,
 * headers and tail into the aux batchbuffer (measuring head_size/tail_size
 * in owords), then dispatch the generation kernels for the slice's MBs.
 * NOTE(review): partial extract — some original lines are elided here.
 */
1404 * return size in Owords (16bytes)
1407 gen9_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1408 struct encode_state *encode_state,
1409 struct intel_encoder_context *encoder_context,
1411 int batchbuffer_offset)
1413 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1414 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1415 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1416 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1417 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1418 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1419 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1420 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1421 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1422 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1423 unsigned int tail_data[] = { 0x0, 0x0 };
1425 int old_used = intel_batchbuffer_used_size(slice_batch), used;
1426 unsigned short head_size, tail_size;
1427 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
/* Under CBR the BRC-chosen QP overrides the parameter-derived one */
1431 if (rate_control_mode == VA_RC_CBR) {
1432 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1433 if (encode_state->slice_header_index[slice_index] == 0) {
1434 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1439 /* only support for 8-bit pixel bit-depth */
1440 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1441 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1442 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1443 assert(qp >= 0 && qp < 52);
/* Head: everything emitted before the per-MB commands, measured in owords */
1445 head_offset = old_used / 16;
1446 gen9_mfc_avc_slice_state(ctx,
1451 (rate_control_mode == VA_RC_CBR),
1455 if (slice_index == 0)
1456 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1459 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1462 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1463 used = intel_batchbuffer_used_size(slice_batch);
1464 head_size = (used - old_used) / 16;
/* Tail: flush/padding objects emitted after the per-MB commands */
1469 mfc_context->insert_object(ctx,
1480 mfc_context->insert_object(ctx,
1492 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1493 used = intel_batchbuffer_used_size(slice_batch);
1494 tail_size = (used - old_used) / 16;
1496 gen9_mfc_avc_batchbuffer_slice_command(ctx,
1506 return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
/*
 * Run the GPU media pipeline that generates the MFC batchbuffer: set up the
 * GPE pipeline, then dispatch one batchbuffer-generation pass per slice,
 * accumulating the output offset.
 * NOTE(review): partial extract — the offset accumulation line is elided.
 */
1510 gen9_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1511 struct encode_state *encode_state,
1512 struct intel_encoder_context *encoder_context)
1514 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1515 struct intel_batchbuffer *batch = encoder_context->base.batch;
1516 int i, size, offset = 0;
1518 intel_batchbuffer_start_atomic(batch, 0x4000);
1519 gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1521 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1522 size = gen9_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1526 intel_batchbuffer_end_atomic(batch);
1527 intel_batchbuffer_flush(batch);
/* Generate the MFC batchbuffer on the GPU: bind surfaces, fill the interface
 * descriptor table and constants, then run the media pipeline. */
1531 gen9_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1532 struct encode_state *encode_state,
1533 struct intel_encoder_context *encoder_context)
1535 gen9_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1536 gen9_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1537 gen9_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1538 gen9_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/*
 * "Hardware" path: have the GPU kernels build the slice batchbuffer, then
 * return its bo with an extra reference (caller unreferences).
 */
1542 gen9_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1543 struct encode_state *encode_state,
1544 struct intel_encoder_context *encoder_context)
1546 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1548 gen9_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1549 dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1551 return mfc_context->mfc_batchbuffer_surface.bo;
/*
 * Top-level BCS programming for one AVC frame: build the slice-level
 * batchbuffer (software or hardware path per MFC_SOFTWARE_HASWELL), emit the
 * picture-level state, then chain to the slice batchbuffer with
 * MI_BATCH_BUFFER_START. Interlaced input is rejected.
 * NOTE(review): partial extract — some original lines are elided here.
 */
1557 gen9_mfc_avc_pipeline_programing(VADriverContextP ctx,
1558 struct encode_state *encode_state,
1559 struct intel_encoder_context *encoder_context)
1561 struct intel_batchbuffer *batch = encoder_context->base.batch;
1562 dri_bo *slice_batch_bo;
1564 if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1565 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1570 #ifdef MFC_SOFTWARE_HASWELL
1571 slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1573 slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1577 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1578 intel_batchbuffer_emit_mi_flush(batch);
1580 // picture level programing
1581 gen9_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain into the slice batchbuffer built above */
1583 BEGIN_BCS_BATCH(batch, 3);
1584 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1585 OUT_BCS_RELOC(batch,
1587 I915_GEM_DOMAIN_COMMAND, 0,
1589 OUT_BCS_BATCH(batch, 0);
1590 ADVANCE_BCS_BATCH(batch);
1593 intel_batchbuffer_end_atomic(batch);
1595 dri_bo_unreference(slice_batch_bo);
/*
 * Encode one AVC picture: init the MFC context, prepare the frame, program
 * the BCS pipeline and run it. Under CBR the frame may be re-encoded by the
 * BRC loop (outer loop elided in this extract) until the HRD constraints are
 * met; an unrecoverable over/underflow is reported once to stderr.
 * Returns VA_STATUS_SUCCESS.
 *
 * FIX(review): "¤t_frame_bits_size" was mojibake — "&curren" had been
 * swallowed as the HTML entity for '¤'. Restored to
 * &current_frame_bits_size, matching the local declared above and its
 * by-value use in intel_mfc_brc_postpack() below.
 * NOTE(review): partial extract — some original lines (e.g. the declaration
 * of 'sts' and the re-encode loop braces) are elided here.
 */
1600 gen9_mfc_avc_encode_picture(VADriverContextP ctx,
1601 struct encode_state *encode_state,
1602 struct intel_encoder_context *encoder_context)
1604 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1605 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1606 int current_frame_bits_size;
1610 gen9_mfc_init(ctx, encode_state, encoder_context);
1611 intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1612 /*Programing bcs pipeline*/
1613 gen9_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1614 gen9_mfc_run(ctx, encode_state, encoder_context);
1615 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1616 gen9_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1617 sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1618 if (sts == BRC_NO_HRD_VIOLATION) {
1619 intel_mfc_hrd_context_update(encode_state, mfc_context);
1622 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1623 if (!mfc_context->hrd.violation_noted) {
1624 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1625 mfc_context->hrd.violation_noted = 1;
1627 return VA_STATUS_SUCCESS;
1634 return VA_STATUS_SUCCESS;
/* Map VAEncPictureType (I/P/B) to the MFX_MPEG2_PIC_STATE picture-type field.
 * NOTE(review): the table's entries are elided in this extract. */
1642 va_to_gen9_mpeg2_picture_type[3] = {
/*
 * Emit MFX_MPEG2_PIC_STATE (13 dwords) from the VA MPEG-2 picture parameters:
 * f_codes, picture-coding-extension flags, picture type and frame dimensions.
 * NOTE(review): partial extract — some original lines are elided here.
 */
1649 gen9_mfc_mpeg2_pic_state(VADriverContextP ctx,
1650 struct intel_encoder_context *encoder_context,
1651 struct encode_state *encode_state)
1653 struct intel_batchbuffer *batch = encoder_context->base.batch;
1654 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1655 VAEncPictureParameterBufferMPEG2 *pic_param;
1656 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1657 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1658 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1660 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1661 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1662 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1664 BEGIN_BCS_BATCH(batch, 13);
1665 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1666 OUT_BCS_BATCH(batch,
1667 (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1668 (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1669 (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1670 (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1671 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1672 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1673 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1674 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1675 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1676 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1677 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1678 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1679 OUT_BCS_BATCH(batch,
1680 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1681 va_to_gen9_mpeg2_picture_type[pic_param->picture_type] << 9 |
1683 OUT_BCS_BATCH(batch,
1684 1 << 31 | /* slice concealment */
1685 (height_in_mbs - 1) << 16 |
1686 (width_in_mbs - 1));
/* NOTE(review): meaning of this magic dword is not derivable from this
 * extract; it is gated on quantiser_scale_code >= 14. */
1688 if (slice_param && slice_param->quantiser_scale_code >= 14)
1689 OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1691 OUT_BCS_BATCH(batch, 0);
1693 OUT_BCS_BATCH(batch, 0);
1694 OUT_BCS_BATCH(batch,
1695 0xFFF << 16 | /* InterMBMaxSize */
1696 0xFFF << 0 | /* IntraMBMaxSize */
1698 OUT_BCS_BATCH(batch, 0);
1699 OUT_BCS_BATCH(batch, 0);
1700 OUT_BCS_BATCH(batch, 0);
1701 OUT_BCS_BATCH(batch, 0);
1702 OUT_BCS_BATCH(batch, 0);
1703 OUT_BCS_BATCH(batch, 0);
1704 ADVANCE_BCS_BATCH(batch);
/*
 * Load the MPEG-2 quantiser matrices: the default intra matrix from the
 * MPEG-2 spec and a flat (all-16) non-intra matrix, each uploaded as
 * 16 dwords via the shared MFX QM state helper.
 */
1708 gen9_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1710 unsigned char intra_qm[64] = {
1711 8, 16, 19, 22, 26, 27, 29, 34,
1712 16, 16, 22, 24, 27, 29, 34, 37,
1713 19, 22, 26, 27, 29, 34, 34, 38,
1714 22, 22, 26, 27, 29, 34, 37, 40,
1715 22, 26, 27, 29, 32, 35, 40, 48,
1716 26, 27, 29, 32, 35, 40, 48, 58,
1717 26, 27, 29, 34, 38, 46, 56, 69,
1718 27, 29, 35, 38, 46, 56, 69, 83
1721 unsigned char non_intra_qm[64] = {
1722 16, 16, 16, 16, 16, 16, 16, 16,
1723 16, 16, 16, 16, 16, 16, 16, 16,
1724 16, 16, 16, 16, 16, 16, 16, 16,
1725 16, 16, 16, 16, 16, 16, 16, 16,
1726 16, 16, 16, 16, 16, 16, 16, 16,
1727 16, 16, 16, 16, 16, 16, 16, 16,
1728 16, 16, 16, 16, 16, 16, 16, 16,
1729 16, 16, 16, 16, 16, 16, 16, 16
1732 gen9_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1733 gen9_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
/*
 * Load the forward (reciprocal, 16.16-scaled) quantiser matrices used by the
 * PAK: 65536/q for the intra matrix and a flat 0x1000 (= 65536/16) non-intra
 * matrix, each uploaded as 32 dwords.
 * NOTE(review): a few intra entries (e.g. 65536/0x18, the 65536/0x13 pair in
 * rows 4-5) do not mirror the intra_qm table above — verify against the
 * upstream source before "fixing".
 */
1737 gen9_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1739 unsigned short intra_fqm[64] = {
1740 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1741 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1742 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1743 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1744 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1745 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1746 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1747 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1750 unsigned short non_intra_fqm[64] = {
1751 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1752 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1753 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1754 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1755 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1756 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1757 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1758 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1761 gen9_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1762 gen9_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
/*
 * Emit MFC_MPEG2_SLICEGROUP_STATE (8 dwords) for one slice group: flags,
 * start/next MB positions (dword elided in this extract), QP, and the
 * indirect PAK-BSE output offset.
 * NOTE(review): the parameter name "is_fisrt_slice_group" is a typo for
 * "is_first_slice_group" inherited from the original source; partial
 * extract — some original lines are elided here.
 */
1766 gen9_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1767 struct intel_encoder_context *encoder_context,
1769 int next_x, int next_y,
1770 int is_fisrt_slice_group,
1771 int is_last_slice_group,
1774 struct intel_batchbuffer *batch)
1776 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1779 batch = encoder_context->base.batch;
1781 BEGIN_BCS_BATCH(batch, 8);
1783 OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1784 OUT_BCS_BATCH(batch,
1785 0 << 31 | /* MbRateCtrlFlag */
1786 !!is_last_slice_group << 19 | /* IsLastSliceGrp */
1787 1 << 17 | /* Insert Header before the first slice group data */
1788 1 << 16 | /* SliceData PresentFlag: always 1 */
1789 1 << 15 | /* TailPresentFlag: always 1 */
1790 0 << 14 | /* FirstSliceHdrDisabled: slice header for each slice */
1791 !!intra_slice << 13 | /* IntraSlice */
1792 !!intra_slice << 12 | /* IntraSliceFlag */
1794 OUT_BCS_BATCH(batch,
1800 OUT_BCS_BATCH(batch, qp); /* FIXME: SliceGroupQp */
1801 /* bitstream pointer is only loaded once for the first slice of a frame when
1802 * LoadSlicePointerFlag is 0
1804 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1805 OUT_BCS_BATCH(batch, 0); /* FIXME: */
1806 OUT_BCS_BATCH(batch, 0); /* FIXME: CorrectPoints */
1807 OUT_BCS_BATCH(batch, 0); /* FIXME: CVxxx */
1809 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFC_MPEG2_PAK_OBJECT (9 dwords) for an intra macroblock: no
 * packed MVs, CBP and size targets from the caller, all four MV slots zero.
 * Returns the command length in dwords (9).
 * NOTE(review): partial extract — some original lines are elided here.
 */
1813 gen9_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1814 struct intel_encoder_context *encoder_context,
1816 int first_mb_in_slice,
1817 int last_mb_in_slice,
1818 int first_mb_in_slice_group,
1819 int last_mb_in_slice_group,
1822 int coded_block_pattern,
1823 unsigned char target_size_in_word,
1824 unsigned char max_size_in_word,
1825 struct intel_batchbuffer *batch)
1827 int len_in_dwords = 9;
1830 batch = encoder_context->base.batch;
1832 BEGIN_BCS_BATCH(batch, len_in_dwords);
1834 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1835 OUT_BCS_BATCH(batch,
1836 0 << 24 | /* PackedMvNum */
1837 0 << 20 | /* MvFormat */
1838 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
1839 0 << 15 | /* TransformFlag: frame DCT */
1840 0 << 14 | /* FieldMbFlag */
1841 1 << 13 | /* IntraMbFlag */
1842 mb_type << 8 | /* MbType: Intra */
1843 0 << 2 | /* SkipMbFlag */
1844 0 << 0 | /* InterMbMode */
1846 OUT_BCS_BATCH(batch, y << 16 | x);
1847 OUT_BCS_BATCH(batch,
1848 max_size_in_word << 24 |
1849 target_size_in_word << 16 |
1850 coded_block_pattern << 6 | /* CBP */
1852 OUT_BCS_BATCH(batch,
1853 last_mb_in_slice << 31 |
1854 first_mb_in_slice << 30 |
1855 0 << 27 | /* EnableCoeffClamp */
1856 last_mb_in_slice_group << 26 |
1857 0 << 25 | /* MbSkipConvDisable */
1858 first_mb_in_slice_group << 24 |
1859 0 << 16 | /* MvFieldSelect */
1860 qp_scale_code << 0 |
1862 OUT_BCS_BATCH(batch, 0); /* MV[0][0] */
1863 OUT_BCS_BATCH(batch, 0); /* MV[1][0] */
1864 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
1865 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
1867 ADVANCE_BCS_BATCH(batch);
1869 return len_in_dwords;
/* Byte offset of the motion vectors inside an MPEG-2 VME output record */
1873 #define MPEG2_INTER_MV_OFFSET 48
/* Per-f_code legal MV range, indexed by f_code.
 * NOTE(review): the table's initializer entries are elided in this extract. */
1875 static struct _mv_ranges
1877 int low; /* in the unit of 1/2 pixel */
1878 int high; /* in the unit of 1/2 pixel */
/*
 * Sanitize one MPEG-2 motion-vector component (half-pel units): the first
 * test rejects vectors that would reference pixels outside the picture
 * (pos is the MB coordinate, display_max the picture extent in pixels);
 * valid f_codes (1..9) then clamp mv into the f_code's legal range.
 * NOTE(review): partial extract — the out-of-picture fallback and the
 * return statement are elided here.
 */
1893 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
1895 if (mv + pos * 16 * 2 < 0 ||
1896 mv + (pos + 1) * 16 * 2 > display_max * 2)
1899 if (f_code > 0 && f_code < 10) {
1900 if (mv < mv_ranges[f_code].low)
1901 mv = mv_ranges[f_code].low;
1903 if (mv > mv_ranges[f_code].high)
1904 mv = mv_ranges[f_code].high;
/*
 * Emit one MFC_MPEG2_PAK_OBJECT (9 dwords) for an inter macroblock: two
 * packed forward/backward MVs taken from the VME output record (halved from
 * quarter-pel to half-pel and clamped via mpeg2_motion_vector), full CBP,
 * frame-based MbType. Returns the command length in dwords (9).
 *
 * FIX(review): removed a stray second semicolon (empty statement) after the
 * mvptr assignment. No behavior change.
 * NOTE(review): partial extract — some original lines are elided here.
 */
1911 gen9_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1912 struct encode_state *encode_state,
1913 struct intel_encoder_context *encoder_context,
1915 int width_in_mbs, int height_in_mbs,
1917 int first_mb_in_slice,
1918 int last_mb_in_slice,
1919 int first_mb_in_slice_group,
1920 int last_mb_in_slice_group,
1922 unsigned char target_size_in_word,
1923 unsigned char max_size_in_word,
1924 struct intel_batchbuffer *batch)
1926 VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1927 int len_in_dwords = 9;
1928 short *mvptr, mvx0, mvy0, mvx1, mvy1;
1931 batch = encoder_context->base.batch;
1933 mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);
1934 mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1935 mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1936 mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1937 mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1939 BEGIN_BCS_BATCH(batch, len_in_dwords);
1941 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1942 OUT_BCS_BATCH(batch,
1943 2 << 24 | /* PackedMvNum */
1944 7 << 20 | /* MvFormat */
1945 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
1946 0 << 15 | /* TransformFlag: frame DCT */
1947 0 << 14 | /* FieldMbFlag */
1948 0 << 13 | /* IntraMbFlag */
1949 1 << 8 | /* MbType: Frame-based */
1950 0 << 2 | /* SkipMbFlag */
1951 0 << 0 | /* InterMbMode */
1953 OUT_BCS_BATCH(batch, y << 16 | x);
1954 OUT_BCS_BATCH(batch,
1955 max_size_in_word << 24 |
1956 target_size_in_word << 16 |
1957 0x3f << 6 | /* CBP */
1959 OUT_BCS_BATCH(batch,
1960 last_mb_in_slice << 31 |
1961 first_mb_in_slice << 30 |
1962 0 << 27 | /* EnableCoeffClamp */
1963 last_mb_in_slice_group << 26 |
1964 0 << 25 | /* MbSkipConvDisable */
1965 first_mb_in_slice_group << 24 |
1966 0 << 16 | /* MvFieldSelect */
1967 qp_scale_code << 0 |
1970 OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16); /* MV[0][0] */
1971 OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16); /* MV[1][0] */
1972 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
1973 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
1975 ADVANCE_BCS_BATCH(batch);
1977 return len_in_dwords;
/*
 * Insert the packed MPEG-2 sequence (SPS) and picture (PPS) headers supplied
 * by the application, if present, into the slice batch. Lengths are given in
 * bits; ALIGN(bits,32)>>5 converts to the dword count insert_object expects.
 * NOTE(review): partial extract — some insert_object arguments are elided.
 */
1981 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1982 struct encode_state *encode_state,
1983 struct intel_encoder_context *encoder_context,
1984 struct intel_batchbuffer *slice_batch)
1986 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1987 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
/* Sequence header, if the app packed one */
1989 if (encode_state->packed_header_data[idx]) {
1990 VAEncPackedHeaderParameterBuffer *param = NULL;
1991 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1992 unsigned int length_in_bits;
1994 assert(encode_state->packed_header_param[idx]);
1995 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1996 length_in_bits = param->bit_length;
1998 mfc_context->insert_object(ctx,
2001 ALIGN(length_in_bits, 32) >> 5,
2002 length_in_bits & 0x1f,
2003 5, /* FIXME: check it */
2006 0, /* Needn't insert emulation bytes for MPEG-2 */
/* Picture header, if the app packed one */
2010 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2012 if (encode_state->packed_header_data[idx]) {
2013 VAEncPackedHeaderParameterBuffer *param = NULL;
2014 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2015 unsigned int length_in_bits;
2017 assert(encode_state->packed_header_param[idx]);
2018 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2019 length_in_bits = param->bit_length;
2021 mfc_context->insert_object(ctx,
2024 ALIGN(length_in_bits, 32) >> 5,
2025 length_in_bits & 0x1f,
2026 5, /* FIXME: check it */
2029 0, /* Needn't insert emulation bytes for MPEG-2 */
/*
 * Build the PAK commands for one MPEG-2 slice group into slice_batch:
 * slice-group state, headers (for the first group), one PAK object per
 * macroblock (choosing intra vs. inter from the VME RDO costs), and the
 * trailing delimiter bytes.
 *
 * next_slice_group_param is NULL for the last slice group of the picture;
 * that both marks the slice-group state as "last" and selects the picture
 * tail delimiter instead of the section delimiter.
 */
gen9_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
                                    struct intel_batchbuffer *slice_batch)
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
    unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
    unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
    int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;

    /* MB coordinates of this group's first macroblock */
    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
    h_start_pos = slice_param->macroblock_address % width_in_mbs;
    v_start_pos = slice_param->macroblock_address / width_in_mbs;
    assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);

    /* Map the VME output so per-MB RDO costs can be read below */
    dri_bo_map(vme_context->vme_output.bo , 0);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    /* Start of the next group, or one row past the bottom for the last group */
    if (next_slice_group_param) {
        h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
        v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
        h_next_start_pos = 0;
        v_next_start_pos = height_in_mbs;

    gen9_mfc_mpeg2_slicegroup_state(ctx,
                                    next_slice_group_param == NULL,
                                    slice_param->is_intra_slice,
                                    slice_param->quantiser_scale_code,

    /* Headers go in front of the first slice group only */
    if (slice_index == 0)
        intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    /* Insert '00' to make sure the header is valid */
    mfc_context->insert_object(ctx,
                               (unsigned int*)section_delimiter,
                               8, /* 8bits in the last DWORD */

    for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
        /* PAK for each macroblocks */
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
            int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
            int first_mb_in_slice = (j == 0);
            int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
            int first_mb_in_slice_group = (i == 0 && j == 0);
            int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
                                          j == slice_param->num_macroblocks - 1);

            /* VME result record for this macroblock */
            msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);

            if (slice_param->is_intra_slice) {
                gen9_mfc_mpeg2_pak_object_intra(ctx,
                                                first_mb_in_slice_group,
                                                last_mb_in_slice_group,
                                                slice_param->quantiser_scale_code,

                /* Inter slice: pick intra vs. inter per MB by comparing RDO costs */
                int inter_rdo, intra_rdo;
                inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
                intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;

                if (intra_rdo < inter_rdo)
                    gen9_mfc_mpeg2_pak_object_intra(ctx,
                                                    first_mb_in_slice_group,
                                                    last_mb_in_slice_group,
                                                    slice_param->quantiser_scale_code,

                    gen9_mfc_mpeg2_pak_object_inter(ctx,
                                                    width_in_mbs, height_in_mbs,
                                                    first_mb_in_slice_group,
                                                    last_mb_in_slice_group,
                                                    slice_param->quantiser_scale_code,

    dri_bo_unmap(vme_context->vme_output.bo);

    if (next_slice_group_param == NULL) { /* end of a picture */
        mfc_context->insert_object(ctx,
                                   (unsigned int *)tail_delimiter,
                                   8, /* 8bits in the last DWORD */
    } else { /* end of a slice group */
        mfc_context->insert_object(ctx,
                                   (unsigned int *)section_delimiter,
                                   8, /* 8bits in the last DWORD */
 * A batch buffer for all slices, including slice state
 * slice insert object and slice pak object commands
/*
 * Build the per-slice command stream for the whole picture in the
 * auxiliary batch buffer and return its BO (ownership transferred to the
 * caller, who must dri_bo_unreference() it).  The aux batchbuffer is
 * consumed: it is freed here and the context pointer reset to NULL.
 */
gen9_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;
    VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;

    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;

    /* One slice group per slice parameter buffer; peek at the next one so
     * each group knows whether it is the last of the picture */
    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        if (i == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;

        gen9_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);

    intel_batchbuffer_align(batch, 8);

    /* Terminate the second-level batch */
    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    /* Keep the BO alive for the caller, then release the wrapper */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;
/*
 * Emit all picture-level MFX state commands for MPEG-2 encoding, in the
 * order the hardware expects: pipe mode select, surface state, indirect
 * object base, pipe/BSP buffer addresses, picture state, then the
 * (inverse-)quantizer matrix states.
 */
gen9_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen9_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
    gen9_mfc_mpeg2_qm_state(ctx, encoder_context);
    gen9_mfc_mpeg2_fqm_state(ctx, encoder_context);
/*
 * Program the whole MPEG-2 PAK pipeline on the BCS ring: build the
 * software slice batch, emit picture-level state, then chain to the slice
 * batch with a second-level MI_BATCH_BUFFER_START.
 */
gen9_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Owns a reference to slice_batch_bo; released at the end */
    slice_batch_bo = gen9_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen9_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain into the slice batch: bit 8 = second-level batch, bit 0 = GTT */
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_COMMAND, 0,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    intel_batchbuffer_end_atomic(batch);

    dri_bo_unreference(slice_batch_bo);
/*
 * Bind the per-frame surfaces and buffers for MPEG-2 encoding into the
 * MFC context: reconstructed output, forward/backward references, the
 * input YUV surface, and the coded (PAK/BSE) output buffer.  Every BO
 * stored in the context gets an extra reference here.
 */
intel_mfc_mpeg2_prepare(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAStatus vaStatus = VA_STATUS_SUCCESS;

    /* reconstructed surface */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    mfc_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* forward reference */
    obj_surface = encode_state->reference_objects[0];

    if (obj_surface && obj_surface->bo) {
        mfc_context->reference_surfaces[0].bo = obj_surface->bo;
        dri_bo_reference(mfc_context->reference_surfaces[0].bo);
        /* else: no forward reference supplied */
        mfc_context->reference_surfaces[0].bo = NULL;

    /* backward reference */
    obj_surface = encode_state->reference_objects[1];

    if (obj_surface && obj_surface->bo) {
        mfc_context->reference_surfaces[1].bo = obj_surface->bo;
        dri_bo_reference(mfc_context->reference_surfaces[1].bo);
        /* else: fall back to the forward reference (if any) */
        mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;

        if (mfc_context->reference_surfaces[1].bo)
            dri_bo_reference(mfc_context->reference_surfaces[1].bo);

    /* Remaining slots alternate between the two references */
    for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;

        if (mfc_context->reference_surfaces[i].bo)
            dri_bo_reference(mfc_context->reference_surfaces[i].bo);

    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: bitstream written after the driver-private header */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
/*
 * Top-level MPEG-2 encode entry point for one picture:
 * allocate/reset MFC resources, bind per-frame surfaces, program the BCS
 * pipeline, then submit it.
 */
gen9_mfc_mpeg2_encode_picture(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
    gen9_mfc_init(ctx, encode_state, encoder_context);
    intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
    /*Programming bcs pipeline*/
    gen9_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
    gen9_mfc_run(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
2373 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
2374 VAEncPictureParameterBufferVP8 *pic_param,
2375 VAQMatrixBufferVP8 *q_matrix)
2378 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
2379 unsigned char *coeff_probs_stream_in_buffer;
2381 mfc_context->vp8_state.frame_header_lf_update_pos = 0;
2382 mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
2383 mfc_context->vp8_state.frame_header_token_update_pos = 0;
2384 mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
2386 mfc_context->vp8_state.prob_skip_false = 255;
2387 memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
2388 memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
2391 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
2392 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
2394 mfc_context->vp8_state.prob_intra = 255;
2395 mfc_context->vp8_state.prob_last = 128;
2396 mfc_context->vp8_state.prob_gf = 128;
2398 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
2399 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
2401 mfc_context->vp8_state.prob_intra = 63;
2402 mfc_context->vp8_state.prob_last = 128;
2403 mfc_context->vp8_state.prob_gf = 128;
2406 mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
2408 dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
2409 coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
2410 assert(coeff_probs_stream_in_buffer);
2411 memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
2412 dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
2415 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
2416 VAQMatrixBufferVP8 *q_matrix)
2419 /*some other probabilities need to be updated*/
2422 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
2423 VAEncPictureParameterBufferVP8 *pic_param,
2424 VAQMatrixBufferVP8 *q_matrix,
2425 struct gen6_mfc_context *mfc_context);
2427 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
2428 struct gen6_mfc_context *mfc_context)
2430 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2431 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2432 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2433 unsigned char *frame_header_buffer;
2435 binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context);
2437 dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
2438 frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
2439 assert(frame_header_buffer);
2440 memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
2441 dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
/* Upper bounds (bytes) for the VP8 frame-header and token-statistics BOs */
#define MAX_VP8_FRAME_HEADER_SIZE 0x2000
#define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
2447 static void gen9_mfc_vp8_init(VADriverContextP ctx,
2448 struct encode_state *encode_state,
2449 struct intel_encoder_context *encoder_context)
2451 struct i965_driver_data *i965 = i965_driver_data(ctx);
2452 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2455 int width_in_mbs = 0;
2456 int height_in_mbs = 0;
2457 int slice_batchbuffer_size;
2459 VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2460 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2461 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2463 width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
2464 height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
2466 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
2467 (SLICE_HEADER + SLICE_TAIL);
2469 /*Encode common setup for MFC*/
2470 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2471 mfc_context->post_deblocking_output.bo = NULL;
2473 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2474 mfc_context->pre_deblocking_output.bo = NULL;
2476 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2477 mfc_context->uncompressed_picture_source.bo = NULL;
2479 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
2480 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2482 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2483 if ( mfc_context->direct_mv_buffers[i].bo != NULL)
2484 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2485 mfc_context->direct_mv_buffers[i].bo = NULL;
2488 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2489 if (mfc_context->reference_surfaces[i].bo != NULL)
2490 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2491 mfc_context->reference_surfaces[i].bo = NULL;
2494 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2495 bo = dri_bo_alloc(i965->intel.bufmgr,
2500 mfc_context->intra_row_store_scratch_buffer.bo = bo;
2502 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2503 bo = dri_bo_alloc(i965->intel.bufmgr,
2505 width_in_mbs * height_in_mbs * 16,
2508 mfc_context->macroblock_status_buffer.bo = bo;
2510 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2511 bo = dri_bo_alloc(i965->intel.bufmgr,
2513 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
2516 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2518 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2519 bo = dri_bo_alloc(i965->intel.bufmgr,
2521 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
2524 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2526 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2527 mfc_context->mfc_batchbuffer_surface.bo = NULL;
2529 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2530 mfc_context->aux_batchbuffer_surface.bo = NULL;
2532 if (mfc_context->aux_batchbuffer)
2533 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2535 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
2536 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
2537 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
2538 mfc_context->aux_batchbuffer_surface.pitch = 16;
2539 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
2540 mfc_context->aux_batchbuffer_surface.size_block = 16;
2542 i965_gpe_context_init(ctx, &mfc_context->gpe_context);
2544 /* alloc vp8 encoding buffers*/
2545 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
2546 bo = dri_bo_alloc(i965->intel.bufmgr,
2548 MAX_VP8_FRAME_HEADER_SIZE,
2551 mfc_context->vp8_state.frame_header_bo = bo;
2553 mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 256 * 9;
2554 for(i = 0; i < 8; i++) {
2555 mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 256 * (i + 1);
2557 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
2558 bo = dri_bo_alloc(i965->intel.bufmgr,
2560 mfc_context->vp8_state.intermediate_buffer_max_size,
2563 mfc_context->vp8_state.intermediate_bo = bo;
2565 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
2566 bo = dri_bo_alloc(i965->intel.bufmgr,
2568 width_in_mbs * height_in_mbs * 16,
2571 mfc_context->vp8_state.stream_out_bo = bo;
2573 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
2574 bo = dri_bo_alloc(i965->intel.bufmgr,
2576 sizeof(vp8_default_coef_probs),
2579 mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
2581 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
2582 bo = dri_bo_alloc(i965->intel.bufmgr,
2584 VP8_TOKEN_STATISTICS_BUFFER_SIZE,
2587 mfc_context->vp8_state.token_statistics_bo = bo;
2589 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
2590 bo = dri_bo_alloc(i965->intel.bufmgr,
2592 width_in_mbs * 16 * 64,
2595 mfc_context->vp8_state.mpc_row_store_bo = bo;
2597 vp8_enc_state_init(mfc_context, pic_param, q_matrix);
2598 vp8_enc_frame_header_binarize(encode_state, mfc_context);
/*
 * Bind the per-frame surfaces and buffers for VP8 encoding: the
 * reconstructed surface goes to the pre- or post-deblocking slot
 * depending on whether the loop filter is active, then the reference
 * frames, the input YUV surface, and the coded/final-frame buffer.
 */
intel_mfc_vp8_prepare(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAStatus vaStatus = VA_STATUS_SUCCESS;

    /* reconstructed surface */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    /* Loop filter off -> write before deblocking; on -> after deblocking */
    if (pic_param->loop_filter_level[0] == 0) {
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* set vp8 reference frames */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(mfc_context->reference_surfaces[i].bo);
            mfc_context->reference_surfaces[i].bo = NULL;

    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: bitstream written after the driver-private header */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* Final VP8 frame is emitted into the same coded buffer */
    dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
    mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
    mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
    dri_bo_reference(mfc_context->vp8_state.final_frame_bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit the MFX_VP8_ENCODER_CFG command: rate-control / statistics flags,
 * per-MB bit-count limits, frame dimensions, and the frame-header bit
 * count plus the bin-buffer update pointers computed during header
 * binarization.
 */
gen9_mfc_vp8_encoder_cfg(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;

    BEGIN_BCS_BATCH(batch, 30);
    OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */

    OUT_BCS_BATCH(batch,
                  0 << 9 | /* compressed bitstream output disable */
                  1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
                  0 << 6 | /* RC initial pass */
                  0 << 4 | /* update segment feature data flag */
                  1 << 3 | /* bitstream statistics output enable */
                  1 << 2 | /* token statistics output enable */
                  0 << 1 | /* final bitstream output disable */

    OUT_BCS_BATCH(batch, 0); /*DW2*/

    OUT_BCS_BATCH(batch,
                  0xfff << 16 | /* max intra mb bit count limit */
                  0xfff << 0 /* max inter mb bit count limit */

    OUT_BCS_BATCH(batch, 0); /*DW4*/
    OUT_BCS_BATCH(batch, 0); /*DW5*/
    OUT_BCS_BATCH(batch, 0); /*DW6*/
    OUT_BCS_BATCH(batch, 0); /*DW7*/
    OUT_BCS_BATCH(batch, 0); /*DW8*/
    OUT_BCS_BATCH(batch, 0); /*DW9*/
    OUT_BCS_BATCH(batch, 0); /*DW10*/
    OUT_BCS_BATCH(batch, 0); /*DW11*/
    OUT_BCS_BATCH(batch, 0); /*DW12*/
    OUT_BCS_BATCH(batch, 0); /*DW13*/
    OUT_BCS_BATCH(batch, 0); /*DW14*/
    OUT_BCS_BATCH(batch, 0); /*DW15*/
    OUT_BCS_BATCH(batch, 0); /*DW16*/
    OUT_BCS_BATCH(batch, 0); /*DW17*/
    OUT_BCS_BATCH(batch, 0); /*DW18*/
    OUT_BCS_BATCH(batch, 0); /*DW19*/
    OUT_BCS_BATCH(batch, 0); /*DW20*/
    OUT_BCS_BATCH(batch, 0); /*DW21*/

    OUT_BCS_BATCH(batch,
                  pic_param->pic_flags.bits.show_frame << 23 |
                  pic_param->pic_flags.bits.version << 20

    /* Scaled dimensions packed as (scale:2 | size:14) per axis */
    OUT_BCS_BATCH(batch,
                  (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
                  (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */

    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFX_VP8_PIC_STATE command (38 DWORDs): frame geometry, picture
 * flags, loop-filter levels, quantizer indices and deltas, segment/mode/MV
 * probabilities, and the loop-filter reference/mode deltas.
 */
gen9_mfc_vp8_pic_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;

    /* 1..8 token partitions; log2 of the count is what the HW wants */
    assert(pic_param->pic_flags.bits.num_token_partitions > 0);
    assert(pic_param->pic_flags.bits.num_token_partitions < 9);
    log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions);

    /*update mode and token probs*/
    vp8_enc_state_update(mfc_context, q_matrix);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame size in MBs minus one, height:16 | width:16 */
    OUT_BCS_BATCH(batch,
                  (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);

    OUT_BCS_BATCH(batch,
                  pic_param->sharpness_level << 16 |
                  pic_param->pic_flags.bits.sign_bias_alternate << 13 |
                  pic_param->pic_flags.bits.sign_bias_golden << 12 |
                  pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
                  pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
                  pic_param->pic_flags.bits.segmentation_enabled << 8 |
                  !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  (pic_param->pic_flags.bits.version / 2) << 4 |
                  (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */

    /* Per-segment loop-filter levels */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] << 8 |
                  pic_param->loop_filter_level[0] << 0);

    /* Per-segment quantizer indices */
    OUT_BCS_BATCH(batch,
                  q_matrix->quantization_index[3] << 24 |
                  q_matrix->quantization_index[2] << 16 |
                  q_matrix->quantization_index[1] << 8 |
                  q_matrix->quantization_index[0] << 0);

    /* Quantizer deltas encoded as sign-bit nibble + magnitude:
     * (v >> 15) on the unsigned short extracts the sign bit */
    OUT_BCS_BATCH(batch,
                  ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
                  abs(q_matrix->quantization_index_delta[4]) << 24 |
                  ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
                  abs(q_matrix->quantization_index_delta[3]) << 16 |
                  ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
                  abs(q_matrix->quantization_index_delta[2]) << 8 |
                  ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
                  abs(q_matrix->quantization_index_delta[1]) << 0);

    OUT_BCS_BATCH(batch,
                  ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
                  abs(q_matrix->quantization_index_delta[0]) << 0);

    OUT_BCS_BATCH(batch,
                  pic_param->clamp_qindex_high << 8 |
                  pic_param->clamp_qindex_low << 0);

    /* DW8-DW18: filled with all-ones */
    for (i = 8; i < 19; i++) {
        OUT_BCS_BATCH(batch, 0xffffffff);

    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
                  mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
                  mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);

    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.prob_skip_false << 24 |
                  mfc_context->vp8_state.prob_intra << 16 |
                  mfc_context->vp8_state.prob_last << 8 |
                  mfc_context->vp8_state.prob_gf << 0);

    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.y_mode_probs[3] << 24 |
                  mfc_context->vp8_state.y_mode_probs[2] << 16 |
                  mfc_context->vp8_state.y_mode_probs[1] << 8 |
                  mfc_context->vp8_state.y_mode_probs[0] << 0);

    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.uv_mode_probs[2] << 16 |
                  mfc_context->vp8_state.uv_mode_probs[1] << 8 |
                  mfc_context->vp8_state.uv_mode_probs[0] << 0);

    /* MV update value, DW23-DW32 */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            /* mv_probs has 19 entries per context; pad the 20th byte with 0 */
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
                          mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
                          mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
                          mfc_context->vp8_state.mv_probs[i][j + 0] << 0);

    /* Loop-filter deltas are 7-bit signed fields */
    OUT_BCS_BATCH(batch,
                  (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
                  (pic_param->ref_lf_delta[0] & 0x7f) << 0);

    OUT_BCS_BATCH(batch,
                  (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
                  (pic_param->mode_lf_delta[0] & 0x7f) << 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit one VP8 buffer-address group into the batch: a relocation for the
 * BO (read/write instruction domain) followed by padding DWORDs.
 * NOTE(review): the relocation's bo/offset argument lines are partially
 * elided in this chunk; no comment lines are added inside the macro body
 * to avoid disturbing the backslash continuations.
 */
#define OUT_VP8_BUFFER(bo, offset) \
    OUT_BCS_RELOC(batch, \
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
    OUT_BCS_BATCH(batch, 0); \
    OUT_BCS_BATCH(batch, 0); \
    OUT_BCS_BATCH(batch, 0);
/*
 * Emit MFX_VP8_BSP_BUF_BASE_ADDR_STATE (32 DWORDs): base addresses for
 * the frame-header BO, the intermediate token buffer with its 8 partition
 * offsets and max size, the final frame output (past the coded-buffer
 * header), stream-out, coefficient-probability stream-in, token
 * statistics, and the MPC row store.
 */
gen9_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 32);
    OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));

    OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);

    /* Final frame lands after the driver-private coded-buffer header */
    OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
    OUT_BCS_BATCH(batch, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit all picture-level MFX state commands for VP8 encoding, in hardware
 * order: pipe mode select, surface state, indirect object base, pipe/BSP
 * buffer addresses, VP8 BSP buffers, picture state, then encoder config.
 */
gen9_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen9_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
    gen9_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
    gen9_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
2940 gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx,
2941 struct intel_encoder_context *encoder_context,
2944 struct intel_batchbuffer *batch)
2947 batch = encoder_context->base.batch;
2949 BEGIN_BCS_BATCH(batch, 7);
2951 OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
2952 OUT_BCS_BATCH(batch, 0);
2953 OUT_BCS_BATCH(batch, 0);
2954 OUT_BCS_BATCH(batch,
2955 (0 << 20) | /* mv format: intra mb */
2956 (0 << 18) | /* Segment ID */
2957 (0 << 17) | /* disable coeff clamp */
2958 (1 << 13) | /* intra mb flag */
2959 (0 << 11) | /* refer picture select: last frame */
2960 (0 << 8) | /* mb type: 16x16 intra mb */
2961 (0 << 4) | /* mb uv mode: dc_pred */
2962 (0 << 2) | /* skip mb flag: disable */
2965 OUT_BCS_BATCH(batch, (y << 16) | x);
2966 OUT_BCS_BATCH(batch, 0); /* y_mode: dc_pred */
2967 OUT_BCS_BATCH(batch, 0);
2969 ADVANCE_BCS_BATCH(batch);
2973 gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx,
2974 struct intel_encoder_context *encoder_context,
2977 struct intel_batchbuffer *batch)
2979 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2982 batch = encoder_context->base.batch;
2984 BEGIN_BCS_BATCH(batch, 7);
2986 OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
2987 OUT_BCS_BATCH(batch,
2988 (0 << 29) | /* enable inline mv data: disable */
2990 OUT_BCS_BATCH(batch,
2992 OUT_BCS_BATCH(batch,
2993 (4 << 20) | /* mv format: inter */
2994 (0 << 18) | /* Segment ID */
2995 (0 << 17) | /* coeff clamp: disable */
2996 (0 << 13) | /* intra mb flag: inter mb */
2997 (0 << 11) | /* refer picture select: last frame */
2998 (0 << 8) | /* mb type: 16x16 */
2999 (0 << 4) | /* mb uv mode: dc_pred */
3000 (0 << 2) | /* skip mb flag: disable */
3003 OUT_BCS_BATCH(batch, (y << 16) | x);
3006 OUT_BCS_BATCH(batch, 0x88888888);
3007 OUT_BCS_BATCH(batch, 0x88888888);
3009 ADVANCE_BCS_BATCH(batch);
3013 gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx,
3014 struct encode_state *encode_state,
3015 struct intel_encoder_context *encoder_context,
3016 struct intel_batchbuffer *slice_batch)
3018 struct gen6_vme_context *vme_context = encoder_context->vme_context;
3019 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3020 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3021 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3022 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3023 unsigned int *msg = NULL;
3024 unsigned char *msg_ptr = NULL;
3025 unsigned int i, is_intra_frame;
3027 is_intra_frame = !pic_param->pic_flags.bits.frame_type;
3029 dri_bo_map(vme_context->vme_output.bo , 1);
3030 msg = msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
3032 for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
3033 int h_pos = i % width_in_mbs;
3034 int v_pos = i / width_in_mbs;
3036 if (is_intra_frame) {
3037 gen9_mfc_vp8_pak_object_intra(ctx,
3043 gen9_mfc_vp8_pak_object_inter(ctx,
3051 dri_bo_unmap(vme_context->vme_output.bo);
3055 * A batch buffer for vp8 pak object commands
3058 gen9_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
3059 struct encode_state *encode_state,
3060 struct intel_encoder_context *encoder_context)
3062 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3063 struct intel_batchbuffer *batch;
3066 batch = mfc_context->aux_batchbuffer;
3067 batch_bo = batch->buffer;
3069 gen9_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
3071 intel_batchbuffer_align(batch, 8);
3073 BEGIN_BCS_BATCH(batch, 2);
3074 OUT_BCS_BATCH(batch, 0);
3075 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
3076 ADVANCE_BCS_BATCH(batch);
3078 dri_bo_reference(batch_bo);
3079 intel_batchbuffer_free(batch);
3080 mfc_context->aux_batchbuffer = NULL;
3086 gen9_mfc_vp8_pipeline_programing(VADriverContextP ctx,
3087 struct encode_state *encode_state,
3088 struct intel_encoder_context *encoder_context)
3090 struct intel_batchbuffer *batch = encoder_context->base.batch;
3091 dri_bo *slice_batch_bo;
3093 slice_batch_bo = gen9_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
3096 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
3097 intel_batchbuffer_emit_mi_flush(batch);
3099 // picture level programing
3100 gen9_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
3102 BEGIN_BCS_BATCH(batch, 4);
3103 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
3104 OUT_BCS_RELOC(batch,
3106 I915_GEM_DOMAIN_COMMAND, 0,
3108 OUT_BCS_BATCH(batch, 0);
3109 OUT_BCS_BATCH(batch, 0);
3110 ADVANCE_BCS_BATCH(batch);
3113 intel_batchbuffer_end_atomic(batch);
3115 dri_bo_unreference(slice_batch_bo);
3118 static void gen9_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
3119 struct encode_state *encode_state,
3120 struct intel_encoder_context *encoder_context)
3122 struct i965_driver_data *i965 = i965_driver_data(ctx);
3123 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3124 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3125 unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
3126 unsigned int *vp8_encoding_status, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
3128 dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
3130 vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
3131 first_partition_bytes = (*vp8_encoding_status + 7) / 8;
3132 token_partition_bytes = (*(unsigned int *)(vp8_encoding_status + 9) + 7) / 8;
3134 /*coded_bytes includes P0~P8 partitions bytes + uncompresse date bytes + partion_size bytes in bitstream */
3135 vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (pic_param->pic_flags.bits.num_token_partitions - 1) * 3;
3137 dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
3139 dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
3140 struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
3141 coded_buffer_segment->base.size = vp8_coded_bytes;
3142 dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
3146 gen9_mfc_vp8_encode_picture(VADriverContextP ctx,
3147 struct encode_state *encode_state,
3148 struct intel_encoder_context *encoder_context)
3150 gen9_mfc_vp8_init(ctx, encode_state, encoder_context);
3151 intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
3152 /*Programing bcs pipeline*/
3153 gen9_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
3154 gen9_mfc_run(ctx, encode_state, encoder_context);
3155 gen9_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
3157 return VA_STATUS_SUCCESS;
3161 gen9_mfc_context_destroy(void *context)
3163 struct gen6_mfc_context *mfc_context = context;
3166 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3167 mfc_context->post_deblocking_output.bo = NULL;
3169 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3170 mfc_context->pre_deblocking_output.bo = NULL;
3172 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3173 mfc_context->uncompressed_picture_source.bo = NULL;
3175 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3176 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3178 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
3179 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3180 mfc_context->direct_mv_buffers[i].bo = NULL;
3183 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3184 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
3186 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3187 mfc_context->macroblock_status_buffer.bo = NULL;
3189 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3190 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3192 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3193 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3196 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
3197 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3198 mfc_context->reference_surfaces[i].bo = NULL;
3201 i965_gpe_context_destroy(&mfc_context->gpe_context);
3203 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3204 mfc_context->mfc_batchbuffer_surface.bo = NULL;
3206 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3207 mfc_context->aux_batchbuffer_surface.bo = NULL;
3209 if (mfc_context->aux_batchbuffer)
3210 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3212 mfc_context->aux_batchbuffer = NULL;
3214 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3215 mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
3217 dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3218 mfc_context->vp8_state.final_frame_bo = NULL;
3220 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3221 mfc_context->vp8_state.frame_header_bo = NULL;
3223 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3224 mfc_context->vp8_state.intermediate_bo = NULL;
3226 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3227 mfc_context->vp8_state.mpc_row_store_bo = NULL;
3229 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3230 mfc_context->vp8_state.stream_out_bo = NULL;
3232 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3233 mfc_context->vp8_state.token_statistics_bo = NULL;
3238 static VAStatus gen9_mfc_pipeline(VADriverContextP ctx,
3240 struct encode_state *encode_state,
3241 struct intel_encoder_context *encoder_context)
3246 case VAProfileH264ConstrainedBaseline:
3247 case VAProfileH264Main:
3248 case VAProfileH264High:
3249 case VAProfileH264MultiviewHigh:
3250 case VAProfileH264StereoHigh:
3251 vaStatus = gen9_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
3254 /* FIXME: add for other profile */
3255 case VAProfileMPEG2Simple:
3256 case VAProfileMPEG2Main:
3257 vaStatus = gen9_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
3260 case VAProfileVP8Version0_3:
3261 vaStatus = gen9_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
3265 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3272 Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3274 struct gen6_mfc_context *mfc_context = NULL;
3276 #if MFC_SOFTWARE_HASWELL
3277 if ((encoder_context->codec == CODEC_H264) ||
3278 (encoder_context->codec == CODEC_H264_MVC) ||
3279 (encoder_context->codec == CODEC_MPEG2)) {
3281 return gen8_mfc_context_init(ctx, encoder_context);
3285 mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
3286 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
3288 mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
3289 mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
3291 mfc_context->gpe_context.curbe.length = 32 * 4;
3293 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
3294 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
3295 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
3296 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
3297 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
3299 i965_gpe_load_kernels(ctx,
3300 &mfc_context->gpe_context,
3304 mfc_context->pipe_mode_select = gen9_mfc_pipe_mode_select;
3305 mfc_context->set_surface_state = gen9_mfc_surface_state;
3306 mfc_context->ind_obj_base_addr_state = gen9_mfc_ind_obj_base_addr_state;
3307 mfc_context->avc_img_state = gen9_mfc_avc_img_state;
3308 mfc_context->avc_qm_state = gen9_mfc_avc_qm_state;
3309 mfc_context->avc_fqm_state = gen9_mfc_avc_fqm_state;
3310 mfc_context->insert_object = gen9_mfc_avc_insert_object;
3311 mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
3313 encoder_context->mfc_context = mfc_context;
3314 encoder_context->mfc_context_destroy = gen9_mfc_context_destroy;
3315 encoder_context->mfc_pipeline = gen9_mfc_pipeline;
3316 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;