2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
45 #include <va/va_enc_jpeg.h>
46 #include "vp8_probs.h"
48 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
52 #define MFC_SOFTWARE_BATCH 0
55 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
57 //Zigzag scan order of the the Luma and Chroma components
58 //Note: Jpeg Spec ISO/IEC 10918-1, Figure A.6 shows the zigzag order differently.
59 //The Spec is trying to show the zigzag pattern with number positions. The below
60 //table will use the pattern shown by A.6 and map the position of the elements in the array
/* Lookup table: entry k gives the raster-order position of the k-th
 * coefficient in zigzag order; used to reorder 8x8 quant matrices. */
61 static const uint32_t zigzag_direct[64] = {
62 0, 1, 8, 16, 9, 2, 3, 10,
63 17, 24, 32, 25, 18, 11, 4, 5,
64 12, 19, 26, 33, 40, 48, 41, 34,
65 27, 20, 13, 6, 7, 14, 21, 28,
66 35, 42, 49, 56, 57, 50, 43, 36,
67 29, 22, 15, 23, 30, 37, 44, 51,
68 58, 59, 52, 45, 38, 31, 39, 46,
69 53, 60, 61, 54, 47, 55, 62, 63
72 //Default Luminance quantization table
73 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.1
/* Default luma quantization matrix (raster order), per Table K.1. */
74 static const uint8_t jpeg_luma_quant[64] = {
75 16, 11, 10, 16, 24, 40, 51, 61,
76 12, 12, 14, 19, 26, 58, 60, 55,
77 14, 13, 16, 24, 40, 57, 69, 56,
78 14, 17, 22, 29, 51, 87, 80, 62,
79 18, 22, 37, 56, 68, 109, 103, 77,
80 24, 35, 55, 64, 81, 104, 113, 92,
81 49, 64, 78, 87, 103, 121, 120, 101,
82 72, 92, 95, 98, 112, 100, 103, 99
85 //Default Chroma quantization table
86 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.2
/* Default chroma quantization matrix (raster order), per Table K.2. */
87 static const uint8_t jpeg_chroma_quant[64] = {
88 17, 18, 24, 47, 99, 99, 99, 99,
89 18, 21, 26, 66, 99, 99, 99, 99,
90 24, 26, 56, 99, 99, 99, 99, 99,
91 47, 66, 99, 99, 99, 99, 99, 99,
92 99, 99, 99, 99, 99, 99, 99, 99,
93 99, 99, 99, 99, 99, 99, 99, 99,
94 99, 99, 99, 99, 99, 99, 99, 99,
95 99, 99, 99, 99, 99, 99, 99, 99
99 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Gen8 media kernel binary (4 dwords per instruction), generated offline. */
104 static const uint32_t gen8_mfc_batchbuffer_avc[][4] = {
105 #include "shaders/utils/mfc_batchbuffer_hsw.g8b"
/* Gen9 media kernel binary (4 dwords per instruction), generated offline. */
108 static const uint32_t gen9_mfc_batchbuffer_avc[][4] = {
109 #include "shaders/utils/mfc_batchbuffer_hsw.g9b"
/* Kernel descriptor table for gen8: name, id, binary, and binary size. */
112 static struct i965_kernel gen8_mfc_kernels[] = {
114 "MFC AVC INTRA BATCHBUFFER ",
115 MFC_BATCHBUFFER_AVC_INTRA,
116 gen8_mfc_batchbuffer_avc,
117 sizeof(gen8_mfc_batchbuffer_avc),
/* Kernel descriptor table for gen9; mirrors gen8_mfc_kernels above. */
122 static struct i965_kernel gen9_mfc_kernels[] = {
124 "MFC AVC INTRA BATCHBUFFER ",
125 MFC_BATCHBUFFER_AVC_INTRA,
126 gen9_mfc_batchbuffer_avc,
127 sizeof(gen9_mfc_batchbuffer_avc),
/* Flat AVC quant matrix: every byte is 0x10 (16), i.e. the default
 * "no scaling list" matrix, packed four 8-bit entries per dword. */
132 static const uint32_t qm_flat[16] = {
133 0x10101010, 0x10101010, 0x10101010, 0x10101010,
134 0x10101010, 0x10101010, 0x10101010, 0x10101010,
135 0x10101010, 0x10101010, 0x10101010, 0x10101010,
136 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward quant matrix: every 16-bit entry is 0x1000 = (1 << 16) / 16,
 * the reciprocal of the flat qm entry (see gen8_mfc_avc_fill_fqm),
 * packed two entries per dword. */
139 static const uint32_t fqm_flat[32] = {
140 0x10001000, 0x10001000, 0x10001000, 0x10001000,
141 0x10001000, 0x10001000, 0x10001000, 0x10001000,
142 0x10001000, 0x10001000, 0x10001000, 0x10001000,
143 0x10001000, 0x10001000, 0x10001000, 0x10001000,
144 0x10001000, 0x10001000, 0x10001000, 0x10001000,
145 0x10001000, 0x10001000, 0x10001000, 0x10001000,
146 0x10001000, 0x10001000, 0x10001000, 0x10001000,
147 0x10001000, 0x10001000, 0x10001000, 0x10001000
150 #define INTER_MODE_MASK 0x03
151 #define INTER_8X8 0x03
152 #define INTER_16X8 0x01
153 #define INTER_8X16 0x02
154 #define SUBMB_SHAPE_MASK 0x00FF00
155 #define INTER_16X16 0x00
157 #define INTER_MV8 (4 << 20)
158 #define INTER_MV32 (6 << 20)
/* Emits MFX_PIPE_MODE_SELECT (5 dwords) putting the MFX engine in encode
 * mode for the given codec. standard_select must be one of the MPEG2/AVC/
 * JPEG/VP8 MFX formats (asserted below); post/pre-deblocking stream-out is
 * enabled only when the corresponding output BO has been allocated. */
162 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
164 struct intel_encoder_context *encoder_context)
166 struct intel_batchbuffer *batch = encoder_context->base.batch;
167 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
169 assert(standard_select == MFX_FORMAT_MPEG2 ||
170 standard_select == MFX_FORMAT_AVC ||
171 standard_select == MFX_FORMAT_JPEG ||
172 standard_select == MFX_FORMAT_VP8);
174 BEGIN_BCS_BATCH(batch, 5);
176 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* DW1: mode flags; bit 4 selects encode rather than decode. */
178 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
179 (MFD_MODE_VLD << 15) | /* VLD mode */
180 (0 << 10) | /* Stream-Out Enable */
181 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
182 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
183 (0 << 6) | /* frame statistics stream-out enable*/
184 (0 << 5) | /* not in stitch mode */
185 (1 << 4) | /* encoding mode */
186 (standard_select << 0)); /* standard select: avc or mpeg2 or jpeg*/
/* DW2: debug/clock-gating controls, all left at hardware defaults. */
188 (0 << 7) | /* expand NOA bus flag */
189 (0 << 6) | /* disable slice-level clock gating */
190 (0 << 5) | /* disable clock gating for NOA */
191 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
192 (0 << 3) | /* terminate if AVC mbdata error occurs */
193 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
/* DW3-4: reserved. */
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
199 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_SURFACE_STATE (6 dwords) describing the reconstructed/source
 * surface: NV12 (planar 4:2:0, interleaved U/V), Y-major tiled, with
 * dimensions and pitch taken from mfc_context->surface_state. */
203 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
205 struct intel_batchbuffer *batch = encoder_context->base.batch;
206 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
208 BEGIN_BCS_BATCH(batch, 6);
210 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
211 OUT_BCS_BATCH(batch, 0);
/* DW2: height/width are programmed minus one, per hardware convention. */
213 ((mfc_context->surface_state.height - 1) << 18) |
214 ((mfc_context->surface_state.width - 1) << 4));
216 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
217 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
218 (0 << 22) | /* surface object control state, FIXME??? */
219 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
220 (0 << 2) | /* must be 0 for interleave U/V */
221 (1 << 1) | /* must be tiled */
222 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* DW4: Y offset (in rows) to the start of the chroma plane. */
224 (0 << 16) | /* must be 0 for interleave U/V */
225 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
226 OUT_BCS_BATCH(batch, 0);
228 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): indirect-object base
 * addresses for the bitstream, the VME motion-vector output (skipped for
 * JPEG, which has no VME stage), and the PAK-BSE coded-buffer object.
 * VP8 programs an explicit bitstream upper bound; other codecs leave it 0. */
232 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
233 struct intel_encoder_context *encoder_context)
235 struct i965_driver_data *i965 = i965_driver_data(ctx);
236 struct intel_batchbuffer *batch = encoder_context->base.batch;
237 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
238 struct gen6_vme_context *vme_context = encoder_context->vme_context;
240 unsigned int bse_offset;
242 BEGIN_BCS_BATCH(batch, 26);
244 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
245 /* the DW1-3 is for the MFX indirect bistream offset */
246 OUT_BCS_BATCH(batch, 0);
247 OUT_BCS_BATCH(batch, 0);
248 OUT_BCS_BATCH(batch, 0);
250 /* the DW4-5 is the MFX upper bound */
251 if (encoder_context->codec == CODEC_VP8) {
253 mfc_context->mfc_indirect_pak_bse_object.bo,
254 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
255 mfc_context->mfc_indirect_pak_bse_object.end_offset);
256 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
/* VME output spans num_blocks blocks of size_block bytes each. */
262 if(encoder_context->codec != CODEC_JPEG) {
263 vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
264 /* the DW6-10 is for MFX Indirect MV Object Base Address */
265 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
266 OUT_BCS_BATCH(batch, 0);
267 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
268 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
269 OUT_BCS_BATCH(batch, 0);
271 /* No VME for JPEG */
272 OUT_BCS_BATCH(batch, 0);
273 OUT_BCS_BATCH(batch, 0);
274 OUT_BCS_BATCH(batch, 0);
275 OUT_BCS_BATCH(batch, 0);
276 OUT_BCS_BATCH(batch, 0);
279 /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
280 OUT_BCS_BATCH(batch, 0);
281 OUT_BCS_BATCH(batch, 0);
282 OUT_BCS_BATCH(batch, 0);
283 OUT_BCS_BATCH(batch, 0);
284 OUT_BCS_BATCH(batch, 0);
286 /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
293 /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* JPEG writes the coded data at an offset inside the buffer object;
 * other codecs start at offset 0. */
294 bse_offset = (encoder_context->codec == CODEC_JPEG) ? (mfc_context->mfc_indirect_pak_bse_object.offset) : 0;
296 mfc_context->mfc_indirect_pak_bse_object.bo,
297 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
299 OUT_BCS_BATCH(batch, 0);
300 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
303 mfc_context->mfc_indirect_pak_bse_object.bo,
304 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
305 mfc_context->mfc_indirect_pak_bse_object.end_offset);
306 OUT_BCS_BATCH(batch, 0);
308 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_IMG_STATE (16 dwords): per-frame AVC image parameters.
 * Frame dimensions in MBs are derived from the surface state; entropy
 * coding mode, weighted prediction and 8x8 transform flags come from the
 * VA picture parameter buffer. */
312 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
313 struct intel_encoder_context *encoder_context)
315 struct intel_batchbuffer *batch = encoder_context->base.batch;
316 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
317 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
319 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
320 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
322 BEGIN_BCS_BATCH(batch, 16);
324 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
325 /*DW1. MB setting of frame */
327 ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
/* DW2: frame size in MBs, minus one in each dimension. */
329 ((height_in_mbs - 1) << 16) |
330 ((width_in_mbs - 1) << 0));
/* DW3: QP offsets and prediction flags. */
333 (0 << 24) | /* Second Chroma QP Offset */
334 (0 << 16) | /* Chroma QP Offset */
335 (0 << 14) | /* Max-bit conformance Intra flag */
336 (0 << 13) | /* Max Macroblock size conformance Inter flag */
337 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
338 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
339 (0 << 8) | /* FIXME: Image Structure */
340 (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
/* DW4: stream/entropy controls; progressive frame-MB-only encoding. */
342 (0 << 16) | /* Mininum Frame size */
343 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
344 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
345 (0 << 13) | /* CABAC 0 word insertion test enable */
346 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
347 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
348 (0 << 8) | /* FIXME: MbMvFormatFlag */
349 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
350 (0 << 6) | /* Only valid for VLD decoding mode */
351 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
352 (0 << 4) | /* Direct 8x8 inference flag */
353 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
354 (1 << 2) | /* Frame MB only flag */
355 (0 << 1) | /* MBAFF mode is in active */
356 (0 << 0)); /* Field picture flag */
357 /* DW5 Trellis quantization */
358 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
359 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
360 (0xBB8 << 16) | /* InterMbMaxSz */
361 (0xEE8) ); /* IntraMbMaxSz */
362 OUT_BCS_BATCH(batch, 0); /* Reserved */
364 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
365 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
366 /* DW10. Bit setting for MB */
367 OUT_BCS_BATCH(batch, 0x8C000000);
368 OUT_BCS_BATCH(batch, 0x00010000);
/* DW12-13: reserved / intra-refresh controls left at fixed values. */
370 OUT_BCS_BATCH(batch, 0);
371 OUT_BCS_BATCH(batch, 0x02010100);
372 /* DW14. For short format */
373 OUT_BCS_BATCH(batch, 0);
374 OUT_BCS_BATCH(batch, 0);
376 ADVANCE_BCS_BATCH(batch);
/* Emits one MFX_QM_STATE command (18 dwords): uploads a quantization
 * matrix of type qm_type. qm points at up to 16 dwords (64 bytes); the
 * local copy is zero-padded... NOTE(review): qm_buffer is not explicitly
 * zeroed here, so trailing entries beyond qm_length are uninitialized —
 * callers in this file always pass 12 or 16 dwords; verify intent. */
380 gen8_mfc_qm_state(VADriverContextP ctx,
384 struct intel_encoder_context *encoder_context)
386 struct intel_batchbuffer *batch = encoder_context->base.batch;
387 unsigned int qm_buffer[16];
389 assert(qm_length <= 16);
390 assert(sizeof(*qm) == 4);
391 memcpy(qm_buffer, qm, qm_length * 4);
393 BEGIN_BCS_BATCH(batch, 18);
394 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
395 OUT_BCS_BATCH(batch, qm_type << 0);
396 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
397 ADVANCE_BCS_BATCH(batch);
/* Programs the four AVC quantization matrices (4x4 intra/inter, 8x8
 * intra/inter). Uses the flat matrix when neither SPS nor PPS signals a
 * scaling matrix; otherwise pulls lists from the VA IQ-matrix buffer
 * (list 0 = 4x4 intra Y, list 3 = 4x4 inter Y, lists 0/1 of 8x8). */
401 gen8_mfc_avc_qm_state(VADriverContextP ctx,
402 struct encode_state *encode_state,
403 struct intel_encoder_context *encoder_context)
405 const unsigned int *qm_4x4_intra;
406 const unsigned int *qm_4x4_inter;
407 const unsigned int *qm_8x8_intra;
408 const unsigned int *qm_8x8_inter;
409 VAEncSequenceParameterBufferH264 *pSeqParameter =
410 (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
411 VAEncPictureParameterBufferH264 *pPicParameter =
412 (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
414 if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
415 && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
416 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
418 VAIQMatrixBufferH264 *qm;
419 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
420 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
421 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
422 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
423 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
424 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
/* 4x4 lists are 48 bytes (12 dwords); 8x8 lists are 64 bytes (16 dwords). */
427 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
428 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
429 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
430 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/* Emits one MFX_FQM_STATE command (34 dwords): uploads a forward quant
 * (reciprocal) matrix of type fqm_type; fqm points at up to 32 dwords.
 * NOTE(review): like gen8_mfc_qm_state, trailing fqm_buffer entries past
 * fqm_length are not zeroed before being written to the batch. */
434 gen8_mfc_fqm_state(VADriverContextP ctx,
438 struct intel_encoder_context *encoder_context)
440 struct intel_batchbuffer *batch = encoder_context->base.batch;
441 unsigned int fqm_buffer[32];
443 assert(fqm_length <= 32);
444 assert(sizeof(*fqm) == 4);
445 memcpy(fqm_buffer, fqm, fqm_length * 4);
447 BEGIN_BCS_BATCH(batch, 34);
448 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
449 OUT_BCS_BATCH(batch, fqm_type << 0);
450 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
451 ADVANCE_BCS_BATCH(batch);
/* Converts a len x len quant matrix into its forward (reciprocal) form:
 * fqm[i][j] = 65536 / qm[j][i] — note the transpose — giving 4.12-style
 * fixed-point reciprocals for the hardware. qm entries must be non-zero. */
455 gen8_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
458 for (i = 0; i < len; i++)
459 for (j = 0; j < len; j++)
460 fqm[i * len + j] = (1 << 16) / qm[j * len + i];
/* Programs the four AVC forward quant matrices. Flat case uses the
 * precomputed fqm_flat table; otherwise each VA scaling list is converted
 * with gen8_mfc_avc_fill_fqm. The three 4x4 lists per intra/inter group
 * are packed consecutively (16 uint16 each = 24 dwords total). */
464 gen8_mfc_avc_fqm_state(VADriverContextP ctx,
465 struct encode_state *encode_state,
466 struct intel_encoder_context *encoder_context)
468 VAEncSequenceParameterBufferH264 *pSeqParameter =
469 (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
470 VAEncPictureParameterBufferH264 *pPicParameter =
471 (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
473 if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
474 && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
475 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
476 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
477 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
478 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
482 VAIQMatrixBufferH264 *qm;
483 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
484 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* Lists 0-2: 4x4 intra (Y, Cb, Cr). */
486 for (i = 0; i < 3; i++)
487 gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
488 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
/* Lists 3-5: 4x4 inter (Y, Cb, Cr). */
490 for (i = 3; i < 6; i++)
491 gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
492 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
494 gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
495 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
497 gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
498 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/* Emits MFX_INSERT_OBJECT to place raw header/trailer bits into the coded
 * bitstream: lenght_in_dws dwords of payload, with data_bits_in_last_dw
 * valid bits in the final dword (0 means a full 32). emulation_flag asks
 * the hardware for start-code emulation prevention after skipping
 * skip_emul_byte_count leading bytes. If batch is NULL the encoder's main
 * batch is used.
 * NOTE(review): "lenght_in_dws" is misspelled (length); left as-is since
 * callers elsewhere in the file use this signature. */
503 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
504 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
505 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
506 struct intel_batchbuffer *batch)
509 batch = encoder_context->base.batch;
511 if (data_bits_in_last_dw == 0)
512 data_bits_in_last_dw = 32;
514 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
516 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
518 (0 << 16) | /* always start at offset 0 */
519 (data_bits_in_last_dw << 8) |
520 (skip_emul_byte_count << 4) |
521 (!!emulation_flag << 3) |
522 ((!!is_last_header) << 2) |
523 ((!!is_end_of_slice) << 1) |
524 (0 << 0)); /* FIXME: ??? */
525 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
527 ADVANCE_BCS_BATCH(batch);
/* Per-frame (re)initialization of the MFC context: derives the frame size
 * in macroblocks from the codec-specific VA parameter buffers, releases
 * all buffer objects from the previous frame, and allocates fresh scratch
 * buffers (intra row store, MB status, deblocking and BSD/MPC row stores)
 * plus the auxiliary slice batchbuffer. */
531 static void gen8_mfc_init(VADriverContextP ctx,
532 struct encode_state *encode_state,
533 struct intel_encoder_context *encoder_context)
535 struct i965_driver_data *i965 = i965_driver_data(ctx);
536 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
539 int width_in_mbs = 0;
540 int height_in_mbs = 0;
541 int slice_batchbuffer_size;
/* H.264 SPS carries dimensions already in MB units; MPEG-2 and JPEG
 * carry pixel dimensions that must be rounded up to whole MBs. */
543 if (encoder_context->codec == CODEC_H264 ||
544 encoder_context->codec == CODEC_H264_MVC) {
545 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
546 width_in_mbs = pSequenceParameter->picture_width_in_mbs;
547 height_in_mbs = pSequenceParameter->picture_height_in_mbs;
548 } else if (encoder_context->codec == CODEC_MPEG2) {
549 VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
551 assert(encoder_context->codec == CODEC_MPEG2);
553 width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
554 height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
556 assert(encoder_context->codec == CODEC_JPEG);
557 VAEncPictureParameterBufferJPEG *pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
559 width_in_mbs = ALIGN(pic_param->picture_width, 16) / 16;
560 height_in_mbs = ALIGN(pic_param->picture_height, 16) / 16;
/* Worst-case slice batch: 64 bytes/MB plus fixed overhead per slice. */
563 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
564 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
566 /*Encode common setup for MFC*/
567 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
568 mfc_context->post_deblocking_output.bo = NULL;
570 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
571 mfc_context->pre_deblocking_output.bo = NULL;
573 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
574 mfc_context->uncompressed_picture_source.bo = NULL;
576 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
577 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
579 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
580 if (mfc_context->direct_mv_buffers[i].bo != NULL)
581 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
582 mfc_context->direct_mv_buffers[i].bo = NULL;
585 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
586 if (mfc_context->reference_surfaces[i].bo != NULL)
587 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
588 mfc_context->reference_surfaces[i].bo = NULL;
/* Allocate the per-frame scratch buffers, dropping last frame's BOs. */
591 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
592 bo = dri_bo_alloc(i965->intel.bufmgr,
597 mfc_context->intra_row_store_scratch_buffer.bo = bo;
599 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
600 bo = dri_bo_alloc(i965->intel.bufmgr,
602 width_in_mbs * height_in_mbs * 16,
605 mfc_context->macroblock_status_buffer.bo = bo;
607 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
608 bo = dri_bo_alloc(i965->intel.bufmgr,
610 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
613 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
615 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
616 bo = dri_bo_alloc(i965->intel.bufmgr,
618 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
621 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
623 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
624 mfc_context->mfc_batchbuffer_surface.bo = NULL;
626 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
627 mfc_context->aux_batchbuffer_surface.bo = NULL;
629 if (mfc_context->aux_batchbuffer)
630 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
/* Fresh aux batchbuffer, exposed also as a 16-byte-block surface. */
632 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
633 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
634 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
635 mfc_context->aux_batchbuffer_surface.pitch = 16;
636 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
637 mfc_context->aux_batchbuffer_surface.size_block = 16;
639 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
/* Emits MFX_PIPE_BUF_ADDR_STATE (61 dwords): base addresses for all pipe
 * buffers — pre/post-deblocking outputs, uncompressed source, MB status,
 * row-store scratch buffers, and up to 16 reference picture BOs. Each
 * address group is a 3-dword (addr lo/hi + MOCS) entry. */
643 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
644 struct intel_encoder_context *encoder_context)
646 struct i965_driver_data *i965 = i965_driver_data(ctx);
647 struct intel_batchbuffer *batch = encoder_context->base.batch;
648 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
651 BEGIN_BCS_BATCH(batch, 61);
653 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
655 /* the DW1-3 is for pre_deblocking */
656 if (mfc_context->pre_deblocking_output.bo)
657 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
658 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
661 OUT_BCS_BATCH(batch, 0); /* pre output addr */
663 OUT_BCS_BATCH(batch, 0);
664 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
665 /* the DW4-6 is for the post_deblocking */
667 if (mfc_context->post_deblocking_output.bo)
668 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
669 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
670 0); /* post output addr */
672 OUT_BCS_BATCH(batch, 0);
674 OUT_BCS_BATCH(batch, 0);
675 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
677 /* the DW7-9 is for the uncompressed_picture */
678 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
679 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
680 0); /* uncompressed data */
682 OUT_BCS_BATCH(batch, 0);
683 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
685 /* the DW10-12 is for the mb status */
686 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
687 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
688 0); /* StreamOut data*/
690 OUT_BCS_BATCH(batch, 0);
691 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
693 /* the DW13-15 is for the intra_row_store_scratch */
694 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
695 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
698 OUT_BCS_BATCH(batch, 0);
699 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
701 /* the DW16-18 is for the deblocking filter */
702 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
703 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
706 OUT_BCS_BATCH(batch, 0);
707 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
709 /* the DW 19-50 is for Reference pictures*/
/* Unused reference slots are programmed as NULL (0) addresses. */
710 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
711 if ( mfc_context->reference_surfaces[i].bo != NULL) {
712 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
713 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
716 OUT_BCS_BATCH(batch, 0);
719 OUT_BCS_BATCH(batch, 0);
722 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
724 /* The DW 52-54 is for the MB status buffer */
725 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
726 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
727 0); /* Macroblock status buffer*/
729 OUT_BCS_BATCH(batch, 0);
730 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
732 /* the DW 55-57 is the ILDB buffer */
733 OUT_BCS_BATCH(batch, 0);
734 OUT_BCS_BATCH(batch, 0);
735 OUT_BCS_BATCH(batch, 0);
737 /* the DW 58-60 is the second ILDB buffer */
738 OUT_BCS_BATCH(batch, 0);
739 OUT_BCS_BATCH(batch, 0);
740 OUT_BCS_BATCH(batch, 0);
742 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer addresses
 * for the reference frames and the current frame, followed by the POC
 * list. The last two direct_mv_buffers slots are reserved for the current
 * frame's MV write buffer. */
746 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
747 struct intel_encoder_context *encoder_context)
749 struct i965_driver_data *i965 = i965_driver_data(ctx);
750 struct intel_batchbuffer *batch = encoder_context->base.batch;
751 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
755 BEGIN_BCS_BATCH(batch, 71);
757 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
759 /* Reference frames and Current frames */
760 /* the DW1-32 is for the direct MV for reference */
761 for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
762 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
763 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
764 I915_GEM_DOMAIN_INSTRUCTION, 0,
766 OUT_BCS_BATCH(batch, 0);
768 OUT_BCS_BATCH(batch, 0);
769 OUT_BCS_BATCH(batch, 0);
773 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
775 /* the DW34-36 is the MV for the current reference */
776 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
777 I915_GEM_DOMAIN_INSTRUCTION, 0,
780 OUT_BCS_BATCH(batch, 0);
781 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: i/2 gives identical top/bottom field POCs per frame.
 * NOTE(review): simplistic POC programming — presumably adequate for the
 * encode-only path; verify against the decoder counterpart. */
784 for(i = 0; i < 32; i++) {
785 OUT_BCS_BATCH(batch, i/2);
787 OUT_BCS_BATCH(batch, 0);
788 OUT_BCS_BATCH(batch, 0);
790 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row-store
 * scratch base address; the MPR row-store and bitplane-read entries are
 * unused for encode and left zero. */
795 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
796 struct intel_encoder_context *encoder_context)
798 struct i965_driver_data *i965 = i965_driver_data(ctx);
799 struct intel_batchbuffer *batch = encoder_context->base.batch;
800 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
802 BEGIN_BCS_BATCH(batch, 10);
804 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
805 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
806 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
808 OUT_BCS_BATCH(batch, 0);
809 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
811 /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
812 OUT_BCS_BATCH(batch, 0);
813 OUT_BCS_BATCH(batch, 0);
814 OUT_BCS_BATCH(batch, 0);
816 /* the DW7-9 is for Bitplane Read Buffer Base Address */
817 OUT_BCS_BATCH(batch, 0);
818 OUT_BCS_BATCH(batch, 0);
819 OUT_BCS_BATCH(batch, 0);
821 ADVANCE_BCS_BATCH(batch);
/* Programs all per-picture AVC PAK state in the required order: pipe mode,
 * surface, indirect objects, buffer addresses, image state, (F)QM tables,
 * direct-mode state, and reference index lists. Codec-overridable steps go
 * through the mfc_context function pointers; the rest call gen8 helpers
 * directly. */
825 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
826 struct encode_state *encode_state,
827 struct intel_encoder_context *encoder_context)
829 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
831 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
832 mfc_context->set_surface_state(ctx, encoder_context);
833 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
834 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
835 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
836 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
837 mfc_context->avc_qm_state(ctx, encode_state, encoder_context);
838 mfc_context->avc_fqm_state(ctx, encode_state, encoder_context);
839 gen8_mfc_avc_directmode_state(ctx, encoder_context);
840 intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
/* Submits the accumulated batchbuffer to the hardware (flush = execute).
 * Always reports success; execution errors surface via the kernel driver. */
844 static VAStatus gen8_mfc_run(VADriverContextP ctx,
845 struct encode_state *encode_state,
846 struct intel_encoder_context *encoder_context)
848 struct intel_batchbuffer *batch = encoder_context->base.batch;
850 intel_batchbuffer_flush(batch); //run the pipeline
852 return VA_STATUS_SUCCESS;
/* Reads back the encoded size: maps the coded buffer, converts the coded
 * segment's byte size to bits for the caller, then unmaps.
 * NOTE(review): the i965_MapBuffer status is only checked via assert and
 * VA_STATUS_SUCCESS is returned unconditionally — in a release build a
 * failed map would dereference an invalid pointer; consider propagating
 * vaStatus. */
857 gen8_mfc_stop(VADriverContextP ctx,
858 struct encode_state *encode_state,
859 struct intel_encoder_context *encoder_context,
860 int *encoded_bits_size)
862 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
863 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
864 VACodedBufferSegment *coded_buffer_segment;
866 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
867 assert(vaStatus == VA_STATUS_SUCCESS);
868 *encoded_bits_size = coded_buffer_segment->size * 8;
869 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
871 return VA_STATUS_SUCCESS;
876 gen8_mfc_avc_slice_state(VADriverContextP ctx,
877 VAEncPictureParameterBufferH264 *pic_param,
878 VAEncSliceParameterBufferH264 *slice_param,
879 struct encode_state *encode_state,
880 struct intel_encoder_context *encoder_context,
881 int rate_control_enable,
883 struct intel_batchbuffer *batch)
885 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
886 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
887 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
888 int beginmb = slice_param->macroblock_address;
889 int endmb = beginmb + slice_param->num_macroblocks;
890 int beginx = beginmb % width_in_mbs;
891 int beginy = beginmb / width_in_mbs;
892 int nextx = endmb % width_in_mbs;
893 int nexty = endmb / width_in_mbs;
894 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
895 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
897 unsigned char correct[6], grow, shrink;
899 int weighted_pred_idc = 0;
900 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
901 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
902 int num_ref_l0 = 0, num_ref_l1 = 0;
905 batch = encoder_context->base.batch;
907 if (slice_type == SLICE_TYPE_I) {
908 luma_log2_weight_denom = 0;
909 chroma_log2_weight_denom = 0;
910 } else if (slice_type == SLICE_TYPE_P) {
911 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
912 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
914 if (slice_param->num_ref_idx_active_override_flag)
915 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
916 } else if (slice_type == SLICE_TYPE_B) {
917 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
918 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
919 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
921 if (slice_param->num_ref_idx_active_override_flag) {
922 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
923 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
926 if (weighted_pred_idc == 2) {
927 /* 8.4.3 - Derivation process for prediction weights (8-279) */
928 luma_log2_weight_denom = 5;
929 chroma_log2_weight_denom = 5;
933 maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
934 maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
936 for (i = 0; i < 6; i++)
937 correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
939 grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
940 (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
941 shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
942 (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
944 BEGIN_BCS_BATCH(batch, 11);;
946 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
947 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
952 (chroma_log2_weight_denom << 8) |
953 (luma_log2_weight_denom << 0));
956 (weighted_pred_idc << 30) |
957 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
958 (slice_param->disable_deblocking_filter_idc << 27) |
959 (slice_param->cabac_init_idc << 24) |
960 (qp<<16) | /*Slice Quantization Parameter*/
961 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
962 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
964 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
966 slice_param->macroblock_address );
967 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
969 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
970 (1 << 30) | /*ResetRateControlCounter*/
971 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
972 (4 << 24) | /*RC Stable Tolerance, middle level*/
973 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
974 (0 << 22) | /*QP mode, don't modfiy CBP*/
975 (0 << 21) | /*MB Type Direct Conversion Enabled*/
976 (0 << 20) | /*MB Type Skip Conversion Enabled*/
977 (last_slice << 19) | /*IsLastSlice*/
978 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
979 (1 << 17) | /*HeaderPresentFlag*/
980 (1 << 16) | /*SliceData PresentFlag*/
981 (1 << 15) | /*TailPresentFlag*/
982 (1 << 13) | /*RBSP NAL TYPE*/
983 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
984 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
986 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
987 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
997 OUT_BCS_BATCH(batch, 0);
999 ADVANCE_BCS_BATCH(batch);
/* Offsets into a per-macroblock VME output record, used when choosing
 * between intra and inter PAK objects:
 *  - AVC_INTRA_RDO_OFFSET / AVC_INTER_RDO_OFFSET index (in dwords) the
 *    16-bit RDO costs compared in gen8_mfc_avc_pipeline_slice_programing;
 *  - AVC_INTER_MSG_OFFSET advances msg (in dwords) to the inter message;
 *  - AVC_INTER_MV_OFFSET is a byte offset added to the record's position
 *    in the VME output BO to locate the motion-vector data;
 *  - AVC_RDO_MASK extracts the 16-bit RDO cost field. */
1002 #define AVC_INTRA_RDO_OFFSET 4
1003 #define AVC_INTER_RDO_OFFSET 10
1004 #define AVC_INTER_MSG_OFFSET 8
1005 #define AVC_INTER_MV_OFFSET 48
1006 #define AVC_RDO_MASK 0xFFFF
/*
 * Emit one MFC_AVC_PAK_OBJECT command for an intra-coded macroblock at
 * MB position (x, y).  msg[] is the per-MB record produced by the VME
 * stage; its mode bits are repacked into the PAK intra-message layout.
 * end_mb marks the last MB of the slice; target_mb_size/max_mb_size feed
 * the per-MB size control fields.  Returns the command length in dwords.
 *
 * NOTE(review): the 'batch' parameter is immediately overridden with the
 * context's base batch below, so the caller-supplied value is unused here.
 */
1009 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1010 int qp,unsigned int *msg,
1011 struct intel_encoder_context *encoder_context,
1012 unsigned char target_mb_size, unsigned char max_mb_size,
1013 struct intel_batchbuffer *batch)
1015 int len_in_dwords = 12;
1016 unsigned int intra_msg;
1017 #define INTRA_MSG_FLAG (1 << 13)
1018 #define INTRA_MBTYPE_MASK (0x1F0000)
1020 batch = encoder_context->base.batch;
1022 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* Repack the VME dword: keep the low mode bits, set the intra flag,
 * and move the MB type field down 8 bits into the PAK layout. */
1024 intra_msg = msg[0] & 0xC0FF;
1025 intra_msg |= INTRA_MSG_FLAG;
1026 intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1027 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1028 OUT_BCS_BATCH(batch, 0);
1029 OUT_BCS_BATCH(batch, 0);
1030 OUT_BCS_BATCH(batch,
1031 (0 << 24) | /* PackedMvNum, Debug*/
1032 (0 << 20) | /* No motion vector */
1033 (1 << 19) | /* CbpDcY */
1034 (1 << 18) | /* CbpDcU */
1035 (1 << 17) | /* CbpDcV */
1038 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
1039 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1040 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
1042 /*Stuff for Intra MB*/
1043 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
1044 OUT_BCS_BATCH(batch, msg[2]);
1045 OUT_BCS_BATCH(batch, msg[3]&0xFF);
1047 /*MaxSizeInWord and TargetSzieInWord*/
1048 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1049 (target_mb_size << 16) );
1051 OUT_BCS_BATCH(batch, 0);
1053 ADVANCE_BCS_BATCH(batch);
1055 return len_in_dwords;
/*
 * Emit one MFC_AVC_PAK_OBJECT command for an inter-coded macroblock.
 * msg[] is the per-MB VME record; 'offset' is the byte offset of this
 * MB's motion-vector data within the VME output BO (indirect MV fetch).
 * Before emitting, the VME motion vectors (laid out per 4x4 sub-block)
 * are replicated in place so they match the MV layout PAK expects for
 * the 8x16 / 16x8 / 8x8 partition modes.  Returns length in dwords.
 *
 * NOTE(review): mv_ptr writes mutate the mapped VME output buffer.
 */
1059 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1060 unsigned int *msg, unsigned int offset,
1061 struct intel_encoder_context *encoder_context,
1062 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1063 struct intel_batchbuffer *batch)
1065 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1066 int len_in_dwords = 12;
1067 unsigned int inter_msg = 0;
/* Always emit into the context's BCS batch, ignoring the parameter. */
1069 batch = encoder_context->base.batch;
1071 #define MSG_MV_OFFSET 4
1072 unsigned int *mv_ptr;
1073 mv_ptr = msg + MSG_MV_OFFSET;
1074 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1075 * to convert them to be compatible with the format of AVC_PAK
1078 if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1079 /* MV[0] and MV[2] are replicated */
1080 mv_ptr[4] = mv_ptr[0];
1081 mv_ptr[5] = mv_ptr[1];
1082 mv_ptr[2] = mv_ptr[8];
1083 mv_ptr[3] = mv_ptr[9];
1084 mv_ptr[6] = mv_ptr[8];
1085 mv_ptr[7] = mv_ptr[9];
1086 } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1087 /* MV[0] and MV[1] are replicated */
1088 mv_ptr[2] = mv_ptr[0];
1089 mv_ptr[3] = mv_ptr[1];
1090 mv_ptr[4] = mv_ptr[16];
1091 mv_ptr[5] = mv_ptr[17];
1092 mv_ptr[6] = mv_ptr[24];
1093 mv_ptr[7] = mv_ptr[25];
1094 } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1095 !(msg[1] & SUBMB_SHAPE_MASK)) {
1096 /* Don't touch MV[0] or MV[1] */
1097 mv_ptr[2] = mv_ptr[8];
1098 mv_ptr[3] = mv_ptr[9];
1099 mv_ptr[4] = mv_ptr[16];
1100 mv_ptr[5] = mv_ptr[17];
1101 mv_ptr[6] = mv_ptr[24];
1102 mv_ptr[7] = mv_ptr[25];
1106 BEGIN_BCS_BATCH(batch, len_in_dwords);
1108 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1112 if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1113 if (msg[1] & SUBMB_SHAPE_MASK)
1116 OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
/* Indirect MV data: byte offset of this MB's MVs in the VME output BO. */
1117 OUT_BCS_BATCH(batch, offset);
/* Build the PAK inter message: keep mode/type bits, request 8 MVs
 * (32 for fully-subdivided 8x8) and set the CbpDc Y/U/V bits. */
1118 inter_msg = msg[0] & (0x1F00FFFF);
1119 inter_msg |= INTER_MV8;
1120 inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1121 if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1122 (msg[1] & SUBMB_SHAPE_MASK)) {
1123 inter_msg |= INTER_MV32;
1126 OUT_BCS_BATCH(batch, inter_msg);
1128 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
1129 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
/* B slices additionally set the direct/bidir flags in the top nibble. */
1131 if ( slice_type == SLICE_TYPE_B) {
1132 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
1134 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1137 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1140 inter_msg = msg[1] >> 8;
1141 /*Stuff for Inter MB*/
1142 OUT_BCS_BATCH(batch, inter_msg);
1143 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1144 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1146 /*MaxSizeInWord and TargetSzieInWord*/
1147 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1148 (target_mb_size << 16) );
1150 OUT_BCS_BATCH(batch, 0x0);
1152 ADVANCE_BCS_BATCH(batch);
1154 return len_in_dwords;
/*
 * Program one H.264 slice into 'slice_batch' (the software/CPU PAK path):
 * emits the slice state, the stream headers (first slice only), packed
 * user data, then walks every macroblock of the slice, choosing per MB
 * between an intra and an inter PAK object by comparing the VME RDO
 * costs.  Finishes with the tail-data insert (aligned 64-bit tail for
 * the last slice of the frame, 32-bit otherwise).
 */
1158 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1159 struct encode_state *encode_state,
1160 struct intel_encoder_context *encoder_context,
1162 struct intel_batchbuffer *slice_batch)
1164 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1165 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1166 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1167 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1168 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1169 unsigned int *msg = NULL, offset = 0;
1170 unsigned char *msg_ptr = NULL;
1171 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1172 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1173 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1175 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1176 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1177 unsigned int tail_data[] = { 0x0, 0x0 };
1178 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1179 int is_intra = slice_type == SLICE_TYPE_I;
/* Under BRC, override the parameter QP with the BRC-computed one and
 * fix up slice_qp_delta so the emitted slice header matches. */
1184 if (rate_control_mode != VA_RC_CQP) {
1185 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1186 if (encode_state->slice_header_index[slice_index] == 0) {
1187 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1192 /* only support for 8-bit pixel bit-depth */
1193 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1194 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1195 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1196 assert(qp >= 0 && qp < 52);
1198 gen8_mfc_avc_slice_state(ctx,
1201 encode_state, encoder_context,
1202 (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
1204 if ( slice_index == 0)
1205 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1207 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
/* Map the VME output BO for CPU reads of the per-MB records. */
1209 dri_bo_map(vme_context->vme_output.bo , 1);
1210 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1213 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1215 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
/* Per-MB loop: pick intra vs. inter PAK object from the VME RDO costs. */
1218 for (i = pSliceParameter->macroblock_address;
1219 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1220 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1221 x = i % width_in_mbs;
1222 y = i / width_in_mbs;
1223 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1224 if (vme_context->roi_enabled) {
1225 qp_mb = *(vme_context->qp_per_mb + i); /* per-MB QP when ROI is active */
1231 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1233 int inter_rdo, intra_rdo;
1234 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1235 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1236 offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1237 if (intra_rdo < inter_rdo) {
1238 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1240 msg += AVC_INTER_MSG_OFFSET;
1241 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1246 dri_bo_unmap(vme_context->vme_output.bo);
/* Tail data: 2 dwords (aligned) on the frame's last slice, 1 otherwise. */
1249 mfc_context->insert_object(ctx, encoder_context,
1251 2, 1, 1, 0, slice_batch);
1253 mfc_context->insert_object(ctx, encoder_context,
1255 1, 1, 1, 0, slice_batch);
/*
 * Build the whole slice-level PAK command stream on the CPU ("software"
 * path): program every slice into the auxiliary batchbuffer, terminate
 * it with MI_BATCH_BUFFER_END, and hand its BO back to the caller (the
 * extra dri_bo_reference keeps it alive past intel_batchbuffer_free).
 */
1260 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1261 struct encode_state *encode_state,
1262 struct intel_encoder_context *encoder_context)
1264 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1265 struct intel_batchbuffer *batch;
1269 batch = mfc_context->aux_batchbuffer;
1270 batch_bo = batch->buffer;
1271 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1272 gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
/* QWORD-align, then terminate the second-level batch. */
1275 intel_batchbuffer_align(batch, 8);
1277 BEGIN_BCS_BATCH(batch, 2);
1278 OUT_BCS_BATCH(batch, 0);
1279 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1280 ADVANCE_BCS_BATCH(batch);
/* Keep the BO alive for the caller, then drop the batch wrapper. */
1282 dri_bo_reference(batch_bo);
1283 intel_batchbuffer_free(batch);
1284 mfc_context->aux_batchbuffer = NULL;
1291 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1292 struct encode_state *encode_state,
1293 struct intel_encoder_context *encoder_context)
1295 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1296 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1298 assert(vme_context->vme_output.bo);
1299 mfc_context->buffer_suface_setup(ctx,
1300 &mfc_context->gpe_context,
1301 &vme_context->vme_output,
1302 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1303 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1307 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1308 struct encode_state *encode_state,
1309 struct intel_encoder_context *encoder_context)
1311 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1312 assert(mfc_context->aux_batchbuffer_surface.bo);
1313 mfc_context->buffer_suface_setup(ctx,
1314 &mfc_context->gpe_context,
1315 &mfc_context->aux_batchbuffer_surface,
1316 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1317 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/*
 * Set up both surfaces of the batchbuffer-building kernel: the VME
 * output records as input and the auxiliary batchbuffer as output.
 */
1321 gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1322 struct encode_state *encode_state,
1323 struct intel_encoder_context *encoder_context)
1325 gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1326 gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * Fill the interface descriptor remap table (IDRT) for the GPE context:
 * one gen8 interface descriptor per kernel, each pointing at its kernel
 * offset, a single binding-table entry, and a 4-register CURBE read.
 */
1330 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1331 struct encode_state *encode_state,
1332 struct intel_encoder_context *encoder_context)
1334 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1335 struct gen8_interface_descriptor_data *desc;
1338 unsigned char *desc_ptr;
1340 bo = mfc_context->gpe_context.idrt.bo;
1342 assert(bo->virtual);
1343 desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt.offset;
1345 desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1347 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1348 struct i965_kernel *kernel;
1349 kernel = &mfc_context->gpe_context.kernels[i];
1350 assert(sizeof(*desc) == 32);
1351 /*Setup the descritor table*/
1352 memset(desc, 0, sizeof(*desc));
/* Kernel start pointer is in 64-byte units; binding table in 32-byte units. */
1353 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1354 desc->desc3.sampler_count = 0;
1355 desc->desc3.sampler_state_pointer = 0;
1356 desc->desc4.binding_table_entry_count = 1;
1357 desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1358 desc->desc5.constant_urb_entry_read_offset = 0;
1359 desc->desc5.constant_urb_entry_read_length = 4;
/*
 * Prepare the constant (CURBE) data for the batchbuffer-building kernel.
 * NOTE(review): the body of this function is not fully visible in this
 * chunk; only the mfc_context lookup is shown.
 */
1371 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1372 struct encode_state *encode_state,
1373 struct intel_encoder_context *encoder_context)
1375 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Size of one AVC PAK object command: 48 bytes = 3 owords (12 dwords). */
1380 #define AVC_PAK_LEN_IN_BYTE 48
1381 #define AVC_PAK_LEN_IN_OWORD 3
/*
 * Emit one CMD_MEDIA_OBJECT that asks the media kernel to generate PAK
 * commands for a run of 'number_mb_cmds' macroblocks starting at
 * (mb_x, mb_y), writing them at head_offset (passed in 16-byte units)
 * in the output batchbuffer.  intra_flag/qp/fwd_ref/bwd_ref parameterize
 * the generated PAK objects; slice_end_x/y mark the slice's last MB.
 * NOTE(review): several parameter declarations are not visible in this
 * chunk.
 */
1384 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1385 uint32_t intra_flag,
1397 uint32_t temp_value;
1398 BEGIN_BATCH(batch, 14);
1400 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1401 OUT_BATCH(batch, 0);
1402 OUT_BATCH(batch, 0);
1403 OUT_BATCH(batch, 0);
1404 OUT_BATCH(batch, 0);
1405 OUT_BATCH(batch, 0);
/* Inline data consumed by the kernel starts here. */
1408 OUT_BATCH(batch, head_offset / 16);
1409 OUT_BATCH(batch, (intra_flag) | (qp << 16));
1410 temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1411 OUT_BATCH(batch, temp_value);
1413 OUT_BATCH(batch, number_mb_cmds);
1416 ((slice_end_y << 8) | (slice_end_x)));
1417 OUT_BATCH(batch, fwd_ref);
1418 OUT_BATCH(batch, bwd_ref);
1420 OUT_BATCH(batch, MI_NOOP);
1422 ADVANCE_BATCH(batch);
/*
 * Split one slice into runs of macroblocks and emit a media-object
 * command per run (hardware batchbuffer-building path).  Run length is
 * scaled down for wide frames so each command covers at most about one
 * MB row; with ROI enabled a run is additionally cut at the first MB
 * whose per-MB QP differs, so each command uses a single QP.
 */
1426 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1427 struct intel_encoder_context *encoder_context,
1428 VAEncSliceParameterBufferH264 *slice_param,
1433 struct intel_batchbuffer *batch = encoder_context->base.batch;
1434 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1435 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1436 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1437 int total_mbs = slice_param->num_macroblocks;
1438 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1439 int number_mb_cmds = 128;
1440 int starting_offset = 0;
1442 int last_mb, slice_end_x, slice_end_y;
1443 int remaining_mb = total_mbs;
1444 uint32_t fwd_ref , bwd_ref, mb_flag;
1446 int number_roi_mbs, max_mb_cmds, i;
1448 last_mb = slice_param->macroblock_address + total_mbs - 1;
1449 slice_end_x = last_mb % width_in_mbs;
1450 slice_end_y = last_mb / width_in_mbs;
1452 if (slice_type == SLICE_TYPE_I) {
1457 fwd_ref = vme_context->ref_index_in_mb[0];
1458 bwd_ref = vme_context->ref_index_in_mb[1];
/* Scale the per-command MB count down as the frame gets wider. */
1462 if (width_in_mbs >= 100) {
1463 number_mb_cmds = width_in_mbs / 5;
1464 } else if (width_in_mbs >= 80) {
1465 number_mb_cmds = width_in_mbs / 4;
1466 } else if (width_in_mbs >= 60) {
1467 number_mb_cmds = width_in_mbs / 3;
1468 } else if (width_in_mbs >= 40) {
1469 number_mb_cmds = width_in_mbs / 2;
1471 number_mb_cmds = width_in_mbs;
1474 max_mb_cmds = number_mb_cmds;
1477 mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1478 mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1480 number_mb_cmds = max_mb_cmds;
/* ROI: cut the run at the first MB whose QP differs from the run's. */
1481 if (vme_context->roi_enabled) {
1484 tmp_qp = *(vme_context->qp_per_mb + starting_offset);
1485 for (i = 1; i < max_mb_cmds; i++) {
1486 if (tmp_qp != *(vme_context->qp_per_mb + starting_offset + i))
1492 number_mb_cmds = number_roi_mbs;
1496 if (number_mb_cmds >= remaining_mb) {
1497 number_mb_cmds = remaining_mb;
1500 gen8_mfc_batchbuffer_emit_object_command(batch,
/* Advance the output offset by the PAK commands this run will write. */
1513 head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1514 remaining_mb -= number_mb_cmds;
1515 starting_offset += number_mb_cmds;
1516 } while (remaining_mb > 0);
/*
 * Program one slice for the hardware batchbuffer-building path: emit
 * slice state, headers (first slice) and packed data into the aux
 * batchbuffer, reserve cache-line-aligned space for the kernel-written
 * PAK objects (one AVC_PAK_LEN_IN_BYTE record per MB), then issue the
 * media-object commands that fill that space, and finally the tail data.
 */
1520 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1521 struct encode_state *encode_state,
1522 struct intel_encoder_context *encoder_context,
1525 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1526 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1527 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1528 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1529 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1530 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1531 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1532 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1533 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1534 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1535 unsigned int tail_data[] = { 0x0, 0x0 };
1537 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
/* Under BRC, use the BRC-computed QP and fix up slice_qp_delta. */
1541 if (rate_control_mode != VA_RC_CQP) {
1542 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1543 if (encode_state->slice_header_index[slice_index] == 0) {
1544 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1549 /* only support for 8-bit pixel bit-depth */
1550 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1551 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1552 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1553 assert(qp >= 0 && qp < 52);
1555 gen8_mfc_avc_slice_state(ctx,
1560 (rate_control_mode != VA_RC_CQP),
1564 if (slice_index == 0)
1565 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1567 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1569 intel_batchbuffer_align(slice_batch, 64); /* aligned by an Cache-line */
1570 head_offset = intel_batchbuffer_used_size(slice_batch);
/* Reserve space the media kernel will fill with per-MB PAK objects. */
1572 slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1574 gen8_mfc_avc_batchbuffer_slice_command(ctx,
1582 /* Aligned for tail */
1583 intel_batchbuffer_align(slice_batch, 64); /* aligned by Cache-line */
1585 mfc_context->insert_object(ctx,
1596 mfc_context->insert_object(ctx,
/*
 * Run the media (render) pipeline that builds the slice-level PAK
 * batchbuffer on the GPU: set up the GPE pipeline (gen8 or gen9 variant),
 * program each slice, terminate the aux batchbuffer with
 * MI_BATCH_BUFFER_END, flush media state, and submit.
 */
1612 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1613 struct encode_state *encode_state,
1614 struct intel_encoder_context *encoder_context)
1616 struct i965_driver_data *i965 = i965_driver_data(ctx);
1617 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1618 struct intel_batchbuffer *batch = encoder_context->base.batch;
1621 intel_batchbuffer_start_atomic(batch, 0x4000);
1623 if (IS_GEN9(i965->intel.device_info))
1624 gen9_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1626 gen8_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1628 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1629 gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1632 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
/* Terminate the kernel-built second-level batch. */
1634 intel_batchbuffer_align(slice_batch, 8);
1635 BEGIN_BCS_BATCH(slice_batch, 2);
1636 OUT_BCS_BATCH(slice_batch, 0);
1637 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1638 ADVANCE_BCS_BATCH(slice_batch);
1640 BEGIN_BATCH(batch, 2);
1641 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
1642 OUT_BATCH(batch, 0);
1643 ADVANCE_BATCH(batch);
/* The BO itself survives via the reference taken by the caller path. */
1645 intel_batchbuffer_free(slice_batch);
1646 mfc_context->aux_batchbuffer = NULL;
1649 if (IS_GEN9(i965->intel.device_info))
1650 gen9_gpe_pipeline_end(ctx, &mfc_context->gpe_context, batch);
1652 intel_batchbuffer_end_atomic(batch);
1653 intel_batchbuffer_flush(batch);
/*
 * Build the AVC PAK batchbuffer with the media kernel: bind surfaces,
 * fill the interface descriptors and constants, then run the pipeline.
 */
1658 gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1659 struct encode_state *encode_state,
1660 struct intel_encoder_context *encoder_context)
1662 gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1663 gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1664 gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1665 gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1669 gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1670 struct encode_state *encode_state,
1671 struct intel_encoder_context *encoder_context)
1673 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1675 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1676 gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1678 return mfc_context->aux_batchbuffer_surface.bo;
/*
 * Top-level BCS programming for an AVC frame: build the slice-level
 * commands (software or hardware path), then emit the picture-level MFX
 * state followed by an MI_BATCH_BUFFER_START chaining to the slice BO.
 * Interlaced content is rejected up front.
 */
1682 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1683 struct encode_state *encode_state,
1684 struct intel_encoder_context *encoder_context)
1686 struct intel_batchbuffer *batch = encoder_context->base.batch;
1687 dri_bo *slice_batch_bo;
1689 if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1690 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
/* Choose CPU- or kernel-built slice commands; both return an owned BO. */
1695 if (encoder_context->soft_batch_force)
1696 slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1698 slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1702 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1703 intel_batchbuffer_emit_mi_flush(batch);
1705 // picture level programing
1706 gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the second-level slice batchbuffer. */
1708 BEGIN_BCS_BATCH(batch, 3);
1709 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1710 OUT_BCS_RELOC(batch,
1712 I915_GEM_DOMAIN_COMMAND, 0,
1714 OUT_BCS_BATCH(batch, 0);
1715 ADVANCE_BCS_BATCH(batch);
1718 intel_batchbuffer_end_atomic(batch);
/* Drop the reference taken by the batchbuffer builder. */
1720 dri_bo_unreference(slice_batch_bo);
1725 gen8_mfc_avc_encode_picture(VADriverContextP ctx,
1726 struct encode_state *encode_state,
1727 struct intel_encoder_context *encoder_context)
1729 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1730 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1731 int current_frame_bits_size;
1735 gen8_mfc_init(ctx, encode_state, encoder_context);
1736 intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1737 /*Programing bcs pipeline*/
1738 gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1739 gen8_mfc_run(ctx, encode_state, encoder_context);
1740 if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
1741 gen8_mfc_stop(ctx, encode_state, encoder_context, ¤t_frame_bits_size);
1742 sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
1743 if (sts == BRC_NO_HRD_VIOLATION) {
1744 intel_mfc_hrd_context_update(encode_state, mfc_context);
1747 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1748 if (!mfc_context->hrd.violation_noted) {
1749 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1750 mfc_context->hrd.violation_noted = 1;
1752 return VA_STATUS_SUCCESS;
1759 return VA_STATUS_SUCCESS;
1767 va_to_gen8_mpeg2_picture_type[3] = {
/*
 * Emit MFX_MPEG2_PIC_STATE for the encode: packs the four f_codes and
 * the picture_coding_extension flags, the picture type, frame size in
 * MBs, and the intra/inter max-MB-size thresholds.  The first slice's
 * quantiser_scale_code selects the MB rate-control thresholding dword.
 */
1774 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1775 struct intel_encoder_context *encoder_context,
1776 struct encode_state *encode_state)
1778 struct intel_batchbuffer *batch = encoder_context->base.batch;
1779 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1780 VAEncPictureParameterBufferMPEG2 *pic_param;
1781 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1782 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1783 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1785 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1786 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1787 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1789 BEGIN_BCS_BATCH(batch, 13);
1790 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1791 OUT_BCS_BATCH(batch,
1792 (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1793 (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1794 (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1795 (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1796 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1797 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1798 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1799 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1800 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1801 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1802 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1803 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1804 OUT_BCS_BATCH(batch,
1805 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1806 va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1808 OUT_BCS_BATCH(batch,
1809 1 << 31 | /* slice concealment */
1810 (height_in_mbs - 1) << 16 |
1811 (width_in_mbs - 1));
/* MB rate-control thresholds; only set for larger quantiser scales. */
1813 if (slice_param && slice_param->quantiser_scale_code >= 14)
1814 OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1816 OUT_BCS_BATCH(batch, 0);
1818 OUT_BCS_BATCH(batch, 0);
1819 OUT_BCS_BATCH(batch,
1820 0xFFF << 16 | /* InterMBMaxSize */
1821 0xFFF << 0 | /* IntraMBMaxSize */
1823 OUT_BCS_BATCH(batch, 0);
1824 OUT_BCS_BATCH(batch, 0);
1825 OUT_BCS_BATCH(batch, 0);
1826 OUT_BCS_BATCH(batch, 0);
1827 OUT_BCS_BATCH(batch, 0);
1828 OUT_BCS_BATCH(batch, 0);
1829 ADVANCE_BCS_BATCH(batch);
1833 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1835 unsigned char intra_qm[64] = {
1836 8, 16, 19, 22, 26, 27, 29, 34,
1837 16, 16, 22, 24, 27, 29, 34, 37,
1838 19, 22, 26, 27, 29, 34, 34, 38,
1839 22, 22, 26, 27, 29, 34, 37, 40,
1840 22, 26, 27, 29, 32, 35, 40, 48,
1841 26, 27, 29, 32, 35, 40, 48, 58,
1842 26, 27, 29, 34, 38, 46, 56, 69,
1843 27, 29, 35, 38, 46, 56, 69, 83
1846 unsigned char non_intra_qm[64] = {
1847 16, 16, 16, 16, 16, 16, 16, 16,
1848 16, 16, 16, 16, 16, 16, 16, 16,
1849 16, 16, 16, 16, 16, 16, 16, 16,
1850 16, 16, 16, 16, 16, 16, 16, 16,
1851 16, 16, 16, 16, 16, 16, 16, 16,
1852 16, 16, 16, 16, 16, 16, 16, 16,
1853 16, 16, 16, 16, 16, 16, 16, 16,
1854 16, 16, 16, 16, 16, 16, 16, 16
1857 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1858 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
/*
 * Load the forward (encode-side) MPEG-2 quantizer matrices: each entry
 * is a 16-bit fixed-point reciprocal (65536 / quantizer value) of the
 * corresponding QM entry, passed to gen8_mfc_fqm_state as 32 dwords.
 * The non-intra table is all 0x1000 (= 65536/16), matching the flat
 * non-intra QM.
 */
1862 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1864 unsigned short intra_fqm[64] = {
1865 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1866 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1867 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1868 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1869 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1870 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1871 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1872 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1875 unsigned short non_intra_fqm[64] = {
1876 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1877 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1878 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1879 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1880 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1881 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1882 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1883 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1886 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1887 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
/*
 * Emit MFC_MPEG2_SLICEGROUP_STATE for one slice group: begin/end MB
 * positions, intra/last-group flags, the group QP, and the bitstream
 * output offset (loaded once per frame since LoadSlicePointerFlag is 0
 * in the picture state).
 */
1891 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1892 struct intel_encoder_context *encoder_context,
1894 int next_x, int next_y,
1895 int is_fisrt_slice_group,
1896 int is_last_slice_group,
1899 struct intel_batchbuffer *batch)
1901 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Emit into the context's BCS batch, ignoring the parameter value. */
1904 batch = encoder_context->base.batch;
1906 BEGIN_BCS_BATCH(batch, 8);
1908 OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1909 OUT_BCS_BATCH(batch,
1910 0 << 31 | /* MbRateCtrlFlag */
1911 !!is_last_slice_group << 19 | /* IsLastSliceGrp */
1912 1 << 17 | /* Insert Header before the first slice group data */
1913 1 << 16 | /* SliceData PresentFlag: always 1 */
1914 1 << 15 | /* TailPresentFlag: always 1 */
1915 0 << 14 | /* FirstSliceHdrDisabled: slice header for each slice */
1916 !!intra_slice << 13 | /* IntraSlice */
1917 !!intra_slice << 12 | /* IntraSliceFlag */
1919 OUT_BCS_BATCH(batch,
1925 OUT_BCS_BATCH(batch, qp); /* FIXME: SliceGroupQp */
1926 /* bitstream pointer is only loaded once for the first slice of a frame when
1927 * LoadSlicePointerFlag is 0
1929 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1930 OUT_BCS_BATCH(batch, 0); /* FIXME: */
1931 OUT_BCS_BATCH(batch, 0); /* FIXME: CorrectPoints */
1932 OUT_BCS_BATCH(batch, 0); /* FIXME: CVxxx */
1934 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFC_MPEG2_PAK_OBJECT for an intra macroblock: MB type and
 * flags, position, CBP and size-control words, slice/slice-group
 * boundary flags and quantiser scale code; the four MV slots are zeroed
 * since an intra MB carries no motion vectors.  Returns the command
 * length in dwords.
 */
1938 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1939 struct intel_encoder_context *encoder_context,
1941 int first_mb_in_slice,
1942 int last_mb_in_slice,
1943 int first_mb_in_slice_group,
1944 int last_mb_in_slice_group,
1947 int coded_block_pattern,
1948 unsigned char target_size_in_word,
1949 unsigned char max_size_in_word,
1950 struct intel_batchbuffer *batch)
1952 int len_in_dwords = 9;
/* Emit into the context's BCS batch, ignoring the parameter value. */
1955 batch = encoder_context->base.batch;
1957 BEGIN_BCS_BATCH(batch, len_in_dwords);
1959 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1960 OUT_BCS_BATCH(batch,
1961 0 << 24 | /* PackedMvNum */
1962 0 << 20 | /* MvFormat */
1963 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
1964 0 << 15 | /* TransformFlag: frame DCT */
1965 0 << 14 | /* FieldMbFlag */
1966 1 << 13 | /* IntraMbFlag */
1967 mb_type << 8 | /* MbType: Intra */
1968 0 << 2 | /* SkipMbFlag */
1969 0 << 0 | /* InterMbMode */
1971 OUT_BCS_BATCH(batch, y << 16 | x);
1972 OUT_BCS_BATCH(batch,
1973 max_size_in_word << 24 |
1974 target_size_in_word << 16 |
1975 coded_block_pattern << 6 | /* CBP */
1977 OUT_BCS_BATCH(batch,
1978 last_mb_in_slice << 31 |
1979 first_mb_in_slice << 30 |
1980 0 << 27 | /* EnableCoeffClamp */
1981 last_mb_in_slice_group << 26 |
1982 0 << 25 | /* MbSkipConvDisable */
1983 first_mb_in_slice_group << 24 |
1984 0 << 16 | /* MvFieldSelect */
1985 qp_scale_code << 0 |
1987 OUT_BCS_BATCH(batch, 0); /* MV[0][0] */
1988 OUT_BCS_BATCH(batch, 0); /* MV[1][0] */
1989 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
1990 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
1992 ADVANCE_BCS_BATCH(batch);
1994 return len_in_dwords;
/* Byte offset of the inter motion-vector data within each VME output
 * message block (see gen8_mfc_mpeg2_pak_object_inter). */
1998 #define MPEG2_INTER_MV_OFFSET 48
/* Per-f_code legal motion-vector range, indexed by the MPEG-2 f_code
 * value; values are in half-pel units. The table initializer rows are
 * not visible in this listing. */
2000 static struct _mv_ranges
2002 int low; /* in the unit of 1/2 pixel */
2003 int high; /* in the unit of 1/2 pixel */
2018 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
2020 if (mv + pos * 16 * 2 < 0 ||
2021 mv + (pos + 1) * 16 * 2 > display_max * 2)
2024 if (f_code > 0 && f_code < 10) {
2025 if (mv < mv_ranges[f_code].low)
2026 mv = mv_ranges[f_code].low;
2028 if (mv > mv_ranges[f_code].high)
2029 mv = mv_ranges[f_code].high;
/*
 * Emit one MFC_MPEG2_PAK_OBJECT (9 DWORDs) for an inter macroblock.
 * Reads the two forward/backward MVs from the VME output message (at
 * MPEG2_INTER_MV_OFFSET), halves them to half-pel and clamps them via
 * mpeg2_motion_vector() before packing them into the command.
 * Returns the command length in DWORDs.
 */
2036 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2037 struct encode_state *encode_state,
2038 struct intel_encoder_context *encoder_context,
2040 int width_in_mbs, int height_in_mbs,
2042 int first_mb_in_slice,
2043 int last_mb_in_slice,
2044 int first_mb_in_slice_group,
2045 int last_mb_in_slice_group,
2047 unsigned char target_size_in_word,
2048 unsigned char max_size_in_word,
2049 struct intel_batchbuffer *batch)
2051 VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2052 int len_in_dwords = 9;
2053 short *mvptr, mvx0, mvy0, mvx1, mvy1;
2056 batch = encoder_context->base.batch;
/* NOTE(review): stray double semicolon below — harmless but should be cleaned up. */
2058 mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
2059 mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2060 mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2061 mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2062 mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2064 BEGIN_BCS_BATCH(batch, len_in_dwords);
2066 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2067 OUT_BCS_BATCH(batch,
2068 2 << 24 | /* PackedMvNum */
2069 7 << 20 | /* MvFormat */
2070 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
2071 0 << 15 | /* TransformFlag: frame DCT */
2072 0 << 14 | /* FieldMbFlag */
2073 0 << 13 | /* IntraMbFlag */
2074 1 << 8 | /* MbType: Frame-based */
2075 0 << 2 | /* SkipMbFlag */
2076 0 << 0 | /* InterMbMode */
2078 OUT_BCS_BATCH(batch, y << 16 | x);
2079 OUT_BCS_BATCH(batch,
2080 max_size_in_word << 24 |
2081 target_size_in_word << 16 |
2082 0x3f << 6 | /* CBP */
2084 OUT_BCS_BATCH(batch,
2085 last_mb_in_slice << 31 |
2086 first_mb_in_slice << 30 |
2087 0 << 27 | /* EnableCoeffClamp */
2088 last_mb_in_slice_group << 26 |
2089 0 << 25 | /* MbSkipConvDisable */
2090 first_mb_in_slice_group << 24 |
2091 0 << 16 | /* MvFieldSelect */
2092 qp_scale_code << 0 |
/* MVs are packed x in the low 16 bits, y in the high 16 bits. */
2095 OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16); /* MV[0][0] */
2096 OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16); /* MV[1][0] */
2097 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
2098 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
2100 ADVANCE_BCS_BATCH(batch);
2102 return len_in_dwords;
/*
 * Insert the application-supplied packed SPS (sequence header) and PPS
 * (picture header) bitstreams, if present, into the slice batch via the
 * context's insert_object hook. MPEG-2 needs no emulation-prevention
 * bytes, hence the trailing 0 argument on both calls.
 */
2106 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2107 struct encode_state *encode_state,
2108 struct intel_encoder_context *encoder_context,
2109 struct intel_batchbuffer *slice_batch)
2111 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2112 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2114 if (encode_state->packed_header_data[idx]) {
2115 VAEncPackedHeaderParameterBuffer *param = NULL;
2116 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2117 unsigned int length_in_bits;
2119 assert(encode_state->packed_header_param[idx]);
2120 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2121 length_in_bits = param->bit_length;
/* Header length is passed as whole DWORDs plus the leftover bit count. */
2123 mfc_context->insert_object(ctx,
2126 ALIGN(length_in_bits, 32) >> 5,
2127 length_in_bits & 0x1f,
2128 5, /* FIXME: check it */
2131 0, /* Needn't insert emulation bytes for MPEG-2 */
2135 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2137 if (encode_state->packed_header_data[idx]) {
2138 VAEncPackedHeaderParameterBuffer *param = NULL;
2139 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2140 unsigned int length_in_bits;
2142 assert(encode_state->packed_header_param[idx]);
2143 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2144 length_in_bits = param->bit_length;
2146 mfc_context->insert_object(ctx,
2149 ALIGN(length_in_bits, 32) >> 5,
2150 length_in_bits & 0x1f,
2151 5, /* FIXME: check it */
2154 0, /* Needn't insert emulation bytes for MPEG-2 */
/*
 * Program one MPEG-2 slice group into the slice batch: slice-group state,
 * packed headers (first group only), a '00' section delimiter, then one
 * PAK object per macroblock. For non-intra slices the VME RDO costs decide
 * per-MB between intra and inter PAK objects. Ends with a tail delimiter
 * at end-of-picture or a section delimiter between groups.
 * Maps the VME output BO for the duration of the MB walk.
 */
2160 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2161 struct encode_state *encode_state,
2162 struct intel_encoder_context *encoder_context,
2164 VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2165 struct intel_batchbuffer *slice_batch)
2167 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2168 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2169 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2170 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2171 unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2172 unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2173 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2174 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2176 int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2177 unsigned int *msg = NULL;
2178 unsigned char *msg_ptr = NULL;
2180 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2181 h_start_pos = slice_param->macroblock_address % width_in_mbs;
2182 v_start_pos = slice_param->macroblock_address / width_in_mbs;
/* An MPEG-2 slice must not wrap to the next macroblock row. */
2183 assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2185 dri_bo_map(vme_context->vme_output.bo , 0);
2186 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2188 if (next_slice_group_param) {
2189 h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2190 v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
/* No following group: "next" position is the bottom of the picture. */
2192 h_next_start_pos = 0;
2193 v_next_start_pos = height_in_mbs;
2196 gen8_mfc_mpeg2_slicegroup_state(ctx,
2203 next_slice_group_param == NULL,
2204 slice_param->is_intra_slice,
2205 slice_param->quantiser_scale_code,
/* Packed sequence/picture headers go in front of the first slice group only. */
2208 if (slice_index == 0)
2209 intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2211 /* Insert '00' to make sure the header is valid */
2212 mfc_context->insert_object(ctx,
2214 (unsigned int*)section_delimiter,
2216 8, /* 8bits in the last DWORD */
2223 for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2224 /* PAK for each macroblocks */
2225 for (j = 0; j < slice_param->num_macroblocks; j++) {
2226 int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2227 int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2228 int first_mb_in_slice = (j == 0);
2229 int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2230 int first_mb_in_slice_group = (i == 0 && j == 0);
2231 int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2232 j == slice_param->num_macroblocks - 1);
/* VME writes one fixed-size message per MB; index by absolute MB address. */
2234 msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2236 if (slice_param->is_intra_slice) {
2237 gen8_mfc_mpeg2_pak_object_intra(ctx,
2242 first_mb_in_slice_group,
2243 last_mb_in_slice_group,
2245 slice_param->quantiser_scale_code,
/* Inter slice: pick intra vs inter per MB by comparing VME RDO costs. */
2251 int inter_rdo, intra_rdo;
2252 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2253 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2255 if (intra_rdo < inter_rdo)
2256 gen8_mfc_mpeg2_pak_object_intra(ctx,
2261 first_mb_in_slice_group,
2262 last_mb_in_slice_group,
2264 slice_param->quantiser_scale_code,
2270 gen8_mfc_mpeg2_pak_object_inter(ctx,
2274 width_in_mbs, height_in_mbs,
2278 first_mb_in_slice_group,
2279 last_mb_in_slice_group,
2280 slice_param->quantiser_scale_code,
2290 dri_bo_unmap(vme_context->vme_output.bo);
2293 if (next_slice_group_param == NULL) { /* end of a picture */
2294 mfc_context->insert_object(ctx,
2296 (unsigned int *)tail_delimiter,
2298 8, /* 8bits in the last DWORD */
2304 } else { /* end of a slice group */
2305 mfc_context->insert_object(ctx,
2307 (unsigned int *)section_delimiter,
2309 8, /* 8bits in the last DWORD */
2319 * A batch buffer for all slices, including slice state,
2320 * slice insert object and slice pak object commands
/*
 * Build the auxiliary (second-level) batch containing all slice groups of
 * the picture, terminate it with MI_BATCH_BUFFER_END, and hand ownership
 * of its BO back to the caller (referenced before the batch is freed).
 */
2324 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2325 struct encode_state *encode_state,
2326 struct intel_encoder_context *encoder_context)
2328 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2329 struct intel_batchbuffer *batch;
2330 VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2334 batch = mfc_context->aux_batchbuffer;
2335 batch_bo = batch->buffer;
/* Each slice_params_ext element is one slice group; peek at the next one
 * so the current group knows where the following group starts. */
2337 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2338 if (i == encode_state->num_slice_params_ext - 1)
2339 next_slice_group_param = NULL;
2341 next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2343 gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2346 intel_batchbuffer_align(batch, 8);
2348 BEGIN_BCS_BATCH(batch, 2);
2349 OUT_BCS_BATCH(batch, 0);
2350 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2351 ADVANCE_BCS_BATCH(batch);
/* Keep the BO alive past intel_batchbuffer_free(); caller unreferences it. */
2353 dri_bo_reference(batch_bo);
2354 intel_batchbuffer_free(batch);
2355 mfc_context->aux_batchbuffer = NULL;
/*
 * Emit the picture-level MFX state for MPEG-2 encode: pipe mode, surface
 * and base-address state, picture state and the (F)QM matrices, in the
 * order the MFX pipeline requires.
 */
2361 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2362 struct encode_state *encode_state,
2363 struct intel_encoder_context *encoder_context)
2365 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2367 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2368 mfc_context->set_surface_state(ctx, encoder_context);
2369 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2370 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2371 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2372 gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2373 gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2374 gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
/*
 * Top-level BCS programming for one MPEG-2 frame: build the slice batch,
 * emit picture-level state, then chain to the slice batch with a
 * second-level MI_BATCH_BUFFER_START (bit 8) in 48-bit address mode.
 */
2378 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2379 struct encode_state *encode_state,
2380 struct intel_encoder_context *encoder_context)
2382 struct intel_batchbuffer *batch = encoder_context->base.batch;
2383 dri_bo *slice_batch_bo;
2385 slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2388 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
2389 intel_batchbuffer_emit_mi_flush(batch);
2391 // picture level programing
2392 gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain into the slice batch; (1 << 8) selects a second-level batch. */
2394 BEGIN_BCS_BATCH(batch, 4);
2395 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2396 OUT_BCS_RELOC(batch,
2398 I915_GEM_DOMAIN_COMMAND, 0,
2400 OUT_BCS_BATCH(batch, 0);
2401 OUT_BCS_BATCH(batch, 0);
2402 ADVANCE_BCS_BATCH(batch);
2405 intel_batchbuffer_end_atomic(batch);
/* This function's reference from the slice-batch builder is dropped here. */
2407 dri_bo_unreference(slice_batch_bo);
/*
 * Bind all surfaces/buffers the MPEG-2 PAK pass needs: reconstructed
 * output, forward/backward references (backward falls back to forward),
 * the input YUV surface and the coded buffer, taking a dri_bo reference
 * on each. Also resets the coded-buffer header for this frame.
 */
2411 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2412 struct encode_state *encode_state,
2413 struct intel_encoder_context *encoder_context)
2415 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2416 struct object_surface *obj_surface;
2417 struct object_buffer *obj_buffer;
2418 struct i965_coded_buffer_segment *coded_buffer_segment;
2419 VAStatus vaStatus = VA_STATUS_SUCCESS;
2423 /* reconstructed surface */
2424 obj_surface = encode_state->reconstructed_object;
2425 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2426 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2427 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2428 mfc_context->surface_state.width = obj_surface->orig_width;
2429 mfc_context->surface_state.height = obj_surface->orig_height;
2430 mfc_context->surface_state.w_pitch = obj_surface->width;
2431 mfc_context->surface_state.h_pitch = obj_surface->height;
2433 /* forward reference */
2434 obj_surface = encode_state->reference_objects[0];
2436 if (obj_surface && obj_surface->bo) {
2437 mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2438 dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2440 mfc_context->reference_surfaces[0].bo = NULL;
2442 /* backward reference */
2443 obj_surface = encode_state->reference_objects[1];
2445 if (obj_surface && obj_surface->bo) {
2446 mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2447 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
/* No backward reference (P frame): reuse the forward reference BO. */
2449 mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2451 if (mfc_context->reference_surfaces[1].bo)
2452 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
/* Remaining slots alternate between the two real references. */
2455 for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2456 mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2458 if (mfc_context->reference_surfaces[i].bo)
2459 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2462 /* input YUV surface */
2463 obj_surface = encode_state->input_yuv_object;
2464 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2465 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2468 obj_buffer = encode_state->coded_buf_object;
2469 bo = obj_buffer->buffer_store->bo;
2470 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
/* Bitstream starts after the driver's coded-buffer header. */
2471 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2472 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2473 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2475 /* set the internal flag to 0 to indicate the coded size is unknown */
2477 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2478 coded_buffer_segment->mapped = 0;
2479 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Entry point for encoding one MPEG-2 picture on Gen8:
 * init -> prepare resources -> program the BCS pipeline -> submit.
 */
2486 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx,
2487 struct encode_state *encode_state,
2488 struct intel_encoder_context *encoder_context)
2490 gen8_mfc_init(ctx, encode_state, encoder_context);
2491 intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2492 /*Programing bcs pipeline*/
2493 gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2494 gen8_mfc_run(ctx, encode_state, encoder_context);
2496 return VA_STATUS_SUCCESS;
2499 /* JPEG encode methods */
/*
 * Bind the resources a JPEG encode needs: the input YUV surface and the
 * coded (output) buffer. JPEG has no reference surfaces. Also resets the
 * coded-buffer header so the coded size reads as unknown for this frame.
 */
2502 intel_mfc_jpeg_prepare(VADriverContextP ctx,
2503 struct encode_state *encode_state,
2504 struct intel_encoder_context *encoder_context)
2506 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2507 struct object_surface *obj_surface;
2508 struct object_buffer *obj_buffer;
2509 struct i965_coded_buffer_segment *coded_buffer_segment;
2510 VAStatus vaStatus = VA_STATUS_SUCCESS;
2513 /* input YUV surface */
2514 obj_surface = encode_state->input_yuv_object;
2515 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2516 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2519 obj_buffer = encode_state->coded_buf_object;
2520 bo = obj_buffer->buffer_store->bo;
2521 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
/* Bitstream starts after the driver's coded-buffer header. */
2522 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2523 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2524 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2526 /* set the internal flag to 0 to indicate the coded size is unknown */
2528 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2529 coded_buffer_segment->mapped = 0;
2530 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit MFX_SURFACE_STATE (6 DWORDs) for the JPEG input surface, mapping
 * the surface fourcc to an MFX surface format and programming dimensions,
 * pitch, tiling and the Cb/Cr plane Y offsets.
 */
2538 gen8_mfc_jpeg_set_surface_state(VADriverContextP ctx,
2539 struct intel_encoder_context *encoder_context,
2540 struct encode_state *encode_state)
2542 struct intel_batchbuffer *batch = encoder_context->base.batch;
2543 struct object_surface *obj_surface = encode_state->input_yuv_object;
2544 unsigned int input_fourcc;
2545 unsigned int y_cb_offset;
2546 unsigned int y_cr_offset;
2547 unsigned int surface_format;
2549 assert(obj_surface);
2551 y_cb_offset = obj_surface->y_cb_offset;
2552 y_cr_offset = obj_surface->y_cr_offset;
2553 input_fourcc = obj_surface->fourcc;
/* Default; refined by the switch below for each supported fourcc. */
2555 surface_format = (obj_surface->fourcc == VA_FOURCC_Y800) ?
2556 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
2559 switch (input_fourcc) {
2560 case VA_FOURCC_Y800: {
2561 surface_format = MFX_SURFACE_MONOCHROME;
2564 case VA_FOURCC_NV12: {
2565 surface_format = MFX_SURFACE_PLANAR_420_8;
2568 case VA_FOURCC_UYVY: {
2569 surface_format = MFX_SURFACE_YCRCB_SWAPY;
2572 case VA_FOURCC_YUY2: {
2573 surface_format = MFX_SURFACE_YCRCB_NORMAL;
2576 case VA_FOURCC_RGBA:
2577 case VA_FOURCC_444P: {
2578 surface_format = MFX_SURFACE_R8G8B8A8_UNORM;
2583 BEGIN_BCS_BATCH(batch, 6);
2585 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2586 OUT_BCS_BATCH(batch, 0);
2587 OUT_BCS_BATCH(batch,
2588 ((obj_surface->orig_height - 1) << 18) |
2589 ((obj_surface->orig_width - 1) << 4));
2590 OUT_BCS_BATCH(batch,
2591 (surface_format << 28) | /* Surface Format */
2592 (0 << 27) | /* must be 1 for interleave U/V, hardware requirement for AVC/VC1/MPEG and 0 for JPEG */
2593 (0 << 22) | /* surface object control state, FIXME??? */
2594 ((obj_surface->width - 1) << 3) | /* pitch */
2595 (0 << 2) | /* must be 0 for interleave U/V */
2596 (1 << 1) | /* must be tiled */
2597 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2598 OUT_BCS_BATCH(batch,
2599 (0 << 16) | /* X offset for U(Cb), must be 0 */
2600 (y_cb_offset << 0)); /* Y offset for U(Cb) */
2601 OUT_BCS_BATCH(batch,
2602 (0 << 16) | /* X offset for V(Cr), must be 0 */
2603 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2606 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_JPEG_PIC_STATE (3 DWORDs). Derives the input surface format
 * and output MCU structure from the surface fourcc, then computes the
 * frame size in 8x8 blocks and the pixel counts of the last (possibly
 * partial) MCU, per the sampling factors (H1/V1) of each MCU layout.
 */
2610 gen8_mfc_jpeg_pic_state(VADriverContextP ctx,
2611 struct intel_encoder_context *encoder_context,
2612 struct encode_state *encode_state)
2614 struct intel_batchbuffer *batch = encoder_context->base.batch;
2615 struct object_surface *obj_surface = encode_state->input_yuv_object;
2616 VAEncPictureParameterBufferJPEG *pic_param;
2617 unsigned int surface_format;
2618 unsigned int frame_width_in_blks;
2619 unsigned int frame_height_in_blks;
2620 unsigned int pixels_in_horizontal_lastMCU;
2621 unsigned int pixels_in_vertical_lastMCU;
2622 unsigned int input_surface_format;
2623 unsigned int output_mcu_format;
2624 unsigned int picture_width;
2625 unsigned int picture_height;
2627 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2628 assert(obj_surface);
2629 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2630 surface_format = obj_surface->fourcc;
2631 picture_width = pic_param->picture_width;
2632 picture_height = pic_param->picture_height;
2634 switch (surface_format) {
2635 case VA_FOURCC_Y800: {
2636 input_surface_format = JPEG_ENC_SURFACE_Y8;
2637 output_mcu_format = JPEG_ENC_MCU_YUV400;
2640 case VA_FOURCC_NV12: {
2641 input_surface_format = JPEG_ENC_SURFACE_NV12;
2642 output_mcu_format = JPEG_ENC_MCU_YUV420;
2645 case VA_FOURCC_UYVY: {
2646 input_surface_format = JPEG_ENC_SURFACE_UYVY;
2647 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2650 case VA_FOURCC_YUY2: {
2651 input_surface_format = JPEG_ENC_SURFACE_YUY2;
2652 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2656 case VA_FOURCC_RGBA:
2657 case VA_FOURCC_444P: {
2658 input_surface_format = JPEG_ENC_SURFACE_RGB;
2659 output_mcu_format = JPEG_ENC_MCU_RGB;
/* Unknown fourcc: fall back to NV12/YUV420 handling. */
2663 input_surface_format = JPEG_ENC_SURFACE_NV12;
2664 output_mcu_format = JPEG_ENC_MCU_YUV420;
2670 switch (output_mcu_format) {
2672 case JPEG_ENC_MCU_YUV400:
2673 case JPEG_ENC_MCU_RGB: {
2674 pixels_in_horizontal_lastMCU = (picture_width % 8);
2675 pixels_in_vertical_lastMCU = (picture_height % 8);
2677 //H1=1,V1=1 for YUV400 and YUV444. So, compute these values accordingly
2678 frame_width_in_blks = ((picture_width + 7) / 8);
2679 frame_height_in_blks = ((picture_height + 7) / 8);
2683 case JPEG_ENC_MCU_YUV420: {
/* Odd dimensions are rounded up to the next even pixel before the
 * 16-pixel MCU remainder is taken. */
2684 if((picture_width % 2) == 0)
2685 pixels_in_horizontal_lastMCU = picture_width % 16;
2687 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2689 if((picture_height % 2) == 0)
2690 pixels_in_vertical_lastMCU = picture_height % 16;
2692 pixels_in_vertical_lastMCU = ((picture_height % 16) + 1) % 16;
2694 //H1=2,V1=2 for YUV420. So, compute these values accordingly
2695 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2696 frame_height_in_blks = ((picture_height + 15) / 16) * 2;
2700 case JPEG_ENC_MCU_YUV422H_2Y: {
2701 if(picture_width % 2 == 0)
2702 pixels_in_horizontal_lastMCU = picture_width % 16;
2704 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2706 pixels_in_vertical_lastMCU = picture_height % 8;
2708 //H1=2,V1=1 for YUV422H_2Y. So, compute these values accordingly
2709 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2710 frame_height_in_blks = ((picture_height + 7) / 8);
2715 BEGIN_BCS_BATCH(batch, 3);
2717 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2719 OUT_BCS_BATCH(batch,
2720 ( pixels_in_horizontal_lastMCU << 26) | /* Pixels In Horizontal Last MCU */
2721 ( pixels_in_vertical_lastMCU << 21) | /* Pixels In Vertical Last MCU */
2722 ( input_surface_format << 8) | /* Input Surface format */
2723 ( output_mcu_format << 0)); /* Output MCU Structure */
2725 OUT_BCS_BATCH(batch,
2726 ((frame_height_in_blks - 1) << 16) | /* Frame Height In Blks Minus 1 */
2727 (JPEG_ENC_ROUND_QUANT_DEFAULT << 13) | /* Rounding Quant set to default value 0 */
2728 ((frame_width_in_blks - 1) << 0)); /* Frame Width In Blks Minus 1 */
2729 ADVANCE_BCS_BATCH(batch);
/*
 * Convert a 64-entry quantization matrix to the reciprocal form the HW
 * expects (65535/Q[i]) and pack two 16-bit reciprocals per DWORD into
 * dword_qm (32 DWORDs). Callers clamp qm values to [1,255] first, so the
 * division cannot be by zero.
 * NOTE(review): the second loop pairs entries i and i+1 — verify in the
 * complete file that its increment is 2 (and j advances), otherwise
 * reciprocal_qm[64] would be read out of bounds.
 */
2733 get_reciprocal_dword_qm(unsigned char *raster_qm, uint32_t *dword_qm)
2736 short reciprocal_qm[64];
2738 for(i=0; i<64; i++) {
2739 reciprocal_qm[i] = 65535/(raster_qm[i]);
2742 for(i=0; i<64; i++) {
2743 dword_qm[j] = ((reciprocal_qm[i+1] <<16) | (reciprocal_qm[i]));
/*
 * Program the JPEG forward quantization matrices. Uses the app-supplied
 * VAQMatrixBufferJPEG when present (buffering it for later frames),
 * otherwise the buffered/default matrices. Scales by the normalized
 * quality factor, converts zigzag -> raster -> column order, takes
 * reciprocals, and sends luma plus (if chroma exists) Cb/Cr matrices.
 */
2752 gen8_mfc_jpeg_fqm_state(VADriverContextP ctx,
2753 struct intel_encoder_context *encoder_context,
2754 struct encode_state *encode_state)
2756 unsigned int quality = 0;
2757 uint32_t temp, i = 0, j = 0, dword_qm[32];
2758 VAEncPictureParameterBufferJPEG *pic_param;
2759 VAQMatrixBufferJPEG *qmatrix;
2760 unsigned char raster_qm[64], column_raster_qm[64];
2761 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2763 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2764 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2765 quality = pic_param->quality;
2767 //If the app sends the qmatrix, use it, buffer it for using it with the next frames
2768 //The app can send qmatrix for the first frame and not send for the subsequent frames
2769 if(encode_state->q_matrix && encode_state->q_matrix->buffer) {
2770 qmatrix = (VAQMatrixBufferJPEG *)encode_state->q_matrix->buffer;
2772 mfc_context->buffered_qmatrix.load_lum_quantiser_matrix = 1;
2773 memcpy(mfc_context->buffered_qmatrix.lum_quantiser_matrix, qmatrix->lum_quantiser_matrix, 64 * (sizeof(unsigned char)));
2775 if(pic_param->num_components > 1) {
2776 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 1;
2777 memcpy(mfc_context->buffered_qmatrix.chroma_quantiser_matrix, qmatrix->chroma_quantiser_matrix, 64 * (sizeof(unsigned char)));
2779 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 0;
2783 //If the app doesn't send the qmatrix, use the buffered/default qmatrix
2784 qmatrix = &mfc_context->buffered_qmatrix;
2785 qmatrix->load_lum_quantiser_matrix = 1;
2786 qmatrix->load_chroma_quantiser_matrix = (pic_param->num_components > 1) ? 1 : 0;
2790 //As per the design, normalization of the quality factor and scaling of the Quantization tables
2791 //based on the quality factor needs to be done in the driver before sending the values to the HW.
2792 //But note, the driver expects the scaled quantization tables (as per below logic) to be sent as
2793 //packed header information. The packed header is written as the header of the jpeg file. This
2794 //header information is used to decode the jpeg file. So, it is the app's responsibility to send
2795 //the correct header information (See build_packed_jpeg_header_buffer() in jpegenc.c in LibVa on
2796 //how to do this). QTables can be different for different applications. If no tables are provided,
2797 //the default tables in the driver are used.
2799 //Normalization of the quality factor (IJG-style scaling)
2800 if (quality > 100) quality=100;
2801 if (quality == 0) quality=1;
2802 quality = (quality < 50) ? (5000/quality) : (200 - (quality*2));
2804 //Step 1. Apply Quality factor and clip to range [1, 255] for luma and chroma Quantization matrices
2805 //Step 2. HW expects the 1/Q[i] values in the qm sent, so get reciprocals
2806 //Step 3. HW also expects 32 dwords, hence combine 2 (1/Q) values into 1 dword
2807 //Step 4. Send the Quantization matrix to the HW, use gen8_mfc_fqm_state
2810 if(qmatrix->load_lum_quantiser_matrix) {
2811 //apply quality to lum_quantiser_matrix
2812 for(i=0; i < 64; i++) {
2813 temp = (qmatrix->lum_quantiser_matrix[i] * quality)/100;
2814 //clamp to range [1,255]
2815 temp = (temp > 255) ? 255 : temp;
2816 temp = (temp < 1) ? 1 : temp;
2817 qmatrix->lum_quantiser_matrix[i] = (unsigned char)temp;
2820 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2821 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2822 for (j = 0; j < 64; j++)
2823 raster_qm[zigzag_direct[j]] = qmatrix->lum_quantiser_matrix[j];
2825 //Convert the raster order(row-ordered) to the column-raster (column by column).
2826 //To be consistent with the other encoders, send it in column order.
2827 //Need to double check if our HW expects col or row raster.
2828 for (j = 0; j < 64; j++) {
2829 int row = j / 8, col = j % 8;
2830 column_raster_qm[col * 8 + row] = raster_qm[j];
2833 //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2834 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2836 //send the luma qm to the command buffer
2837 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2840 //For Chroma, if chroma exists (Cb, Cr or G, B)
2841 if(qmatrix->load_chroma_quantiser_matrix) {
2842 //apply quality to chroma_quantiser_matrix
2843 for(i=0; i < 64; i++) {
2844 temp = (qmatrix->chroma_quantiser_matrix[i] * quality)/100;
2845 //clamp to range [1,255]
2846 temp = (temp > 255) ? 255 : temp;
2847 temp = (temp < 1) ? 1 : temp;
2848 qmatrix->chroma_quantiser_matrix[i] = (unsigned char)temp;
2851 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2852 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2853 for (j = 0; j < 64; j++)
2854 raster_qm[zigzag_direct[j]] = qmatrix->chroma_quantiser_matrix[j];
2856 //Convert the raster order(row-ordered) to the column-raster (column by column).
2857 //To be consistent with the other encoders, send it in column order.
2858 //Need to double check if our HW expects col or row raster.
2859 for (j = 0; j < 64; j++) {
2860 int row = j / 8, col = j % 8;
2861 column_raster_qm[col * 8 + row] = raster_qm[j];
2865 //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2866 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2868 //send the same chroma qm to the command buffer (for both U,V or G,B)
2869 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2870 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2875 //Translation of Table K.5 into code: This method takes the huffval from the
2876 //Huffmantable buffer and converts into index for the coefficients and size tables
/* Map a JPEG HUFFVAL (run/size nibble pair) to its position in the
 * ordered code/size tables; values >= 0xF0 (ZRL/EOB row) shift by one. */
2877 uint8_t map_huffval_to_index(uint8_t huff_val)
2881 if(huff_val < 0xF0) {
2882 index = (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
2884 index = 1 + (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
2891 //Implementation of Flow chart Annex C - Figure C.1
/* Expand BITS (count of codes per length, bits[0..15]) into a per-code
 * size table, 0-terminated; *lastK receives the number of codes. */
2893 generate_huffman_codesizes_table(uint8_t *bits, uint8_t *huff_size_table, uint8_t *lastK)
2895 uint8_t i=1, j=1, k=0;
2898 while(j <= (uint8_t)bits[i-1]) {
2899 huff_size_table[k] = i;
/* Sentinel terminator required by Figure C.2's while loop. */
2907 huff_size_table[k] = 0;
2911 //Implementation of Flow chart Annex C - Figure C.2
/* Assign canonical Huffman code words from the 0-terminated size table:
 * codes of equal length are consecutive; the code is doubled when the
 * length increases. */
2913 generate_huffman_codes_table(uint8_t *huff_size_table, uint16_t *huff_code_table)
2917 uint8_t si=huff_size_table[k];
2919 while(huff_size_table[k] != 0) {
2921 while(huff_size_table[k] == si) {
2923 // A huffman code can never be 0xFFFF. Replace it with 0 if 0xFFFF
2924 if(code == 0xFFFF) {
2928 huff_code_table[k] = code;
2939 //Implementation of Flow chat Annex C - Figure C.3
2941 generate_ordered_codes_table(uint8_t *huff_vals, uint8_t *huff_size_table, uint16_t *huff_code_table, uint8_t type, uint8_t lastK)
2943 uint8_t huff_val_size=0, i=0, k=0;
2945 huff_val_size = (type == 0) ? 12 : 162;
2946 uint8_t huff_si_table[huff_val_size];
2947 uint16_t huff_co_table[huff_val_size];
2949 memset(huff_si_table, 0, sizeof(huff_si_table));
2950 memset(huff_co_table, 0, sizeof(huff_co_table));
2953 i = map_huffval_to_index(huff_vals[k]);
2954 huff_co_table[i] = huff_code_table[k];
2955 huff_si_table[i] = huff_size_table[k];
2959 memcpy(huff_size_table, huff_si_table, sizeof(uint8_t)*huff_val_size);
2960 memcpy(huff_code_table, huff_co_table, sizeof(uint16_t)*huff_val_size);
2964 //This method converts the huffman table to code words which is needed by the HW
2965 //Flowcharts from Jpeg Spec Annex C - Figure C.1, Figure C.2, Figure C.3 are used here
/* Build the HW code table for one VA huffman table: type 0 = DC
 * (12 entries), else AC (162); index selects the table slot in the
 * buffer. Output DWORD layout: byte 0 code length, bytes 1-2 code word. */
2967 convert_hufftable_to_codes(VAHuffmanTableBufferJPEGBaseline *huff_buffer, uint32_t *table, uint8_t type, uint8_t index)
2969 uint8_t lastK = 0, i=0;
2970 uint8_t huff_val_size = 0;
2971 uint8_t *huff_bits, *huff_vals;
2973 huff_val_size = (type == 0) ? 12 : 162;
2974 uint8_t huff_size_table[huff_val_size+1]; //The +1 for adding 0 at the end of huff_val_size
2975 uint16_t huff_code_table[huff_val_size];
2977 memset(huff_size_table, 0, sizeof(huff_size_table));
2978 memset(huff_code_table, 0, sizeof(huff_code_table));
2980 huff_bits = (type == 0) ? (huff_buffer->huffman_table[index].num_dc_codes) : (huff_buffer->huffman_table[index].num_ac_codes);
2981 huff_vals = (type == 0) ? (huff_buffer->huffman_table[index].dc_values) : (huff_buffer->huffman_table[index].ac_values);
2984 //Generation of table of Huffman code sizes
2985 generate_huffman_codesizes_table(huff_bits, huff_size_table, &lastK);
2987 //Generation of table of Huffman codes
2988 generate_huffman_codes_table(huff_size_table, huff_code_table);
2990 //Ordering procedure for encoding procedure code tables
2991 generate_ordered_codes_table(huff_vals, huff_size_table, huff_code_table, type, lastK);
2993 //HW expects Byte0: Code length; Byte1,Byte2: Code Word, Byte3: Dummy
2994 //Since IA is little-endian, &, | and << accordingly to store the values in the DWord.
2995 for(i=0; i<huff_val_size; i++) {
2997 table[i] = ((huff_size_table[i] & 0xFF) | ((huff_code_table[i] & 0xFFFF) << 8));
3002 //send the huffman table using MFC_JPEG_HUFF_TABLE_STATE
3004 gen8_mfc_jpeg_huff_table_state(VADriverContextP ctx,
3005 struct encode_state *encode_state,
3006 struct intel_encoder_context *encoder_context,
3009 VAHuffmanTableBufferJPEGBaseline *huff_buffer;
3010 struct intel_batchbuffer *batch = encoder_context->base.batch;
3012 uint32_t dc_table[12], ac_table[162];
3014 assert(encode_state->huffman_table && encode_state->huffman_table->buffer);
3015 huff_buffer = (VAHuffmanTableBufferJPEGBaseline *)encode_state->huffman_table->buffer;
3017 memset(dc_table, 0, 12);
3018 memset(ac_table, 0, 162);
3020 for (index = 0; index < num_tables; index++) {
3021 int id = va_to_gen7_jpeg_hufftable[index];
3023 if (!huff_buffer->load_huffman_table[index])
3026 //load DC table with 12 DWords
3027 convert_hufftable_to_codes(huff_buffer, dc_table, 0, index); //0 for Dc
3029 //load AC table with 162 DWords
3030 convert_hufftable_to_codes(huff_buffer, ac_table, 1, index); //1 for AC
3032 BEGIN_BCS_BATCH(batch, 176);
3033 OUT_BCS_BATCH(batch, MFC_JPEG_HUFF_TABLE_STATE | (176 - 2));
3034 OUT_BCS_BATCH(batch, id); //Huff table id
3036 //DWord 2 - 13 has DC_TABLE
3037 intel_batchbuffer_data(batch, dc_table, 12*4);
3039 //Dword 14 -175 has AC_TABLE
3040 intel_batchbuffer_data(batch, ac_table, 162*4);
3041 ADVANCE_BCS_BATCH(batch);
//Returns the luma (Y) horizontal/vertical sampling factors for the given
//surface fourcc, used to size MCUs for MFC_JPEG_SCAN_OBJECT.
//NOTE(review): the assignments inside each case are elided in this extraction;
//only the fourcc dispatch structure is visible.
3046 //This method is used to compute the MCU count used for setting MFC_JPEG_SCAN_OBJECT
3047 static void get_Y_sampling_factors(uint32_t surface_format, uint8_t *h_factor, uint8_t *v_factor)
3049 switch (surface_format) {
3050 case VA_FOURCC_Y800: {
3055 case VA_FOURCC_NV12: {
3060 case VA_FOURCC_UYVY: {
3065 case VA_FOURCC_YUY2: {
//RGBA shares the 444P path (no Y subsampling).
3070 case VA_FOURCC_RGBA:
3071 case VA_FOURCC_444P: {
3076 default : { //May be have to insert error handling here. For now just use as below
//Programs the 3-DWord MFC_JPEG_SCAN_OBJECT command: MCU count in DW1, then
//huffman table selectors, header/last-scan flags and restart interval in DW2.
//JPEG baseline uses a single scan per frame, so is_last_scan is always 1.
3084 //set MFC_JPEG_SCAN_OBJECT
3086 gen8_mfc_jpeg_scan_object(VADriverContextP ctx,
3087 struct encode_state *encode_state,
3088 struct intel_encoder_context *encoder_context)
3090 uint32_t mcu_count, surface_format, Mx, My;
3091 uint8_t i, horizontal_sampling_factor, vertical_sampling_factor, huff_ac_table=0, huff_dc_table=0;
3092 uint8_t is_last_scan = 1; //Jpeg has only 1 scan per frame. When last scan, HW inserts EOI code.
3093 uint8_t head_present_flag=1; //Header has tables and app data
3094 uint16_t num_components, restart_interval; //Specifies number of MCUs in an ECS.
3095 VAEncSliceParameterBufferJPEG *slice_param;
3096 VAEncPictureParameterBufferJPEG *pic_param;
3098 struct intel_batchbuffer *batch = encoder_context->base.batch;
3099 struct object_surface *obj_surface = encode_state->input_yuv_object;
3101 assert(encode_state->slice_params_ext[0] && encode_state->slice_params_ext[0]->buffer);
3102 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
3103 assert(obj_surface);
3104 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
3105 slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[0]->buffer;
3106 surface_format = obj_surface->fourcc;
3108 get_Y_sampling_factors(surface_format, &horizontal_sampling_factor, &vertical_sampling_factor);
3110 // Mx = #MCUs in a row, My = #MCUs in a column
//Ceiling division: each MCU covers (factor*8) pixels in that dimension.
3111 Mx = (pic_param->picture_width + (horizontal_sampling_factor*8 -1))/(horizontal_sampling_factor*8);
3112 My = (pic_param->picture_height + (vertical_sampling_factor*8 -1))/(vertical_sampling_factor*8);
3113 mcu_count = (Mx * My);
3115 num_components = pic_param->num_components;
3116 restart_interval = slice_param->restart_interval;
3118 //Depending on number of components and values set for table selectors,
3119 //only those bits are set in 24:22 for AC table, 20:18 for DC table
//One selector bit per component: component i contributes its selector << i.
3120 for(i=0; i<num_components; i++) {
3121 huff_ac_table |= ((slice_param->components[i].ac_table_selector)<<i);
3122 huff_dc_table |= ((slice_param->components[i].dc_table_selector)<<i);
3126 BEGIN_BCS_BATCH(batch, 3);
3128 OUT_BCS_BATCH(batch, MFC_JPEG_SCAN_OBJECT | (3 - 2));
3130 OUT_BCS_BATCH(batch, mcu_count << 0); //MCU Count
3132 OUT_BCS_BATCH(batch,
3133 (huff_ac_table << 22) | //Huffman AC Table
3134 (huff_dc_table << 18) | //Huffman DC Table
3135 (head_present_flag << 17) | //Head present flag
3136 (is_last_scan << 16) | //Is last scan
3137 (restart_interval << 0)); //Restart Interval
3138 ADVANCE_BCS_BATCH(batch);
//Emits an MFX_PAK_INSERT_OBJECT command that copies 'length_in_dws' DWords of
//raw header bytes (insert_data) into the output bitstream. Flag bits in DW1
//are fixed to the JPEG-encoder configuration (no emulation prevention, no
//byte offset, bitstream-start reset).
3142 gen8_mfc_jpeg_pak_insert_object(struct intel_encoder_context *encoder_context, unsigned int *insert_data,
3143 int length_in_dws, int data_bits_in_last_dw, int is_last_header,
3144 int is_end_of_slice)
3146 struct intel_batchbuffer *batch = encoder_context->base.batch;
//Hardware convention: 0 valid bits in the last DW means "all 32 are valid".
3149 if (data_bits_in_last_dw == 0)
3150 data_bits_in_last_dw = 32;
3152 BEGIN_BCS_BATCH(batch, length_in_dws + 2);
3154 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
3156 OUT_BCS_BATCH(batch,
3157 (0 << 16) | //DataByteOffset 0 for JPEG Encoder
3158 (0 << 15) | //HeaderLengthExcludeFrmSize 0 for JPEG Encoder
3159 (data_bits_in_last_dw << 8) | //DataBitsInLastDW
3160 (0 << 4) | //SkipEmulByteCount 0 for JPEG Encoder
3161 (0 << 3) | //EmulationFlag 0 for JPEG Encoder
3162 ((!!is_last_header) << 2) | //LastHeaderFlag
3163 ((!!is_end_of_slice) << 1) | //EndOfSliceFlag
3164 (1 << 0)); //BitstreamStartReset 1 for JPEG Encoder
3166 intel_batchbuffer_data(batch, insert_data, length_in_dws*4);
3168 ADVANCE_BCS_BATCH(batch);
//Inserts the app-supplied packed JPEG headers (SOI/APPn/DQT/DHT/SOF/SOS...)
//into the bitstream via MFX_PAK_INSERT_OBJECT. Length comes from the packed
//header parameter buffer in bits; rounded up to whole DWords for the insert.
3172 //send the jpeg headers to HW using MFX_PAK_INSERT_OBJECT
3174 gen8_mfc_jpeg_add_headers(VADriverContextP ctx,
3175 struct encode_state *encode_state,
3176 struct intel_encoder_context *encoder_context)
//Silently a no-op when the app provided no packed header data.
3178 if (encode_state->packed_header_data_ext) {
3179 VAEncPackedHeaderParameterBuffer *param = NULL;
3180 unsigned int *header_data = (unsigned int *)(*encode_state->packed_header_data_ext)->buffer;
3181 unsigned int length_in_bits;
3183 param = (VAEncPackedHeaderParameterBuffer *)(*encode_state->packed_header_params_ext)->buffer;
3184 length_in_bits = param->bit_length;
//DWord count = ceil(bits/32); remainder bits go in DataBitsInLastDW.
3186 gen8_mfc_jpeg_pak_insert_object(encoder_context,
3188 ALIGN(length_in_bits, 32) >> 5,
3189 length_in_bits & 0x1f,
//Seeds mfc_context->buffered_qmatrix with the driver's default luma/chroma
//quantization tables, stored in zigzag order to match app-supplied QMs.
//NOTE(review): the loop headers around the two assignments are elided here.
3195 //Initialize the buffered_qmatrix with the default qmatrix in the driver.
3196 //If the app sends the qmatrix, this will be replaced with the one app sends.
3198 jpeg_init_default_qmatrix(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3201 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3203 //Load the the QM in zigzag order. If app sends QM, it is always in zigzag order.
3205 mfc_context->buffered_qmatrix.lum_quantiser_matrix[i] = jpeg_luma_quant[zigzag_direct[i]];
3208 mfc_context->buffered_qmatrix.chroma_quantiser_matrix[i] = jpeg_chroma_quant[zigzag_direct[i]];
//Picture-level JPEG programming: emits the full MFC state sequence (pipe mode,
//surface, buffer addresses, pic state, FQM), then the huffman tables, scan
//object and packed headers. Scans all slice components to find the highest
//huffman table selector so only the needed tables are sent.
3211 /* This is at the picture level */
3213 gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx,
3214 struct encode_state *encode_state,
3215 struct intel_encoder_context *encoder_context)
3217 int i, j, component, max_selector = 0;
3218 VAEncSliceParameterBufferJPEG *slice_param;
3220 gen8_mfc_pipe_mode_select(ctx, MFX_FORMAT_JPEG, encoder_context);
3221 gen8_mfc_jpeg_set_surface_state(ctx, encoder_context, encode_state);
3222 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
3223 gen8_mfc_ind_obj_base_addr_state(ctx, encoder_context);
3224 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
3225 gen8_mfc_jpeg_pic_state(ctx, encoder_context, encode_state);
3227 //do the slice level encoding here
3228 gen8_mfc_jpeg_fqm_state(ctx, encoder_context, encode_state);
3230 //I dont think I need this for loop. Just to be consistent with other encoding logic...
3231 for(i = 0; i < encode_state->num_slice_params_ext; i++) {
3232 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[i]->buffer);
3233 slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer;
3235 for(j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
//Track the largest DC/AC selector used by any component.
3237 for(component = 0; component < slice_param->num_components; component++) {
3238 if(max_selector < slice_param->components[component].dc_table_selector)
3239 max_selector = slice_param->components[component].dc_table_selector;
3241 if (max_selector < slice_param->components[component].ac_table_selector)
3242 max_selector = slice_param->components[component].ac_table_selector;
//Baseline JPEG allows at most 2 huffman tables per class (selectors 0/1).
3249 assert(max_selector < 2);
3250 //send the huffman table using MFC_JPEG_HUFF_TABLE
3251 gen8_mfc_jpeg_huff_table_state(ctx, encode_state, encoder_context, max_selector+1);
3252 //set MFC_JPEG_SCAN_OBJECT
3253 gen8_mfc_jpeg_scan_object(ctx, encode_state, encoder_context);
3254 //add headers using MFX_PAK_INSERT_OBJECT (it is refered as MFX_INSERT_OBJECT in this driver code)
3255 gen8_mfc_jpeg_add_headers(ctx, encode_state, encoder_context);
//Wraps the picture-level JPEG programming in an atomic BCS batch (reserving
//0x4000 bytes) with a leading MI_FLUSH, then closes the atomic section.
3260 gen8_mfc_jpeg_pipeline_programing(VADriverContextP ctx,
3261 struct encode_state *encode_state,
3262 struct intel_encoder_context *encoder_context)
3264 struct intel_batchbuffer *batch = encoder_context->base.batch;
3267 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
3268 intel_batchbuffer_emit_mi_flush(batch);
3270 // picture level programing
3271 gen8_mfc_jpeg_pipeline_picture_programing(ctx, encode_state, encoder_context);
3274 intel_batchbuffer_end_atomic(batch);
//Top-level JPEG encode entry: init MFC state, prepare surfaces/buffers,
//program the BCS pipeline, then submit the batch. Always reports success;
//failures inside the helpers surface via asserts, not the return code.
3280 gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
3281 struct encode_state *encode_state,
3282 struct intel_encoder_context *encoder_context)
3284 gen8_mfc_init(ctx, encode_state, encoder_context);
3285 intel_mfc_jpeg_prepare(ctx, encode_state, encoder_context);
3286 /*Programing bcs pipeline*/
3287 gen8_mfc_jpeg_pipeline_programing(ctx, encode_state, encoder_context);
3288 gen8_mfc_run(ctx, encode_state, encoder_context);
3290 return VA_STATUS_SUCCESS;
//Estimates the VP8 quantizer index that best matches target_frame_size, by
//scanning the vp8_bits_per_mb table (values pre-scaled by 512, hence the <<9)
//within the picture's [clamp_qindex_low, clamp_qindex_high] range.
//Returns the chosen qindex. NOTE(review): 'is_key_frame' and the loop's
//break/assignment lines are elided in this extraction.
3293 static int gen8_mfc_vp8_qindex_estimate(struct encode_state *encode_state,
3294 struct gen6_mfc_context *mfc_context,
3295 int target_frame_size,
3298 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3299 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3300 unsigned int max_qindex = pic_param->clamp_qindex_high;
3301 unsigned int min_qindex = pic_param->clamp_qindex_low;
3302 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3303 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3305 int last_size_gap = -1;
3306 int per_mb_size_at_qindex;
3307 int target_qindex = min_qindex, i;
3309 /* make sure would not overflow*/
//Divide first when (target<<9) would exceed INT_MAX; otherwise scale first
//for better precision.
3310 if (target_frame_size >= (0x7fffffff >> 9))
3311 target_mb_size = (target_frame_size / width_in_mbs / height_in_mbs) << 9;
3313 target_mb_size = (target_frame_size << 9) / width_in_mbs / height_in_mbs;
3315 for (i = min_qindex; i <= max_qindex; i++) {
3316 per_mb_size_at_qindex = vp8_bits_per_mb[!is_key_frame][i];
3318 if (per_mb_size_at_qindex <= target_mb_size) {
3319 if (target_mb_size - per_mb_size_at_qindex < last_size_gap)
3324 last_size_gap = per_mb_size_at_qindex - target_mb_size;
3327 return target_qindex;
//Initializes VP8 bit-rate-control state: per-slice-type target frame sizes
//from bitrate/framerate/GOP shape, initial qindex estimates, and the HRD
//buffer model (size, fullness, capacity).
3330 static void gen8_mfc_vp8_brc_init(struct encode_state *encode_state,
3331 struct intel_encoder_context* encoder_context)
3333 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3334 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3335 double bitrate = encoder_context->brc.bits_per_second[0];
3336 double framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
3337 int inum = 1, pnum = 0;
3338 int intra_period = seq_param->intra_period;
3339 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3340 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3341 int max_frame_size = (vp8_bits_per_mb[0][0] >> 9) * width_in_mbs * height_in_mbs;/* vp8_bits_per_mb table mutilpled 512 */
//GOP: 1 key frame plus (intra_period - 1) P frames.
3343 pnum = intra_period - 1;
3345 mfc_context->brc.mode = encoder_context->rate_control_mode;
//Split the GOP bit budget between I and P frames using BRC_PWEIGHT.
3347 mfc_context->brc.target_frame_size[0][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period) / framerate) /
3348 (double)(inum + BRC_PWEIGHT * pnum ));
3349 mfc_context->brc.target_frame_size[0][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[0][SLICE_TYPE_I];
3351 mfc_context->brc.gop_nums[0][SLICE_TYPE_I] = inum;
3352 mfc_context->brc.gop_nums[0][SLICE_TYPE_P] = pnum;
3354 mfc_context->brc.bits_per_frame[0] = bitrate / framerate;
//Seed per-type qindexes from the bits-per-mb table.
3356 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] = gen8_mfc_vp8_qindex_estimate(encode_state,
3358 mfc_context->brc.target_frame_size[0][SLICE_TYPE_I],
3360 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = gen8_mfc_vp8_qindex_estimate(encode_state,
3362 mfc_context->brc.target_frame_size[0][SLICE_TYPE_P],
//HRD buffer defaults to one second of bits when the app gives no size.
3365 if (encoder_context->brc.hrd_buffer_size)
3366 mfc_context->hrd.buffer_size[0] = (double)encoder_context->brc.hrd_buffer_size;
3368 mfc_context->hrd.buffer_size[0] = bitrate;
3369 if (encoder_context->brc.hrd_initial_buffer_fullness &&
3370 encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size[0])
3371 mfc_context->hrd.current_buffer_fullness[0] = (double)encoder_context->brc.hrd_initial_buffer_fullness;
3373 mfc_context->hrd.current_buffer_fullness[0] = mfc_context->hrd.buffer_size[0] / 2.0;
3374 mfc_context->hrd.target_buffer_fullness[0] = (double)mfc_context->hrd.buffer_size[0] / 2.0;
3375 mfc_context->hrd.buffer_capacity[0] = (double)mfc_context->hrd.buffer_size[0] / max_frame_size;
3376 mfc_context->hrd.violation_noted = 0;
//Post-encode BRC update: given the actual frame size (frame_bits), predicts
//the next quantizer for the current slice type, applies rounding/rate-of-
//change clamps, an HRD-fullness-based correction, and HRD over/underflow
//handling. Returns the (possibly updated) BRC status. The logic mirrors the
//H.264 BRC postpack in this driver, with VP8 qindex ranges.
3379 static int gen8_mfc_vp8_brc_postpack(struct encode_state *encode_state,
3380 struct intel_encoder_context *encoder_context,
3383 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3384 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
3385 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
//VP8: frame_type 0 means key frame.
3386 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3387 int slicetype = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3388 int qpi = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I];
3389 int qpp = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P];
3390 int qp; // quantizer of previously encoded slice of current type
3391 int qpn; // predicted quantizer for next frame of current type in integer format
3392 double qpf; // predicted quantizer for next frame of current type in float format
3393 double delta_qp; // QP correction
3394 int target_frame_size, frame_size_next;
3396 * x - how far we are from HRD buffer borders
3397 * y - how far we are from target HRD buffer fullness
3400 double frame_size_alpha;
3401 unsigned int max_qindex = pic_param->clamp_qindex_high;
3402 unsigned int min_qindex = pic_param->clamp_qindex_low;
3404 qp = mfc_context->brc.qp_prime_y[0][slicetype];
3406 target_frame_size = mfc_context->brc.target_frame_size[0][slicetype];
//Tiny HRD buffers (< 5 max frames) get no smoothing (alpha = 0).
3407 if (mfc_context->hrd.buffer_capacity[0] < 5)
3408 frame_size_alpha = 0;
3410 frame_size_alpha = (double)mfc_context->brc.gop_nums[0][slicetype];
3411 if (frame_size_alpha > 30) frame_size_alpha = 30;
//Next-frame size: move toward target, damped by alpha.
3412 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
3413 (double)(frame_size_alpha + 1.);
3415 /* frame_size_next: avoiding negative number and too small value */
3416 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
3417 frame_size_next = (int)((double)target_frame_size * 0.25);
//Heuristic: QP scales roughly inversely with achieved frame size.
3419 qpf = (double)qp * target_frame_size / frame_size_next;
3420 qpn = (int)(qpf + 0.5);
3423 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
3424 mfc_context->brc.qpf_rounding_accumulator[0] += qpf - qpn;
3425 if (mfc_context->brc.qpf_rounding_accumulator[0] > 1.0) {
3427 mfc_context->brc.qpf_rounding_accumulator[0] = 0.;
3428 } else if (mfc_context->brc.qpf_rounding_accumulator[0] < -1.0) {
3430 mfc_context->brc.qpf_rounding_accumulator[0] = 0.;
3434 /* making sure that QP is not changing too fast */
3435 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
3436 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
3437 /* making sure that with QP predictions we did do not leave QPs range */
3438 BRC_CLIP(qpn, min_qindex, max_qindex);
3440 /* checking wthether HRD compliance is still met */
3441 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
3443 /* calculating QP delta as some function*/
3444 x = mfc_context->hrd.target_buffer_fullness[0] - mfc_context->hrd.current_buffer_fullness[0];
3446 x /= mfc_context->hrd.target_buffer_fullness[0];
3447 y = mfc_context->hrd.current_buffer_fullness[0];
3450 x /= (mfc_context->hrd.buffer_size[0] - mfc_context->hrd.target_buffer_fullness[0]);
3451 y = mfc_context->hrd.buffer_size[0] - mfc_context->hrd.current_buffer_fullness[0];
3453 if (y < 0.01) y = 0.01;
3455 else if (x < -1) x = -1;
//Smooth correction: bounded by BRC_QP_MAX_CHANGE, shaped by distance to
//the buffer border (y) and to target fullness (x).
3457 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
3458 qpn = (int)(qpn + delta_qp + 0.5);
3460 /* making sure that with QP predictions we did do not leave QPs range */
3461 BRC_CLIP(qpn, min_qindex, max_qindex);
3463 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
3464 /* correcting QPs of slices of other types */
3465 if (!is_key_frame) {
//Keep the I-frame qindex roughly BRC_I_P_QP_DIFF below the P qindex.
3466 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 4)
3467 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 2;
3469 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 4)
3470 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
3472 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qindex, max_qindex);
3473 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qindex, max_qindex);
3474 } else if (sts == BRC_UNDERFLOW) { // underflow
3475 if (qpn <= qp) qpn = qp + 2;
3476 if (qpn > max_qindex) {
3478 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
3480 } else if (sts == BRC_OVERFLOW) {
3481 if (qpn >= qp) qpn = qp - 2;
3482 if (qpn < min_qindex) { // < 0 (?) overflow with minQP
3484 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
3488 mfc_context->brc.qp_prime_y[0][slicetype] = qpn;
//Initializes VUI/HRD bookkeeping for CBR mode: bitrate value, initial CPB
//removal delay (in 90 kHz clock ticks, per the HRD model), and the standard
//24-bit field lengths. Non-CBR modes leave the context untouched.
3493 static void gen8_mfc_vp8_hrd_context_init(struct encode_state *encode_state,
3494 struct intel_encoder_context *encoder_context)
3496 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3497 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3498 int target_bit_rate = encoder_context->brc.bits_per_second[0];
3500 // current we only support CBR mode.
3501 if (rate_control_mode == VA_RC_CBR) {
3502 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
//Half-full CPB expressed in 90 kHz ticks.
3503 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
3504 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
3505 mfc_context->vui_hrd.i_frame_number = 0;
3507 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
3508 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
3509 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
//Per-frame HRD bookkeeping: advances the encoded-frame counter.
3514 static void gen8_mfc_vp8_hrd_context_update(struct encode_state *encode_state,
3515 struct gen6_mfc_context *mfc_context)
3517 mfc_context->vui_hrd.i_frame_number++;
//Pre-encode BRC hook: in CBR mode, (re)initializes the BRC and HRD contexts.
//NOTE(review): the declaration of 'brc_updated' and the conditions guarding
//the two init calls are elided in this extraction — presumably they fire on
//first use or when encoder_context->brc.need_reset is set; confirm upstream.
3520 static void gen8_mfc_vp8_brc_prepare(struct encode_state *encode_state,
3521 struct intel_encoder_context *encoder_context)
3523 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3525 if (rate_control_mode == VA_RC_CBR) {
3527 assert(encoder_context->codec != CODEC_MPEG2);
3529 brc_updated = encoder_context->brc.need_reset;
3531 /*Programing bit rate control */
3533 gen8_mfc_vp8_brc_init(encode_state, encoder_context);
3536 /*Programing HRD control */
3538 gen8_mfc_vp8_hrd_context_init(encode_state, encoder_context);
//Resets per-frame VP8 entropy state: frame-header patch offsets, segment and
//MV probabilities, mode probabilities (key-frame vs inter defaults), and the
//skip probability derived from the base qindex. Also uploads the default
//coefficient probabilities into the stream-in BO for the hardware.
3542 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
3543 VAEncPictureParameterBufferVP8 *pic_param,
3544 VAQMatrixBufferVP8 *q_matrix)
3547 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3548 unsigned char *coeff_probs_stream_in_buffer;
//Offsets into the binarized frame header that later get patched; reset to 0.
3550 mfc_context->vp8_state.frame_header_lf_update_pos = 0;
3551 mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
3552 mfc_context->vp8_state.frame_header_token_update_pos = 0;
3553 mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
3555 mfc_context->vp8_state.prob_skip_false = 255;
3556 memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
3557 memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
//Key frames use the key-frame mode probability tables...
3560 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3561 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3563 mfc_context->vp8_state.prob_intra = 255;
3564 mfc_context->vp8_state.prob_last = 128;
3565 mfc_context->vp8_state.prob_gf = 128;
//...inter frames use the regular tables and a lower intra probability.
3567 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3568 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3570 mfc_context->vp8_state.prob_intra = 63;
3571 mfc_context->vp8_state.prob_last = 128;
3572 mfc_context->vp8_state.prob_gf = 128;
//Skip probability looked up from the base luma qindex.
3575 mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
3577 dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
3578 coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
3579 assert(coeff_probs_stream_in_buffer);
3580 memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
3581 dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
//Placeholder for between-frame probability adaptation; body is a stub here
//(only the TODO-style comment is visible).
3584 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
3585 VAQMatrixBufferVP8 *q_matrix)
3588 /*some other probabilities need to be updated*/
//Forward declaration: implemented elsewhere (i965_encoder_utils.c — TODO
//confirm); serializes the uncompressed VP8 frame header into
//mfc_context->vp8_state.vp8_frame_header.
3591 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
3592 VAEncPictureParameterBufferVP8 *pic_param,
3593 VAQMatrixBufferVP8 *q_matrix,
3594 struct gen6_mfc_context *mfc_context,
3595 struct intel_encoder_context *encoder_context);
//Binarizes the VP8 uncompressed frame header and copies the resulting bits
//(rounded up to whole bytes) into frame_header_bo for the hardware. The
//heap copy produced by binarize_vp8_frame_header() is freed here — this
//function owns and releases vp8_frame_header.
3597 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
3598 struct intel_encoder_context *encoder_context,
3599 struct gen6_mfc_context *mfc_context)
3601 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3602 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3603 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3604 unsigned char *frame_header_buffer;
3606 binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context, encoder_context);
3608 dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
3609 frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
3610 assert(frame_header_buffer);
//bit count -> byte count, rounding up.
3611 memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
3612 free(mfc_context->vp8_state.vp8_frame_header);
3613 dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
3616 #define MAX_VP8_FRAME_HEADER_SIZE 0x2000
3617 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
3619 static void gen8_mfc_vp8_init(VADriverContextP ctx,
3620 struct encode_state *encode_state,
3621 struct intel_encoder_context *encoder_context)
3623 struct i965_driver_data *i965 = i965_driver_data(ctx);
3624 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3627 int width_in_mbs = 0;
3628 int height_in_mbs = 0;
3629 int slice_batchbuffer_size;
3630 int is_key_frame, slice_type, rate_control_mode;
3632 VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3633 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3634 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3636 width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3637 height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3639 is_key_frame = !pic_param->pic_flags.bits.frame_type;
3640 slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3641 rate_control_mode = encoder_context->rate_control_mode;
3643 if (rate_control_mode == VA_RC_CBR) {
3644 q_matrix->quantization_index[0] = mfc_context->brc.qp_prime_y[0][slice_type];
3645 for (i = 1; i < 4; i++)
3646 q_matrix->quantization_index[i] = q_matrix->quantization_index[0];
3647 for (i = 0; i < 5; i++)
3648 q_matrix->quantization_index_delta[i] = 0;
3651 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
3652 (SLICE_HEADER + SLICE_TAIL);
3654 /*Encode common setup for MFC*/
3655 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3656 mfc_context->post_deblocking_output.bo = NULL;
3658 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3659 mfc_context->pre_deblocking_output.bo = NULL;
3661 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3662 mfc_context->uncompressed_picture_source.bo = NULL;
3664 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3665 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3667 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
3668 if ( mfc_context->direct_mv_buffers[i].bo != NULL)
3669 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3670 mfc_context->direct_mv_buffers[i].bo = NULL;
3673 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
3674 if (mfc_context->reference_surfaces[i].bo != NULL)
3675 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3676 mfc_context->reference_surfaces[i].bo = NULL;
3679 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3680 bo = dri_bo_alloc(i965->intel.bufmgr,
3682 width_in_mbs * 64 * 16,
3685 mfc_context->intra_row_store_scratch_buffer.bo = bo;
3687 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3688 bo = dri_bo_alloc(i965->intel.bufmgr,
3690 width_in_mbs * height_in_mbs * 16,
3693 mfc_context->macroblock_status_buffer.bo = bo;
3695 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3696 bo = dri_bo_alloc(i965->intel.bufmgr,
3698 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3701 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3703 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3704 bo = dri_bo_alloc(i965->intel.bufmgr,
3706 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3709 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3711 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3712 mfc_context->mfc_batchbuffer_surface.bo = NULL;
3714 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3715 mfc_context->aux_batchbuffer_surface.bo = NULL;
3717 if (mfc_context->aux_batchbuffer) {
3718 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3719 mfc_context->aux_batchbuffer = NULL;
3722 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
3723 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
3724 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
3725 mfc_context->aux_batchbuffer_surface.pitch = 16;
3726 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
3727 mfc_context->aux_batchbuffer_surface.size_block = 16;
3729 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
3731 /* alloc vp8 encoding buffers*/
3732 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3733 bo = dri_bo_alloc(i965->intel.bufmgr,
3735 MAX_VP8_FRAME_HEADER_SIZE,
3738 mfc_context->vp8_state.frame_header_bo = bo;
3740 mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 384 * 9;
3741 for(i = 0; i < 8; i++) {
3742 mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 384 * (i + 1);
3744 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3745 bo = dri_bo_alloc(i965->intel.bufmgr,
3747 mfc_context->vp8_state.intermediate_buffer_max_size,
3750 mfc_context->vp8_state.intermediate_bo = bo;
3752 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3753 bo = dri_bo_alloc(i965->intel.bufmgr,
3755 width_in_mbs * height_in_mbs * 16,
3758 mfc_context->vp8_state.stream_out_bo = bo;
3760 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3761 bo = dri_bo_alloc(i965->intel.bufmgr,
3763 sizeof(vp8_default_coef_probs),
3766 mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
3768 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3769 bo = dri_bo_alloc(i965->intel.bufmgr,
3771 VP8_TOKEN_STATISTICS_BUFFER_SIZE,
3774 mfc_context->vp8_state.token_statistics_bo = bo;
3776 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3777 bo = dri_bo_alloc(i965->intel.bufmgr,
3779 width_in_mbs * 16 * 64,
3782 mfc_context->vp8_state.mpc_row_store_bo = bo;
3784 vp8_enc_state_init(mfc_context, pic_param, q_matrix);
3785 vp8_enc_frame_header_binarize(encode_state, encoder_context, mfc_context);
//Binds all per-frame surfaces/buffers to the MFC context for VP8: the
//reconstructed surface (routed to pre- or post-deblocking output depending
//on whether loop filtering is enabled), reference frames, the input YUV
//surface, and the coded buffer (which doubles as the final-frame output BO).
3789 intel_mfc_vp8_prepare(VADriverContextP ctx,
3790 struct encode_state *encode_state,
3791 struct intel_encoder_context *encoder_context)
3793 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3794 struct object_surface *obj_surface;
3795 struct object_buffer *obj_buffer;
3796 struct i965_coded_buffer_segment *coded_buffer_segment;
3797 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3798 VAStatus vaStatus = VA_STATUS_SUCCESS;
3802 /* reconstructed surface */
3803 obj_surface = encode_state->reconstructed_object;
3804 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
//Loop filter disabled -> hardware writes the pre-deblocking output instead.
3805 if (pic_param->loop_filter_level[0] == 0) {
3806 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
3807 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
3809 mfc_context->post_deblocking_output.bo = obj_surface->bo;
3810 dri_bo_reference(mfc_context->post_deblocking_output.bo);
3813 mfc_context->surface_state.width = obj_surface->orig_width;
3814 mfc_context->surface_state.height = obj_surface->orig_height;
3815 mfc_context->surface_state.w_pitch = obj_surface->width;
3816 mfc_context->surface_state.h_pitch = obj_surface->height;
3818 /* set vp8 reference frames */
3819 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
3820 obj_surface = encode_state->reference_objects[i];
3822 if (obj_surface && obj_surface->bo) {
3823 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
3824 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
3826 mfc_context->reference_surfaces[i].bo = NULL;
3830 /* input YUV surface */
3831 obj_surface = encode_state->input_yuv_object;
3832 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
3833 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
//Coded buffer: PAK writes after the coded-buffer header; end offset is the
//buffer size minus a 0x1000 guard, rounded down to a page.
3836 obj_buffer = encode_state->coded_buf_object;
3837 bo = obj_buffer->buffer_store->bo;
3838 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
3839 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
3840 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
3841 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
3843 dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3844 mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
3845 mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
3846 dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
3848 /* set the internal flag to 0 to indicate the coded size is unknown */
3850 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
3851 coded_buffer_segment->mapped = 0;
3852 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit the MFX_VP8_ENCODER_CFG command (30 DWs) into the BCS batch.
 * Configures the VP8 PAK: statistics outputs, per-MB bit-count limits,
 * frame dimensions/version bits, and the bin-buffer update pointers that
 * were computed when the frame header was pre-built in vp8_state.
 * NOTE(review): command length is 30 here; the inline comment suggests
 * SKL may need 31 — confirm against the Gen9 PRM before reusing on SKL.
 */
3859 gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx,
3860 struct encode_state *encode_state,
3861 struct intel_encoder_context *encoder_context)
3863 struct intel_batchbuffer *batch = encoder_context->base.batch;
3864 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3865 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3866 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3868 BEGIN_BCS_BATCH(batch, 30);
3869 OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
3871 OUT_BCS_BATCH(batch,
3872 0 << 9 | /* compressed bitstream output disable */
3873 1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
3874 1 << 6 | /* RC initial pass */
3875 0 << 4 | /* update segment feature data flag */
3876 1 << 3 | /* bitstream statistics output enable */
3877 1 << 2 | /* token statistics output enable */
3878 0 << 1 | /* final bitstream output disable */
3881 OUT_BCS_BATCH(batch, 0); /*DW2*/
3883 OUT_BCS_BATCH(batch,
3884 0xfff << 16 | /* max intra mb bit count limit */
3885 0xfff << 0 /* max inter mb bit count limit */
/* DW4..DW21 are rate-control deltas/thresholds; zeroed because BRC is done
 * on the host (see gen8_mfc_vp8_brc_postpack), not by the hardware. */
3888 OUT_BCS_BATCH(batch, 0); /*DW4*/
3889 OUT_BCS_BATCH(batch, 0); /*DW5*/
3890 OUT_BCS_BATCH(batch, 0); /*DW6*/
3891 OUT_BCS_BATCH(batch, 0); /*DW7*/
3892 OUT_BCS_BATCH(batch, 0); /*DW8*/
3893 OUT_BCS_BATCH(batch, 0); /*DW9*/
3894 OUT_BCS_BATCH(batch, 0); /*DW10*/
3895 OUT_BCS_BATCH(batch, 0); /*DW11*/
3896 OUT_BCS_BATCH(batch, 0); /*DW12*/
3897 OUT_BCS_BATCH(batch, 0); /*DW13*/
3898 OUT_BCS_BATCH(batch, 0); /*DW14*/
3899 OUT_BCS_BATCH(batch, 0); /*DW15*/
3900 OUT_BCS_BATCH(batch, 0); /*DW16*/
3901 OUT_BCS_BATCH(batch, 0); /*DW17*/
3902 OUT_BCS_BATCH(batch, 0); /*DW18*/
3903 OUT_BCS_BATCH(batch, 0); /*DW19*/
3904 OUT_BCS_BATCH(batch, 0); /*DW20*/
3905 OUT_BCS_BATCH(batch, 0); /*DW21*/
3907 OUT_BCS_BATCH(batch,
3908 pic_param->pic_flags.bits.show_frame << 23 |
3909 pic_param->pic_flags.bits.version << 20
/* Width/height packed as (scale << 14 | size) per the VP8 uncompressed
 * frame header layout ($9.1 of the VP8 spec). */
3912 OUT_BCS_BATCH(batch,
3913 (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
3914 (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
3918 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
3921 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
3924 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/
3927 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
3930 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */
3933 OUT_BCS_BATCH(batch, 0);
3935 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFX_VP8_PIC_STATE command (38 DWs): per-frame VP8 picture
 * controls — loop filter levels, quantizer indices and deltas, entropy
 * probabilities (segment tree, skip/intra/ref, Y/UV modes, MV), and
 * loop-filter reference/mode deltas.  Also refreshes the driver-side
 * probability state from the application's Q matrix first.
 */
3939 gen8_mfc_vp8_pic_state(VADriverContextP ctx,
3940 struct encode_state *encode_state,
3941 struct intel_encoder_context *encoder_context)
3943 struct intel_batchbuffer *batch = encoder_context->base.batch;
3944 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3945 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3946 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3947 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
/* log2 of the token partition count: partitions = 1 << log2num. */
3950 log2num = pic_param->pic_flags.bits.num_token_partitions;
3952 /*update mode and token probs*/
3953 vp8_enc_state_update(mfc_context, q_matrix);
3955 BEGIN_BCS_BATCH(batch, 38);
3956 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
/* DW1: frame size in MBs, minus-one encoded. */
3957 OUT_BCS_BATCH(batch,
3958 (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
3959 (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
3961 OUT_BCS_BATCH(batch,
3963 pic_param->sharpness_level << 16 |
3964 pic_param->pic_flags.bits.sign_bias_alternate << 13 |
3965 pic_param->pic_flags.bits.sign_bias_golden << 12 |
3966 pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
3967 pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
3968 pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
3969 pic_param->pic_flags.bits.segmentation_enabled << 8 |
3970 !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
3971 (pic_param->pic_flags.bits.version / 2) << 4 |
3972 (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
3973 !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
/* Per-segment loop filter levels (segment 0 used when segmentation off). */
3975 OUT_BCS_BATCH(batch,
3976 pic_param->loop_filter_level[3] << 24 |
3977 pic_param->loop_filter_level[2] << 16 |
3978 pic_param->loop_filter_level[1] << 8 |
3979 pic_param->loop_filter_level[0] << 0);
/* Per-segment base quantizer indices. */
3981 OUT_BCS_BATCH(batch,
3982 q_matrix->quantization_index[3] << 24 |
3983 q_matrix->quantization_index[2] << 16 |
3984 q_matrix->quantization_index[1] << 8 |
3985 q_matrix->quantization_index[0] << 0);
/* Quantizer deltas in sign+magnitude form: the (unsigned short)x >> 15
 * expression extracts the sign bit of the int16 delta; abs() gives the
 * magnitude placed in the adjacent 4-bit field. */
3987 OUT_BCS_BATCH(batch,
3988 ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
3989 abs(q_matrix->quantization_index_delta[4]) << 24 |
3990 ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
3991 abs(q_matrix->quantization_index_delta[3]) << 16 |
3992 ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
3993 abs(q_matrix->quantization_index_delta[2]) << 8 |
3994 ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
3995 abs(q_matrix->quantization_index_delta[1]) << 0);
3997 OUT_BCS_BATCH(batch,
3998 ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
3999 abs(q_matrix->quantization_index_delta[0]) << 0);
4001 OUT_BCS_BATCH(batch,
4002 pic_param->clamp_qindex_high << 8 |
4003 pic_param->clamp_qindex_low << 0);
/* DW8..DW18: coefficient probability update mask — all-ones means
 * "update every coeff prob" from the stream-in buffer. */
4005 for (i = 8; i < 19; i++) {
4006 OUT_BCS_BATCH(batch, 0xffffffff);
4009 OUT_BCS_BATCH(batch,
4010 mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
4011 mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
4012 mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);
4014 OUT_BCS_BATCH(batch,
4015 mfc_context->vp8_state.prob_skip_false << 24 |
4016 mfc_context->vp8_state.prob_intra << 16 |
4017 mfc_context->vp8_state.prob_last << 8 |
4018 mfc_context->vp8_state.prob_gf << 0);
4020 OUT_BCS_BATCH(batch,
4021 mfc_context->vp8_state.y_mode_probs[3] << 24 |
4022 mfc_context->vp8_state.y_mode_probs[2] << 16 |
4023 mfc_context->vp8_state.y_mode_probs[1] << 8 |
4024 mfc_context->vp8_state.y_mode_probs[0] << 0);
4026 OUT_BCS_BATCH(batch,
4027 mfc_context->vp8_state.uv_mode_probs[2] << 16 |
4028 mfc_context->vp8_state.uv_mode_probs[1] << 8 |
4029 mfc_context->vp8_state.uv_mode_probs[0] << 0);
4031 /* MV update value, DW23-DW32: 2 components x 19 probs, packed 4 per DW;
 * the 20th slot of each row is padded with 0 (j+3 == 19 case). */
4032 for (i = 0; i < 2; i++) {
4033 for (j = 0; j < 20; j += 4) {
4034 OUT_BCS_BATCH(batch,
4035 (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
4036 mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
4037 mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
4038 mfc_context->vp8_state.mv_probs[i][j + 0] << 0);
/* Loop-filter deltas are signed 7-bit values; mask to 7 bits per field. */
4042 OUT_BCS_BATCH(batch,
4043 (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
4044 (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
4045 (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
4046 (pic_param->ref_lf_delta[0] & 0x7f) << 0);
4048 OUT_BCS_BATCH(batch,
4049 (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
4050 (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
4051 (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
4052 (pic_param->mode_lf_delta[0] & 0x7f) << 0);
4054 OUT_BCS_BATCH(batch, 0);
4055 OUT_BCS_BATCH(batch, 0);
4056 OUT_BCS_BATCH(batch, 0);
4058 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one 4-DW buffer-address entry for MFX_VP8_BSP_BUF_BASE_ADDR_STATE:
 * a relocation to `bo` at `offset` (the branch at 4068 emits 0 instead —
 * presumably when `bo` is NULL; the guarding `if` is outside this excerpt,
 * confirm against the full source), the upper-address DW, and the MOCS
 * cache-control DW.  Expects `batch` and `i965` in the caller's scope.
 */
#define OUT_VP8_BUFFER(bo, offset) \
4063 OUT_BCS_RELOC(batch, \
4065 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
4068 OUT_BCS_BATCH(batch, 0); \
4069 OUT_BCS_BATCH(batch, 0); \
4070 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/*
 * Emit MFX_VP8_BSP_BUF_BASE_ADDR_STATE (32 DWs): programs the base
 * addresses of every VP8 bitstream-processing buffer — pre-built frame
 * header, intermediate token partitions (with their 8 offsets and max
 * size), final output frame (placed after the coded-buffer header),
 * stream-out, coefficient-probability stream-in, token statistics, and
 * the MPC row store.
 */
4073 gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
4074 struct encode_state *encode_state,
4075 struct intel_encoder_context *encoder_context)
4077 struct i965_driver_data *i965 = i965_driver_data(ctx);
4078 struct intel_batchbuffer *batch = encoder_context->base.batch;
4079 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4081 BEGIN_BCS_BATCH(batch, 32);
4082 OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
4084 OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
4086 OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
4087 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
4088 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
4089 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
4090 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
4091 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
4092 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
4093 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
4094 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
4095 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
/* Final frame lands in the coded buffer, after the driver's header. */
4097 OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
4098 OUT_BCS_BATCH(batch, 0);
4100 OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
4101 OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
4102 OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
4103 OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
4105 ADVANCE_BCS_BATCH(batch);
/*
 * Emit all picture-level MFX state for a VP8 frame, in the order the
 * hardware requires: pipe mode select, surface state, indirect object
 * base, pipe/BSP buffer addresses, VP8 BSP buffers, picture state, and
 * finally the encoder config.  Do not reorder these calls.
 */
4109 gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
4110 struct encode_state *encode_state,
4111 struct intel_encoder_context *encoder_context)
4113 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4115 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
4116 mfc_context->set_surface_state(ctx, encoder_context);
4117 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
4118 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
4119 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
4120 gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
4121 gen8_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
4122 gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
/* Lookup tables translating VME-kernel intra prediction mode indices to
 * the PAK hardware's VP8 mode encoding: one table for whole-MB (16x16)
 * luma modes, one for 4x4 sub-block modes.  (Initializer values are on
 * lines elided from this excerpt.) */
4125 static const unsigned char
4126 vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
4133 static const unsigned char
4134 vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
/*
 * Convert a VME intra prediction-mode word to the PAK's VP8 encoding.
 * For 16x16 MBs the low 2 bits select one mode via vp8_intra_mb_mode_map;
 * for intra_4x4 (is_luma_4x4 != 0) the word packs eight 4-bit sub-block
 * modes, each remapped through vp8_intra_block_mode_map and repacked at
 * the same 4-bit position.  (The if/else selecting between the two paths
 * sits on lines elided from this excerpt.)
 */
4146 static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
4148 unsigned int i, pak_pred_mode = 0;
4149 unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */
4152 pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
4154 for (i = 0; i < 8; i++) {
4155 vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
4156 assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
4157 pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
4158 pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
4162 return pak_pred_mode;
/*
 * Emit one MFX_VP8_PAK_OBJECT (7 DWs) for an intra macroblock at MB
 * coordinates (x, y), translating the VME output message `msg` into the
 * PAK's mode fields.  VP8 supports only intra_16x16 (VME mode 0) and
 * intra_4x4 (VME mode 2); the >>1 maps those onto the PAK's 0/1 mb types.
 */
4165 gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
4166 struct intel_encoder_context *encoder_context,
4169 struct intel_batchbuffer *batch)
4171 unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
4172 unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
4173 unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];
/* NOTE(review): the caller-supplied batch is overridden here — presumably
 * the parameter exists for the hardware-batchbuffer path; confirm. */
4176 batch = encoder_context->base.batch;
4178 vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
4179 assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
4180 pak_intra_mb_mode = (vme_intra_mb_mode >> 1);
/* msg[1]/msg[2] carry the 16 4-bit luma sub-block modes (8 per DW). */
4182 vme_luma_pred_mode[0] = msg[1];
4183 vme_luma_pred_mode[1] = msg[2];
4184 vme_chroma_pred_mode = msg[3] & 0x3;
4186 pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
4187 pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
4188 pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);
4190 BEGIN_BCS_BATCH(batch, 7);
4192 OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
4193 OUT_BCS_BATCH(batch, 0);
4194 OUT_BCS_BATCH(batch, 0);
4195 OUT_BCS_BATCH(batch,
4196 (0 << 20) | /* mv format: intra mb */
4197 (0 << 18) | /* Segment ID */
4198 (0 << 17) | /* disable coeff clamp */
4199 (1 << 13) | /* intra mb flag */
4200 (0 << 11) | /* refer picture select: last frame */
4201 (pak_intra_mb_mode << 8) | /* mb type */
4202 (pak_chroma_pred_mode << 4) | /* mb uv mode */
4203 (0 << 2) | /* skip mb flag: disable */
4206 OUT_BCS_BATCH(batch, (y << 16) | x);
4207 OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
4208 OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);
4210 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_VP8_PAK_OBJECT (7 DWs) for an inter macroblock at MB
 * coordinates (x, y).  Only inter_16x16 with a single MV is supported:
 * the MV is copied to msg[0] (64-byte-aligned VME message start) with
 * both components doubled, as VP8 stores luma MVs at double precision.
 */
4214 gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
4215 struct intel_encoder_context *encoder_context,
4219 struct intel_batchbuffer *batch)
4224 batch = encoder_context->base.batch;
4226 /* only support inter_16x16 now */
4227 assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
4228 /* for inter_16x16, all 16 MVs should be same,
4229 * and move mv to the vme mb start address to make sure offset is 64 bytes aligned
4230 * as vp8 spec, all vp8 luma motion vectors are doubled stored
/* Shift each packed 16-bit MV component left by one (x in the low half,
 * y in the high half) to produce the doubled VP8 representation. */
4232 msg[0] = (((msg[AVC_INTER_MV_OFFSET/4] & 0xffff0000) << 1) | ((msg[AVC_INTER_MV_OFFSET/4] << 1) & 0xffff));
/* Replicate the MV to the remaining 15 slots (body elided in excerpt). */
4234 for (i = 1; i < 16; i++) {
4238 BEGIN_BCS_BATCH(batch, 7);
4240 OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
4241 OUT_BCS_BATCH(batch,
4242 (0 << 29) | /* enable inline mv data: disable */
4244 OUT_BCS_BATCH(batch,
4246 OUT_BCS_BATCH(batch,
4247 (4 << 20) | /* mv format: inter */
4248 (0 << 18) | /* Segment ID */
4249 (0 << 17) | /* coeff clamp: disable */
4250 (0 << 13) | /* intra mb flag: inter mb */
4251 (0 << 11) | /* refer picture select: last frame */
4252 (0 << 8) | /* mb type: 16x16 */
4253 (0 << 4) | /* mb uv mode: dc_pred */
4254 (0 << 2) | /* skip mb flag: disable */
4257 OUT_BCS_BATCH(batch, (y << 16) | x);
/* DW5/DW6: sub-MB mode/partition fields for the 16x16 case. */
4260 OUT_BCS_BATCH(batch, 0x8);
4261 OUT_BCS_BATCH(batch, 0x8);
4263 ADVANCE_BCS_BATCH(batch);
/*
 * Walk every macroblock of the frame and emit one PAK object command per
 * MB into `slice_batch`, reading the VME kernel's per-MB output messages.
 * Intra frames emit intra objects unconditionally; on inter frames the
 * intra/inter decision is made per MB by comparing the VME's RDO costs.
 */
4267 gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
4268 struct encode_state *encode_state,
4269 struct intel_encoder_context *encoder_context,
4270 struct intel_batchbuffer *slice_batch)
4272 struct gen6_vme_context *vme_context = encoder_context->vme_context;
4273 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
4274 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4275 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
4276 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
4277 unsigned int *msg = NULL;
4278 unsigned char *msg_ptr = NULL;
4279 unsigned int i, offset, is_intra_frame;
/* VP8 frame_type: 0 = key (intra) frame, 1 = inter frame. */
4281 is_intra_frame = !pic_param->pic_flags.bits.frame_type;
/* Map the VME output writable; the MV fixup in the inter path writes
 * back into this buffer. */
4283 dri_bo_map(vme_context->vme_output.bo , 1);
4284 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
4286 for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
4287 int h_pos = i % width_in_mbs;
4288 int v_pos = i / width_in_mbs;
4289 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
4291 if (is_intra_frame) {
4292 gen8_mfc_vp8_pak_object_intra(ctx,
4298 int inter_rdo, intra_rdo;
4299 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
4300 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
/* Prefer intra only when strictly cheaper in RDO cost. */
4302 if (intra_rdo < inter_rdo) {
4303 gen8_mfc_vp8_pak_object_intra(ctx,
4309 offset = i * vme_context->vme_output.size_block;
4310 gen8_mfc_vp8_pak_object_inter(ctx,
4320 dri_bo_unmap(vme_context->vme_output.bo);
/*
 * Build a secondary (software-generated) batch buffer holding all VP8
 * PAK object commands for the frame, terminated with MI_BATCH_BUFFER_END.
 * Takes ownership of the context's aux batchbuffer: its bo is referenced
 * and returned to the caller (who must unreference it), and the
 * batchbuffer wrapper itself is freed and detached from the context.
 */
4327 gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
4328 struct encode_state *encode_state,
4329 struct intel_encoder_context *encoder_context)
4331 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4332 struct intel_batchbuffer *batch;
4335 batch = mfc_context->aux_batchbuffer;
4336 batch_bo = batch->buffer;
4338 gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
/* QWORD-align before the terminator, as required for batch buffers. */
4340 intel_batchbuffer_align(batch, 8);
4342 BEGIN_BCS_BATCH(batch, 2);
4343 OUT_BCS_BATCH(batch, 0);
4344 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
4345 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past intel_batchbuffer_free(); returned to caller. */
4347 dri_bo_reference(batch_bo);
4348 intel_batchbuffer_free(batch);
4349 mfc_context->aux_batchbuffer = NULL;
/*
 * Program the whole VP8 BCS pipeline for one frame: build the software
 * PAK-object batch, then atomically emit a flush, the picture-level
 * state, and an MI_BATCH_BUFFER_START chaining into the PAK batch.
 */
4355 gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
4356 struct encode_state *encode_state,
4357 struct intel_encoder_context *encoder_context)
4359 struct intel_batchbuffer *batch = encoder_context->base.batch;
4360 dri_bo *slice_batch_bo;
4362 slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
/* Reserve 0x4000 bytes in the primary batch; emitted as one atomic run. */
4365 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
4366 intel_batchbuffer_emit_mi_flush(batch);
4368 // picture level programing
4369 gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the second-level (PAK object) batch buffer. */
4371 BEGIN_BCS_BATCH(batch, 4);
4372 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
4373 OUT_BCS_RELOC(batch,
4375 I915_GEM_DOMAIN_COMMAND, 0,
4377 OUT_BCS_BATCH(batch, 0);
4378 OUT_BCS_BATCH(batch, 0);
4379 ADVANCE_BCS_BATCH(batch);
4382 intel_batchbuffer_end_atomic(batch);
/* Drop the reference taken by gen8_mfc_vp8_software_batchbuffer(). */
4384 dri_bo_unreference(slice_batch_bo);
/*
 * Read the hardware's token statistics buffer after PAK completes and
 * compute the total coded frame size in bytes, then store it in the
 * coded-buffer segment header so vaMapBuffer() can report it.
 * Returns the coded size in bytes.
 */
static int gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
4388 struct encode_state *encode_state,
4389 struct intel_encoder_context *encoder_context)
4391 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4392 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4393 unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4394 unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
4396 int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;
4398 first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;
4400 dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
/* Slot 0 holds the first-partition bit count; slots 1..N the token
 * partitions' bit counts.  Round each up to whole bytes. */
4402 vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
4403 first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;
/* NOTE(review): <= reads entries 1..partition_num inclusive, i.e.
 * partition_num+1 slots in total counting slot 0 — confirm the
 * statistics buffer layout really provides that many entries. */
4405 for (i = 1; i <= partition_num; i++)
4406 token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;
4408 /*coded_bytes includes P0~P8 partitions bytes + uncompressed data bytes + partition_size bytes in bitstream + 3 extra bytes */
4409 /*it seems the last partition size in vp8 status buffer is smaller than reality. so add 3 extra bytes */
4410 vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;
4412 dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
/* Publish the size in the coded buffer's driver header. */
4414 dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
4415 struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
4416 coded_buffer_segment->base.size = vp8_coded_bytes;
4417 dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
4419 return vp8_coded_bytes;
/*
 * Top-level VP8 encode entry: init buffers, prepare surfaces, program
 * and run the BCS pipeline, then measure the coded size.  Under CBR the
 * BRC post-pass may request a re-encode (loop structure partially
 * outside this excerpt); unrepairable HRD violations are logged once.
 * Always returns VA_STATUS_SUCCESS.
 */
4423 gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
4424 struct encode_state *encode_state,
4425 struct intel_encoder_context *encoder_context)
4427 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4428 unsigned int rate_control_mode = encoder_context->rate_control_mode;
4429 int current_frame_bits_size;
4432 gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
4433 intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
4434 /*Programming bcs pipeline*/
4435 gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
4436 gen8_mfc_run(ctx, encode_state, encoder_context);
4437 current_frame_bits_size = 8 * gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
4439 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
4440 sts = gen8_mfc_vp8_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
4441 if (sts == BRC_NO_HRD_VIOLATION) {
4442 gen8_mfc_vp8_hrd_context_update(encode_state, mfc_context);
4444 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* Warn only once per context to avoid log spam. */
4445 if (!mfc_context->hrd.violation_noted) {
4446 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
4447 mfc_context->hrd.violation_noted = 1;
4449 return VA_STATUS_SUCCESS;
4453 return VA_STATUS_SUCCESS;
/*
 * Destructor for the MFC context: drop every buffer-object reference
 * (dri_bo_unreference is NULL-safe), free the aux batchbuffer and GPE
 * context, and NULL each pointer to guard against double-free.
 */
4457 gen8_mfc_context_destroy(void *context)
4459 struct gen6_mfc_context *mfc_context = context;
4462 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
4463 mfc_context->post_deblocking_output.bo = NULL;
4465 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
4466 mfc_context->pre_deblocking_output.bo = NULL;
4468 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
4469 mfc_context->uncompressed_picture_source.bo = NULL;
4471 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
4472 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
4474 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
4475 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
4476 mfc_context->direct_mv_buffers[i].bo = NULL;
4479 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
4480 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
4482 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
4483 mfc_context->macroblock_status_buffer.bo = NULL;
4485 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
4486 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
4488 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
4489 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
4492 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
4493 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
4494 mfc_context->reference_surfaces[i].bo = NULL;
4497 gen8_gpe_context_destroy(&mfc_context->gpe_context);
4499 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
4500 mfc_context->mfc_batchbuffer_surface.bo = NULL;
4502 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
4503 mfc_context->aux_batchbuffer_surface.bo = NULL;
4505 if (mfc_context->aux_batchbuffer)
4506 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
4508 mfc_context->aux_batchbuffer = NULL;
/* VP8-specific state buffers. */
4510 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
4511 mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
4513 dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
4514 mfc_context->vp8_state.final_frame_bo = NULL;
4516 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
4517 mfc_context->vp8_state.frame_header_bo = NULL;
4519 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
4520 mfc_context->vp8_state.intermediate_bo = NULL;
4522 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
4523 mfc_context->vp8_state.mpc_row_store_bo = NULL;
4525 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
4526 mfc_context->vp8_state.stream_out_bo = NULL;
4528 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
4529 mfc_context->vp8_state.token_statistics_bo = NULL;
/*
 * Dispatch the encode of one picture to the codec-specific PAK path
 * based on the VA profile (switch header partially elided in excerpt).
 * Returns the codec path's VAStatus, or
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE for profiles with no MFC backend.
 */
static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
4536 struct encode_state *encode_state,
4537 struct intel_encoder_context *encoder_context)
4542 case VAProfileH264ConstrainedBaseline:
4543 case VAProfileH264Main:
4544 case VAProfileH264High:
4545 case VAProfileH264MultiviewHigh:
4546 case VAProfileH264StereoHigh:
4547 vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
4550 /* FIXME: add for other profile */
4551 case VAProfileMPEG2Simple:
4552 case VAProfileMPEG2Main:
4553 vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
4556 case VAProfileJPEGBaseline:
/* JPEG needs the default quantization matrices loaded first. */
4557 jpeg_init_default_qmatrix(ctx, encoder_context);
4558 vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);
4561 case VAProfileVP8Version0_3:
4562 vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
4566 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
4573 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4575 struct i965_driver_data *i965 = i965_driver_data(ctx);
4576 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
4578 assert(mfc_context);
4579 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
4581 mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
4582 mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
4583 mfc_context->gpe_context.curbe.length = 32 * 4;
4584 mfc_context->gpe_context.sampler.entry_size = 0;
4585 mfc_context->gpe_context.sampler.max_entries = 0;
4587 if (i965->intel.eu_total > 0)
4588 mfc_context->gpe_context.vfe_state.max_num_threads = 6 * i965->intel.eu_total;
4590 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
4592 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
4593 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
4594 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
4595 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
4597 if (IS_GEN9(i965->intel.device_info)) {
4598 gen8_gpe_load_kernels(ctx,
4599 &mfc_context->gpe_context,
4603 gen8_gpe_load_kernels(ctx,
4604 &mfc_context->gpe_context,
4609 mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
4610 mfc_context->set_surface_state = gen8_mfc_surface_state;
4611 mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
4612 mfc_context->avc_img_state = gen8_mfc_avc_img_state;
4613 mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
4614 mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
4615 mfc_context->insert_object = gen8_mfc_avc_insert_object;
4616 mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
4618 encoder_context->mfc_context = mfc_context;
4619 encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
4620 encoder_context->mfc_pipeline = gen8_mfc_pipeline;
4622 if (encoder_context->codec == CODEC_VP8)
4623 encoder_context->mfc_brc_prepare = gen8_mfc_vp8_brc_prepare;
4625 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;