2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
45 #include <va/va_enc_jpeg.h>
46 #include "vp8_probs.h"
/* Per-surface padded state size for Gen8; binding-table entries are laid out
 * after all surface-state slots (MAX_MEDIA_SURFACES_GEN6 of them). */
48 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
/* 0 = build the AVC slice batchbuffers on the GPU path rather than in software. */
52 #define MFC_SOFTWARE_BATCH 0
/* True when the device revision is stepping B0 or later. */
55 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
57 //Zigzag scan order of the Luma and Chroma components
58 //Note: Jpeg Spec ISO/IEC 10918-1, Figure A.6 shows the zigzag order differently.
59 //The Spec is trying to show the zigzag pattern with number positions. The below
60 //table will use the pattern shown by A.6 and map the position of the elements in the array
/* zigzag_direct[n] is the raster-scan index (row * 8 + col) of the n-th
 * coefficient in zigzag order for an 8x8 block.
 * NOTE(review): the closing "};" of this initializer is missing from this
 * extraction of the file. */
61 static const uint32_t zigzag_direct[64] = {
62 0, 1, 8, 16, 9, 2, 3, 10,
63 17, 24, 32, 25, 18, 11, 4, 5,
64 12, 19, 26, 33, 40, 48, 41, 34,
65 27, 20, 13, 6, 7, 14, 21, 28,
66 35, 42, 49, 56, 57, 50, 43, 36,
67 29, 22, 15, 23, 30, 37, 44, 51,
68 58, 59, 52, 45, 38, 31, 39, 46,
69 53, 60, 61, 54, 47, 55, 62, 63
72 //Default Luminance quantization table
73 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.1
/* Luma quantization values in raster-scan order, as listed in Table K.1. */
74 static const uint8_t jpeg_luma_quant[64] = {
75 16, 11, 10, 16, 24, 40, 51, 61,
76 12, 12, 14, 19, 26, 58, 60, 55,
77 14, 13, 16, 24, 40, 57, 69, 56,
78 14, 17, 22, 29, 51, 87, 80, 62,
79 18, 22, 37, 56, 68, 109, 103, 77,
80 24, 35, 55, 64, 81, 104, 113, 92,
81 49, 64, 78, 87, 103, 121, 120, 101,
82 72, 92, 95, 98, 112, 100, 103, 99
85 //Default Chroma quantization table
86 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.2
/* Chroma quantization values in raster-scan order, as listed in Table K.2. */
87 static const uint8_t jpeg_chroma_quant[64] = {
88 17, 18, 24, 47, 99, 99, 99, 99,
89 18, 21, 26, 66, 99, 99, 99, 99,
90 24, 26, 56, 99, 99, 99, 99, 99,
91 47, 66, 99, 99, 99, 99, 99, 99,
92 99, 99, 99, 99, 99, 99, 99, 99,
93 99, 99, 99, 99, 99, 99, 99, 99,
94 99, 99, 99, 99, 99, 99, 99, 99,
95 99, 99, 99, 99, 99, 99, 99, 99
/* Maps the two VA-API JPEG Huffman table selectors to GEN7+ MFX table ids.
 * NOTE(review): the initializer values are missing from this extraction. */
99 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Gen8 media kernel binary (4 DWORDs per instruction) for building AVC MFC
 * batchbuffers on the GPU; generated from the HSW shader source. */
104 static const uint32_t gen8_mfc_batchbuffer_avc[][4] = {
105 #include "shaders/utils/mfc_batchbuffer_hsw.g8b"
/* Gen9 build of the same MFC batchbuffer kernel (see gen8 variant above). */
108 static const uint32_t gen9_mfc_batchbuffer_avc[][4] = {
109 #include "shaders/utils/mfc_batchbuffer_hsw.g9b"
/* Kernel descriptor table for Gen8: name, id, binary pointer and size. */
112 static struct i965_kernel gen8_mfc_kernels[] = {
114 "MFC AVC INTRA BATCHBUFFER ",
115 MFC_BATCHBUFFER_AVC_INTRA,
116 gen8_mfc_batchbuffer_avc,
117 sizeof(gen8_mfc_batchbuffer_avc),
/* Kernel descriptor table for Gen9, mirroring gen8_mfc_kernels. */
122 static struct i965_kernel gen9_mfc_kernels[] = {
124 "MFC AVC INTRA BATCHBUFFER ",
125 MFC_BATCHBUFFER_AVC_INTRA,
126 gen9_mfc_batchbuffer_avc,
127 sizeof(gen9_mfc_batchbuffer_avc),
/* Flat AVC quantization matrix: 64 byte entries of 16 (0x10), packed four
 * per DWORD, used when no scaling matrices are supplied. */
132 static const uint32_t qm_flat[16] = {
133 0x10101010, 0x10101010, 0x10101010, 0x10101010,
134 0x10101010, 0x10101010, 0x10101010, 0x10101010,
135 0x10101010, 0x10101010, 0x10101010, 0x10101010,
136 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward quantization matrix: 64 16-bit entries of 0x1000, i.e.
 * (1 << 16) / 16 — the reciprocal encoding gen8_mfc_avc_fill_fqm produces —
 * packed two per DWORD. */
139 static const uint32_t fqm_flat[32] = {
140 0x10001000, 0x10001000, 0x10001000, 0x10001000,
141 0x10001000, 0x10001000, 0x10001000, 0x10001000,
142 0x10001000, 0x10001000, 0x10001000, 0x10001000,
143 0x10001000, 0x10001000, 0x10001000, 0x10001000,
144 0x10001000, 0x10001000, 0x10001000, 0x10001000,
145 0x10001000, 0x10001000, 0x10001000, 0x10001000,
146 0x10001000, 0x10001000, 0x10001000, 0x10001000,
147 0x10001000, 0x10001000, 0x10001000, 0x10001000
/* Inter-prediction macroblock partition encodings and masks; presumably
 * used when converting VME output into PAK MB records — TODO confirm against
 * the callers (not visible in this chunk). */
150 #define INTER_MODE_MASK 0x03
151 #define INTER_8X8 0x03
152 #define INTER_16X8 0x01
153 #define INTER_8X16 0x02
154 #define SUBMB_SHAPE_MASK 0x00FF00
155 #define INTER_16X16 0x00
157 #define INTER_MV8 (4 << 20)
158 #define INTER_MV32 (6 << 20)
/* Emits MFX_PIPE_MODE_SELECT (5 DWORDs) configuring the MFX pipe for encode:
 * long-format VLD mode, encode direction, and the pre/post-deblocking output
 * enables derived from which output BO is allocated.  standard_select picks
 * the codec (MPEG2/AVC/JPEG/VP8).
 * NOTE(review): several lines (the return-type/brace lines, the
 * standard_select parameter, and at least one OUT_BCS_BATCH for DW2) are
 * missing from this extraction. */
162 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
164 struct intel_encoder_context *encoder_context)
166 struct intel_batchbuffer *batch = encoder_context->base.batch;
167 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Only these four codecs are valid for this encoder path. */
169 assert(standard_select == MFX_FORMAT_MPEG2 ||
170 standard_select == MFX_FORMAT_AVC ||
171 standard_select == MFX_FORMAT_JPEG ||
172 standard_select == MFX_FORMAT_VP8);
174 BEGIN_BCS_BATCH(batch, 5);
/* DW0: command opcode; length field is (total DWORDs - 2). */
176 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* DW1: pipe configuration bits. */
178 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
179 (MFD_MODE_VLD << 15) | /* VLD mode */
180 (0 << 10) | /* Stream-Out Enable */
181 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
182 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
183 (0 << 6) | /* frame statistics stream-out enable*/
184 (0 << 5) | /* not in stitch mode */
185 (1 << 4) | /* encoding mode */
186 (standard_select << 0)); /* standard select: avc or mpeg2 or jpeg*/
/* DW2: debug/error-handling controls, all left at their defaults. */
188 (0 << 7) | /* expand NOA bus flag */
189 (0 << 6) | /* disable slice-level clock gating */
190 (0 << 5) | /* disable clock gating for NOA */
191 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
192 (0 << 3) | /* terminate if AVC mbdata error occurs */
193 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
/* DW3-4: reserved. */
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
199 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_SURFACE_STATE (6 DWORDs) describing the reconstructed/source
 * surface: NV12-style planar 4:2:0 with interleaved U/V, Y-major tiling,
 * dimensions and pitch taken from mfc_context->surface_state.
 * NOTE(review): the function header/brace lines and some OUT_BCS_BATCH(
 * openers are missing from this extraction. */
203 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
205 struct intel_batchbuffer *batch = encoder_context->base.batch;
206 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
208 BEGIN_BCS_BATCH(batch, 6);
210 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
211 OUT_BCS_BATCH(batch, 0);
/* DW2: height-1 / width-1 in pixels. */
213 ((mfc_context->surface_state.height - 1) << 18) |
214 ((mfc_context->surface_state.width - 1) << 4));
/* DW3: format, interleave and tiling. */
216 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
217 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
218 (0 << 22) | /* surface object control state, FIXME??? */
219 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
220 (0 << 2) | /* must be 0 for interleave U/V */
221 (1 << 1) | /* must be tiled */
222 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* DW4: Y offset (in rows) of the interleaved chroma plane. */
224 (0 << 16) | /* must be 0 for interleave U/V */
225 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
226 OUT_BCS_BATCH(batch, 0);
228 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_IND_OBJ_BASE_ADDR_STATE (26 DWORDs): indirect bitstream base and
 * upper bound, the VME motion-vector output buffer (skipped for JPEG, which
 * has no VME stage), and the PAK-BSE output object used by the encoder.
 * For VP8 the bitstream upper bound comes from the PAK-BSE object instead.
 * NOTE(review): the function brace lines, the vme_size declaration, and some
 * if/else framing lines are missing from this extraction. */
232 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
233 struct intel_encoder_context *encoder_context)
235 struct intel_batchbuffer *batch = encoder_context->base.batch;
236 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
237 struct gen6_vme_context *vme_context = encoder_context->vme_context;
239 unsigned int bse_offset;
241 BEGIN_BCS_BATCH(batch, 26);
243 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
244 /* the DW1-3 is for the MFX indirect bitstream offset */
245 OUT_BCS_BATCH(batch, 0);
246 OUT_BCS_BATCH(batch, 0);
247 OUT_BCS_BATCH(batch, 0);
249 /* the DW4-5 is the MFX upper bound */
250 if (encoder_context->codec == CODEC_VP8) {
/* VP8: bound the bitstream by the end of the PAK-BSE object. */
252 mfc_context->mfc_indirect_pak_bse_object.bo,
253 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
254 mfc_context->mfc_indirect_pak_bse_object.end_offset);
255 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
261 if(encoder_context->codec != CODEC_JPEG) {
262 vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
263 /* the DW6-10 is for MFX Indirect MV Object Base Address */
264 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
265 OUT_BCS_BATCH(batch, 0);
266 OUT_BCS_BATCH(batch, 0);
/* Upper bound of the MV object = base + total VME output size. */
267 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
268 OUT_BCS_BATCH(batch, 0);
270 /* No VME for JPEG */
271 OUT_BCS_BATCH(batch, 0);
272 OUT_BCS_BATCH(batch, 0);
273 OUT_BCS_BATCH(batch, 0);
274 OUT_BCS_BATCH(batch, 0);
275 OUT_BCS_BATCH(batch, 0);
278 /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
279 OUT_BCS_BATCH(batch, 0);
280 OUT_BCS_BATCH(batch, 0);
281 OUT_BCS_BATCH(batch, 0);
282 OUT_BCS_BATCH(batch, 0);
283 OUT_BCS_BATCH(batch, 0);
285 /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
286 OUT_BCS_BATCH(batch, 0);
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
292 /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* JPEG writes the bitstream at an offset inside the coded buffer; other
 * codecs start at the buffer base. */
293 bse_offset = (encoder_context->codec == CODEC_JPEG) ? (mfc_context->mfc_indirect_pak_bse_object.offset) : 0;
295 mfc_context->mfc_indirect_pak_bse_object.bo,
296 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
/* PAK-BSE upper bound = end of the coded buffer object. */
302 mfc_context->mfc_indirect_pak_bse_object.bo,
303 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
304 mfc_context->mfc_indirect_pak_bse_object.end_offset);
305 OUT_BCS_BATCH(batch, 0);
307 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_IMG_STATE (16 DWORDs): frame dimensions in macroblocks,
 * picture-level coding flags copied from the VA H.264 picture parameters
 * (weighted prediction, CABAC/CAVLC, 8x8 transform), MB size conformance
 * limits, and fixed rate-control seed values in DW10-13.
 * NOTE(review): the function brace lines and some OUT_BCS_BATCH( openers
 * are missing from this extraction. */
311 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
312 struct intel_encoder_context *encoder_context)
314 struct intel_batchbuffer *batch = encoder_context->base.batch;
315 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
316 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Round pixel dimensions up to whole macroblocks. */
318 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
319 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
321 BEGIN_BCS_BATCH(batch, 16);
323 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
324 /*DW1. MB setting of frame */
326 ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
/* DW2: frame height-1 / width-1 in MB units. */
328 ((height_in_mbs - 1) << 16) |
329 ((width_in_mbs - 1) << 0));
/* DW3: QP offsets and weighted-prediction controls. */
332 (0 << 24) | /* Second Chroma QP Offset */
333 (0 << 16) | /* Chroma QP Offset */
334 (0 << 14) | /* Max-bit conformance Intra flag */
335 (0 << 13) | /* Max Macroblock size conformance Inter flag */
336 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
337 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
338 (0 << 8) | /* FIXME: Image Structure */
339 (0 << 0) ); /* Current Decoded Image Frame Store ID, reserved in Encode mode */
/* DW4: entropy coding, transform and picture-structure flags. */
341 (0 << 16) | /* Minimum Frame size */
342 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
343 (0 << 14) | /* Load BitStream Pointer only once, 1 slice 1 frame */
344 (0 << 13) | /* CABAC 0 word insertion test enable */
345 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
346 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
347 (0 << 8) | /* FIXME: MbMvFormatFlag */
348 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
349 (0 << 6) | /* Only valid for VLD decoding mode */
350 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
351 (0 << 4) | /* Direct 8x8 inference flag */
352 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
353 (1 << 2) | /* Frame MB only flag */
354 (0 << 1) | /* MBAFF mode is in active */
355 (0 << 0)); /* Field picture flag */
356 /* DW5 Trellis quantization */
357 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
358 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
359 (0xBB8 << 16) | /* InterMbMaxSz */
360 (0xEE8) ); /* IntraMbMaxSz */
361 OUT_BCS_BATCH(batch, 0); /* Reserved */
363 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
364 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
365 /* DW10. Bit setting for MB */
366 OUT_BCS_BATCH(batch, 0x8C000000);
367 OUT_BCS_BATCH(batch, 0x00010000);
/* DW12-13: more fixed MB rate-control constants. */
369 OUT_BCS_BATCH(batch, 0);
370 OUT_BCS_BATCH(batch, 0x02010100);
371 /* DW14. For short format */
372 OUT_BCS_BATCH(batch, 0);
373 OUT_BCS_BATCH(batch, 0);
375 ADVANCE_BCS_BATCH(batch);
/* Emits one MFX_QM_STATE command (18 DWORDs): qm_type selects which matrix
 * slot the hardware loads, followed by 16 DWORDs of packed QM data.  The
 * caller may pass fewer than 16 DWORDs (qm_length); the remainder of the
 * local copy is whatever stack memory held — presumably only the first
 * qm_length DWORDs are consumed for the smaller 4x4 matrices (TODO confirm).
 * NOTE(review): the qm_type/qm/qm_length parameter lines and braces are
 * missing from this extraction. */
379 gen8_mfc_qm_state(VADriverContextP ctx,
383 struct intel_encoder_context *encoder_context)
385 struct intel_batchbuffer *batch = encoder_context->base.batch;
386 unsigned int qm_buffer[16];
388 assert(qm_length <= 16);
389 assert(sizeof(*qm) == 4);
390 memcpy(qm_buffer, qm, qm_length * 4);
392 BEGIN_BCS_BATCH(batch, 18);
393 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
394 OUT_BCS_BATCH(batch, qm_type << 0);
395 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
396 ADVANCE_BCS_BATCH(batch);
/* Loads the four AVC quantization matrices (4x4 intra/inter, 8x8 intra/inter).
 * Uses the flat (all-16) matrix unless the SPS or PPS signals scaling
 * matrices, in which case the lists come from the VAIQMatrixBufferH264.
 * 4x4 matrices are 12 DWORDs (3 lists x 16 bytes), 8x8 are 16 DWORDs.
 * NOTE(review): the brace/else lines are missing from this extraction. */
400 gen8_mfc_avc_qm_state(VADriverContextP ctx,
401 struct encode_state *encode_state,
402 struct intel_encoder_context *encoder_context)
404 const unsigned int *qm_4x4_intra;
405 const unsigned int *qm_4x4_inter;
406 const unsigned int *qm_8x8_intra;
407 const unsigned int *qm_8x8_inter;
408 VAEncSequenceParameterBufferH264 *pSeqParameter =
409 (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
410 VAEncPictureParameterBufferH264 *pPicParameter =
411 (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
413 if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
414 && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
415 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
417 VAIQMatrixBufferH264 *qm;
418 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
419 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* List 0 = intra Y, list 3 = inter Y for 4x4; lists 0/1 for 8x8. */
420 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
421 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
422 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
423 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
426 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
427 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
428 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
429 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/* Emits one MFX_FQM_STATE command (34 DWORDs): fqm_type selects the forward
 * QM slot, followed by 32 DWORDs of 16-bit reciprocal quantizer values
 * (two per DWORD).  Mirrors gen8_mfc_qm_state but with the larger payload.
 * NOTE(review): the fqm_type/fqm/fqm_length parameter lines and braces are
 * missing from this extraction. */
433 gen8_mfc_fqm_state(VADriverContextP ctx,
437 struct intel_encoder_context *encoder_context)
439 struct intel_batchbuffer *batch = encoder_context->base.batch;
440 unsigned int fqm_buffer[32];
442 assert(fqm_length <= 32);
443 assert(sizeof(*fqm) == 4);
444 memcpy(fqm_buffer, fqm, fqm_length * 4);
446 BEGIN_BCS_BATCH(batch, 34);
447 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
448 OUT_BCS_BATCH(batch, fqm_type << 0);
449 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
450 ADVANCE_BCS_BATCH(batch);
/* Converts a len x len quantization matrix into the hardware's forward QM:
 * each output entry is the 16-bit fixed-point reciprocal (1<<16)/q.  Note the
 * transpose: fqm[i][j] is built from qm[j][i].
 * NOTE(review): the braces and the "int i, j;" declaration are missing from
 * this extraction. */
454 gen8_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
457 for (i = 0; i < len; i++)
458 for (j = 0; j < len; j++)
459 fqm[i * len + j] = (1 << 16) / qm[j * len + i];
/* Loads the four AVC forward quantization matrices.  Flat case: reuse
 * fqm_flat.  Otherwise build each FQM from the VA scaling lists via
 * gen8_mfc_avc_fill_fqm (4x4: three 16-entry lists packed into 24 DWORDs;
 * 8x8: one 64-entry list in 32 DWORDs).
 * NOTE(review): the braces, the "unsigned int fqm[32]" / "int i" local
 * declarations, and the else line are missing from this extraction. */
463 gen8_mfc_avc_fqm_state(VADriverContextP ctx,
464 struct encode_state *encode_state,
465 struct intel_encoder_context *encoder_context)
467 VAEncSequenceParameterBufferH264 *pSeqParameter =
468 (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
469 VAEncPictureParameterBufferH264 *pPicParameter =
470 (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
472 if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
473 && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
474 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
475 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
476 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
477 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
481 VAIQMatrixBufferH264 *qm;
482 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
483 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* 4x4 intra: scaling lists 0-2, 16 uint16 entries each. */
485 for (i = 0; i < 3; i++)
486 gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
487 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
/* 4x4 inter: scaling lists 3-5. */
489 for (i = 3; i < 6; i++)
490 gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
491 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
493 gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
494 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
496 gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
497 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/* Emits MFX_INSERT_OBJECT to splice raw header/tail bits into the output
 * bitstream: lenght_in_dws DWORDs of payload [sic — parameter name typo kept
 * for interface stability], with data_bits_in_last_dw valid bits in the final
 * DWORD (0 means a full 32), optional emulation-prevention byte insertion,
 * and last-header / end-of-slice markers.  If batch is NULL the context's
 * default BCS batch is used.
 * NOTE(review): the braces and the "if (batch == NULL)" line are missing
 * from this extraction. */
502 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
503 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
504 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
505 struct intel_batchbuffer *batch)
508 batch = encoder_context->base.batch;
/* Hardware convention: 0 valid bits in the last DWORD means all 32. */
510 if (data_bits_in_last_dw == 0)
511 data_bits_in_last_dw = 32;
513 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
515 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
/* DW1: insertion control flags. */
517 (0 << 16) | /* always start at offset 0 */
518 (data_bits_in_last_dw << 8) |
519 (skip_emul_byte_count << 4) |
520 (!!emulation_flag << 3) |
521 ((!!is_last_header) << 2) |
522 ((!!is_end_of_slice) << 1) |
523 (0 << 0)); /* FIXME: ??? */
524 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
526 ADVANCE_BCS_BATCH(batch);
/* Per-frame (re)initialization of the MFC context: derives the frame size in
 * macroblocks from the codec-specific sequence/picture parameters, drops all
 * buffer references from the previous frame, and (re)allocates the scratch
 * buffers (intra row store, MB status, deblocking and BSD/MPC row stores) and
 * the auxiliary slice batchbuffer sized for the worst-case slice data.
 * NOTE(review): the braces, several local declarations (i, bo), dri_bo_alloc
 * name/alignment arguments, and some else lines are missing from this
 * extraction. */
530 static void gen8_mfc_init(VADriverContextP ctx,
531 struct encode_state *encode_state,
532 struct intel_encoder_context *encoder_context)
534 struct i965_driver_data *i965 = i965_driver_data(ctx);
535 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
538 int width_in_mbs = 0;
539 int height_in_mbs = 0;
540 int slice_batchbuffer_size;
/* H.264 SPS already carries MB dimensions; MPEG2/JPEG carry pixels. */
542 if (encoder_context->codec == CODEC_H264 ||
543 encoder_context->codec == CODEC_H264_MVC) {
544 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
545 width_in_mbs = pSequenceParameter->picture_width_in_mbs;
546 height_in_mbs = pSequenceParameter->picture_height_in_mbs;
547 } else if (encoder_context->codec == CODEC_MPEG2) {
548 VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
550 assert(encoder_context->codec == CODEC_MPEG2);
552 width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
553 height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
555 assert(encoder_context->codec == CODEC_JPEG);
556 VAEncPictureParameterBufferJPEG *pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
558 width_in_mbs = ALIGN(pic_param->picture_width, 16) / 16;
559 height_in_mbs = ALIGN(pic_param->picture_height, 16) / 16;
/* Worst case: 64 bytes of commands per MB plus per-slice header/tail. */
562 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
563 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
565 /*Encode common setup for MFC*/
566 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
567 mfc_context->post_deblocking_output.bo = NULL;
569 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
570 mfc_context->pre_deblocking_output.bo = NULL;
572 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
573 mfc_context->uncompressed_picture_source.bo = NULL;
575 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
576 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
578 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
579 if (mfc_context->direct_mv_buffers[i].bo != NULL)
580 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
581 mfc_context->direct_mv_buffers[i].bo = NULL;
584 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
585 if (mfc_context->reference_surfaces[i].bo != NULL)
586 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
587 mfc_context->reference_surfaces[i].bo = NULL;
/* Scratch buffer allocations below; sizes scale with frame width/height. */
590 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
591 bo = dri_bo_alloc(i965->intel.bufmgr,
596 mfc_context->intra_row_store_scratch_buffer.bo = bo;
598 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
599 bo = dri_bo_alloc(i965->intel.bufmgr,
601 width_in_mbs * height_in_mbs * 16,
604 mfc_context->macroblock_status_buffer.bo = bo;
606 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
607 bo = dri_bo_alloc(i965->intel.bufmgr,
609 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
612 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
614 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
615 bo = dri_bo_alloc(i965->intel.bufmgr,
617 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
620 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
622 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
623 mfc_context->mfc_batchbuffer_surface.bo = NULL;
625 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
626 mfc_context->aux_batchbuffer_surface.bo = NULL;
/* Recreate the aux slice batchbuffer and expose its BO as a 16-byte-block
 * surface for the media kernel. */
628 if (mfc_context->aux_batchbuffer)
629 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
631 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
632 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
633 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
634 mfc_context->aux_batchbuffer_surface.pitch = 16;
635 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
636 mfc_context->aux_batchbuffer_surface.size_block = 16;
638 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
/* Emits MFX_PIPE_BUF_ADDR_STATE (61 DWORDs): pre/post deblocking outputs,
 * the uncompressed source, MB status stream-out, the row-store scratch
 * buffers, the 16 reference picture slots, and a second MB status pointer.
 * Each address is 3 DWORDs (48-bit address + memory attributes); unallocated
 * slots are written as zeros.
 * NOTE(review): the braces, the "int i" declaration, several else lines and
 * relocation-offset arguments are missing from this extraction. */
642 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
643 struct intel_encoder_context *encoder_context)
645 struct intel_batchbuffer *batch = encoder_context->base.batch;
646 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
649 BEGIN_BCS_BATCH(batch, 61);
651 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
653 /* the DW1-3 is for pre_deblocking */
654 if (mfc_context->pre_deblocking_output.bo)
655 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
656 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
659 OUT_BCS_BATCH(batch, 0); /* pre output addr */
661 OUT_BCS_BATCH(batch, 0);
662 OUT_BCS_BATCH(batch, 0);
663 /* the DW4-6 is for the post_deblocking */
665 if (mfc_context->post_deblocking_output.bo)
666 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
667 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
668 0); /* post output addr */
670 OUT_BCS_BATCH(batch, 0);
672 OUT_BCS_BATCH(batch, 0);
673 OUT_BCS_BATCH(batch, 0);
675 /* the DW7-9 is for the uncompressed_picture */
676 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
677 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
678 0); /* uncompressed data */
680 OUT_BCS_BATCH(batch, 0);
681 OUT_BCS_BATCH(batch, 0);
683 /* the DW10-12 is for the mb status */
684 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
685 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
686 0); /* StreamOut data*/
688 OUT_BCS_BATCH(batch, 0);
689 OUT_BCS_BATCH(batch, 0);
691 /* the DW13-15 is for the intra_row_store_scratch */
692 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
693 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
696 OUT_BCS_BATCH(batch, 0);
697 OUT_BCS_BATCH(batch, 0);
699 /* the DW16-18 is for the deblocking filter */
700 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
701 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
704 OUT_BCS_BATCH(batch, 0);
705 OUT_BCS_BATCH(batch, 0);
707 /* the DW 19-50 is for Reference pictures*/
708 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
709 if ( mfc_context->reference_surfaces[i].bo != NULL) {
710 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
711 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
714 OUT_BCS_BATCH(batch, 0);
717 OUT_BCS_BATCH(batch, 0);
720 OUT_BCS_BATCH(batch, 0);
722 /* The DW 52-54 is for the MB status buffer */
723 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
724 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
725 0); /* Macroblock status buffer*/
727 OUT_BCS_BATCH(batch, 0);
728 OUT_BCS_BATCH(batch, 0);
730 /* the DW 55-57 is the ILDB buffer */
731 OUT_BCS_BATCH(batch, 0);
732 OUT_BCS_BATCH(batch, 0);
733 OUT_BCS_BATCH(batch, 0);
735 /* the DW 58-60 is the second ILDB buffer */
736 OUT_BCS_BATCH(batch, 0);
737 OUT_BCS_BATCH(batch, 0);
738 OUT_BCS_BATCH(batch, 0);
740 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_DIRECTMODE_STATE (71 DWORDs): direct-MV buffer addresses for
 * the reference frames, the current frame's MV write buffer (the last DMV
 * slot), and the POC list (written here as i/2 placeholders for each of the
 * 32 entries plus two current-picture POCs).
 * NOTE(review): the braces, the "int i" declaration and some else/close
 * lines are missing from this extraction. */
744 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
745 struct intel_encoder_context *encoder_context)
747 struct intel_batchbuffer *batch = encoder_context->base.batch;
748 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
752 BEGIN_BCS_BATCH(batch, 71);
754 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
756 /* Reference frames and Current frames */
757 /* the DW1-32 is for the direct MV for reference */
758 for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
759 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
760 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
761 I915_GEM_DOMAIN_INSTRUCTION, 0,
763 OUT_BCS_BATCH(batch, 0);
765 OUT_BCS_BATCH(batch, 0);
766 OUT_BCS_BATCH(batch, 0);
770 OUT_BCS_BATCH(batch, 0);
772 /* the DW34-36 is the MV for the current reference */
773 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
774 I915_GEM_DOMAIN_INSTRUCTION, 0,
777 OUT_BCS_BATCH(batch, 0);
778 OUT_BCS_BATCH(batch, 0);
/* POC list: one entry per reference field/frame slot. */
781 for(i = 0; i < 32; i++) {
782 OUT_BCS_BATCH(batch, i/2);
784 OUT_BCS_BATCH(batch, 0);
785 OUT_BCS_BATCH(batch, 0);
787 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_BSP_BUF_BASE_ADDR_STATE (10 DWORDs): only the BSD/MPC row-store
 * scratch buffer is programmed; the MPR row store and bitplane read buffer
 * slots are zeroed (unused by the encoder).
 * NOTE(review): the braces and the relocation-offset argument line are
 * missing from this extraction. */
792 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
793 struct intel_encoder_context *encoder_context)
795 struct intel_batchbuffer *batch = encoder_context->base.batch;
796 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
798 BEGIN_BCS_BATCH(batch, 10);
800 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
801 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
802 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
804 OUT_BCS_BATCH(batch, 0);
805 OUT_BCS_BATCH(batch, 0);
807 /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
808 OUT_BCS_BATCH(batch, 0);
809 OUT_BCS_BATCH(batch, 0);
810 OUT_BCS_BATCH(batch, 0);
812 /* the DW7-9 is for Bitplane Read Buffer Base Address */
813 OUT_BCS_BATCH(batch, 0);
814 OUT_BCS_BATCH(batch, 0);
815 OUT_BCS_BATCH(batch, 0);
817 ADVANCE_BCS_BATCH(batch);
/* Programs all picture-level AVC state in the required order: pipe mode,
 * surface, indirect object bases, pipe buffers, BSP buffers, image state,
 * QM/FQM, direct-mode state, then the reference index lists.  Codec-variant
 * hooks go through the mfc_context function pointers; the rest call the
 * gen8 helpers in this file directly.
 * NOTE(review): the braces are missing from this extraction. */
821 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
822 struct encode_state *encode_state,
823 struct intel_encoder_context *encoder_context)
825 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
827 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
828 mfc_context->set_surface_state(ctx, encoder_context);
829 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
830 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
831 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
832 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
833 mfc_context->avc_qm_state(ctx, encode_state, encoder_context);
834 mfc_context->avc_fqm_state(ctx, encode_state, encoder_context);
835 gen8_mfc_avc_directmode_state(ctx, encoder_context);
836 intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
/* Submits the accumulated batchbuffer to the kernel, kicking off the encode.
 * encode_state is unused here; it is part of the common pipeline-stage
 * signature.  NOTE(review): braces are missing from this extraction. */
840 static VAStatus gen8_mfc_run(VADriverContextP ctx,
841 struct encode_state *encode_state,
842 struct intel_encoder_context *encoder_context)
844 struct intel_batchbuffer *batch = encoder_context->base.batch;
846 intel_batchbuffer_flush(batch); //run the pipeline
848 return VA_STATUS_SUCCESS;
/* Reads back the encoded size: maps the coded buffer, converts the coded
 * segment's byte size to bits for the caller, and unmaps.  Mapping the
 * buffer implicitly waits for the GPU to finish writing it.
 * NOTE(review): the return-type/brace lines are missing from this
 * extraction; vaStatus is only checked via assert, so a map failure would
 * go unnoticed in release builds — worth a follow-up. */
853 gen8_mfc_stop(VADriverContextP ctx,
854 struct encode_state *encode_state,
855 struct intel_encoder_context *encoder_context,
856 int *encoded_bits_size)
858 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
859 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
860 VACodedBufferSegment *coded_buffer_segment;
862 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
863 assert(vaStatus == VA_STATUS_SUCCESS);
864 *encoded_bits_size = coded_buffer_segment->size * 8;
865 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
867 return VA_STATUS_SUCCESS;
872 gen8_mfc_avc_slice_state(VADriverContextP ctx,
873 VAEncPictureParameterBufferH264 *pic_param,
874 VAEncSliceParameterBufferH264 *slice_param,
875 struct encode_state *encode_state,
876 struct intel_encoder_context *encoder_context,
877 int rate_control_enable,
879 struct intel_batchbuffer *batch)
881 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
882 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
883 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
884 int beginmb = slice_param->macroblock_address;
885 int endmb = beginmb + slice_param->num_macroblocks;
886 int beginx = beginmb % width_in_mbs;
887 int beginy = beginmb / width_in_mbs;
888 int nextx = endmb % width_in_mbs;
889 int nexty = endmb / width_in_mbs;
890 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
891 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
893 unsigned char correct[6], grow, shrink;
895 int weighted_pred_idc = 0;
896 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
897 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
898 int num_ref_l0 = 0, num_ref_l1 = 0;
901 batch = encoder_context->base.batch;
903 if (slice_type == SLICE_TYPE_I) {
904 luma_log2_weight_denom = 0;
905 chroma_log2_weight_denom = 0;
906 } else if (slice_type == SLICE_TYPE_P) {
907 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
908 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
910 if (slice_param->num_ref_idx_active_override_flag)
911 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
912 } else if (slice_type == SLICE_TYPE_B) {
913 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
914 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
915 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
917 if (slice_param->num_ref_idx_active_override_flag) {
918 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
919 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
922 if (weighted_pred_idc == 2) {
923 /* 8.4.3 - Derivation process for prediction weights (8-279) */
924 luma_log2_weight_denom = 5;
925 chroma_log2_weight_denom = 5;
929 maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
930 maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
932 for (i = 0; i < 6; i++)
933 correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
935 grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
936 (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
937 shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
938 (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
940 BEGIN_BCS_BATCH(batch, 11);;
942 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
943 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
948 (chroma_log2_weight_denom << 8) |
949 (luma_log2_weight_denom << 0));
952 (weighted_pred_idc << 30) |
953 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
954 (slice_param->disable_deblocking_filter_idc << 27) |
955 (slice_param->cabac_init_idc << 24) |
956 (qp<<16) | /*Slice Quantization Parameter*/
957 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
958 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
960 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
962 slice_param->macroblock_address );
963 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
965 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
966 (1 << 30) | /*ResetRateControlCounter*/
967 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
968 (4 << 24) | /*RC Stable Tolerance, middle level*/
969 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
970 (0 << 22) | /*QP mode, don't modfiy CBP*/
971 (0 << 21) | /*MB Type Direct Conversion Enabled*/
972 (0 << 20) | /*MB Type Skip Conversion Enabled*/
973 (last_slice << 19) | /*IsLastSlice*/
974 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
975 (1 << 17) | /*HeaderPresentFlag*/
976 (1 << 16) | /*SliceData PresentFlag*/
977 (1 << 15) | /*TailPresentFlag*/
978 (1 << 13) | /*RBSP NAL TYPE*/
979 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
980 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
982 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
983 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
993 OUT_BCS_BATCH(batch, 0);
995 ADVANCE_BCS_BATCH(batch);
/* Offsets (in dwords unless noted) into the per-MB VME output record used
 * below when deciding intra vs. inter and when locating the inter MV data.
 * AVC_INTER_MV_OFFSET is a byte offset into the record (see its use when
 * computing `offset` from vme_output.size_block). */
998 #define AVC_INTRA_RDO_OFFSET 4
999 #define AVC_INTER_RDO_OFFSET 10
1000 #define AVC_INTER_MSG_OFFSET 8
1001 #define AVC_INTER_MV_OFFSET 48
1002 #define AVC_RDO_MASK 0xFFFF
/* Emit one MFC_AVC_PAK_OBJECT command for an intra macroblock at (x, y).
 * msg points at the VME output record for this MB; msg[0] carries the VME
 * MB mode bits which are repacked into the PAK intra message below.
 * end_mb marks the last MB of the slice; qp is the per-MB quantiser.
 * Returns the command length in dwords so callers can track batch usage.
 * NOTE(review): `batch` parameter is immediately overwritten from
 * encoder_context->base.batch — the parameter is effectively unused. */
1005 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1006 int qp,unsigned int *msg,
1007 struct intel_encoder_context *encoder_context,
1008 unsigned char target_mb_size, unsigned char max_mb_size,
1009 struct intel_batchbuffer *batch)
1011 int len_in_dwords = 12;
1012 unsigned int intra_msg;
1013 #define INTRA_MSG_FLAG (1 << 13)
1014 #define INTRA_MBTYPE_MASK (0x1F0000)
1016 batch = encoder_context->base.batch;
1018 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* Repack VME msg[0]: keep low flag bits, set the intra flag, and move the
 * MB type field down 8 bits into the position PAK expects. */
1020 intra_msg = msg[0] & 0xC0FF;
1021 intra_msg |= INTRA_MSG_FLAG;
1022 intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1023 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1024 OUT_BCS_BATCH(batch, 0);
1025 OUT_BCS_BATCH(batch, 0);
1026 OUT_BCS_BATCH(batch,
1027 (0 << 24) | /* PackedMvNum, Debug*/
1028 (0 << 20) | /* No motion vector */
1029 (1 << 19) | /* CbpDcY */
1030 (1 << 18) | /* CbpDcU */
1031 (1 << 17) | /* CbpDcV */
1034 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
1035 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1036 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
1038 /*Stuff for Intra MB*/
1039 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
1040 OUT_BCS_BATCH(batch, msg[2]);
1041 OUT_BCS_BATCH(batch, msg[3]&0xFF);
1043 /*MaxSizeInWord and TargetSzieInWord*/
1044 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1045 (target_mb_size << 16) );
1047 OUT_BCS_BATCH(batch, 0);
1049 ADVANCE_BCS_BATCH(batch);
1051 return len_in_dwords;
/* Emit one MFC_AVC_PAK_OBJECT command for an inter macroblock at (x, y).
 * msg is the VME output record; offset is the byte offset of this MB's MV
 * data in the VME output buffer (passed through to the hardware).
 * Before emitting, the 16 sub-block MVs produced by VME are compacted in
 * place into the layout AVC_PAK expects for 8x16 / 16x8 / 8x8 partitions.
 * Returns the command length in dwords.
 * NOTE(review): `batch` parameter is overwritten from
 * encoder_context->base.batch, same as the intra variant. */
1055 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1056 unsigned int *msg, unsigned int offset,
1057 struct intel_encoder_context *encoder_context,
1058 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1059 struct intel_batchbuffer *batch)
1061 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1062 int len_in_dwords = 12;
1063 unsigned int inter_msg = 0;
1065 batch = encoder_context->base.batch;
1067 #define MSG_MV_OFFSET 4
1068 unsigned int *mv_ptr;
1069 mv_ptr = msg + MSG_MV_OFFSET;
1070 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1071 * to convert them to be compatible with the format of AVC_PAK
1074 if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1075 /* MV[0] and MV[2] are replicated */
1076 mv_ptr[4] = mv_ptr[0];
1077 mv_ptr[5] = mv_ptr[1];
1078 mv_ptr[2] = mv_ptr[8];
1079 mv_ptr[3] = mv_ptr[9];
1080 mv_ptr[6] = mv_ptr[8];
1081 mv_ptr[7] = mv_ptr[9];
1082 } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1083 /* MV[0] and MV[1] are replicated */
1084 mv_ptr[2] = mv_ptr[0];
1085 mv_ptr[3] = mv_ptr[1];
1086 mv_ptr[4] = mv_ptr[16];
1087 mv_ptr[5] = mv_ptr[17];
1088 mv_ptr[6] = mv_ptr[24];
1089 mv_ptr[7] = mv_ptr[25];
1090 } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1091 !(msg[1] & SUBMB_SHAPE_MASK)) {
1092 /* Don't touch MV[0] or MV[1] */
1093 mv_ptr[2] = mv_ptr[8];
1094 mv_ptr[3] = mv_ptr[9];
1095 mv_ptr[4] = mv_ptr[16];
1096 mv_ptr[5] = mv_ptr[17];
1097 mv_ptr[6] = mv_ptr[24];
1098 mv_ptr[7] = mv_ptr[25];
1102 BEGIN_BCS_BATCH(batch, len_in_dwords);
1104 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1108 if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1109 if (msg[1] & SUBMB_SHAPE_MASK)
1112 OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
1113 OUT_BCS_BATCH(batch, offset);
/* Rebuild the PAK inter message from VME msg[0]: keep mode/type fields,
 * set the MV count and the CbpDcY/U/V bits (19..17). 8x8 with sub-shapes
 * needs the full 32-MV form. */
1114 inter_msg = msg[0] & (0x1F00FFFF);
1115 inter_msg |= INTER_MV8;
1116 inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1117 if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1118 (msg[1] & SUBMB_SHAPE_MASK)) {
1119 inter_msg |= INTER_MV32;
1122 OUT_BCS_BATCH(batch, inter_msg);
1124 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
1125 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
/* B slices set the 0xF direct/weighted flags in the high nibble. */
1127 if ( slice_type == SLICE_TYPE_B) {
1128 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
1130 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1133 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1136 inter_msg = msg[1] >> 8;
1137 /*Stuff for Inter MB*/
1138 OUT_BCS_BATCH(batch, inter_msg);
1139 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1140 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1142 /*MaxSizeInWord and TargetSzieInWord*/
1143 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1144 (target_mb_size << 16) );
1146 OUT_BCS_BATCH(batch, 0x0);
1148 ADVANCE_BCS_BATCH(batch);
1150 return len_in_dwords;
/* Program one AVC slice into slice_batch (software/CPU PAK path):
 * emits slice state, headers and packed data, then walks every MB of the
 * slice, choosing intra vs. inter per MB from the VME RDO costs, and emits
 * one PAK object command per MB. Finally appends the slice tail data. */
1154 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1155 struct encode_state *encode_state,
1156 struct intel_encoder_context *encoder_context,
1158 struct intel_batchbuffer *slice_batch)
1160 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1161 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1162 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1163 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1164 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1165 unsigned int *msg = NULL, offset = 0;
1166 unsigned char *msg_ptr = NULL;
1167 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1168 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1169 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1171 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1172 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1173 unsigned int tail_data[] = { 0x0, 0x0 };
1174 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1175 int is_intra = slice_type == SLICE_TYPE_I;
/* Under CBR the BRC-computed QP overrides the parameter-derived one, and
 * slice_qp_delta is rewritten so the emitted slice header matches. */
1180 if (rate_control_mode == VA_RC_CBR) {
1181 qp = mfc_context->brc.qp_prime_y[0][slice_type];
1182 if (encode_state->slice_header_index[slice_index] == 0) {
1183 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1188 /* only support for 8-bit pixel bit-depth */
1189 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1190 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1191 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1192 assert(qp >= 0 && qp < 52);
1194 gen8_mfc_avc_slice_state(ctx,
1197 encode_state, encoder_context,
1198 (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1200 if ( slice_index == 0)
1201 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1203 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
/* Map the VME output so per-MB messages can be read on the CPU. */
1205 dri_bo_map(vme_context->vme_output.bo , 1);
1206 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1209 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1211 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1214 for (i = pSliceParameter->macroblock_address;
1215 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1216 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1217 x = i % width_in_mbs;
1218 y = i / width_in_mbs;
1219 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1220 if (vme_context->roi_enabled) {
1221 qp_mb = *(vme_context->qp_per_mb + i); /* per-MB QP from the ROI map */
1227 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1229 int inter_rdo, intra_rdo;
/* Mode decision: pick intra when its RDO cost beats inter's. */
1230 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1231 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1232 offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1233 if (intra_rdo < inter_rdo) {
1234 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1236 msg += AVC_INTER_MSG_OFFSET;
1237 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1242 dri_bo_unmap(vme_context->vme_output.bo);
/* Slice tail: last slice of the frame gets two tail dwords, others one.
 * NOTE(review): insert_object argument lists are partially out of view. */
1245 mfc_context->insert_object(ctx, encoder_context,
1247 2, 1, 1, 0, slice_batch);
1249 mfc_context->insert_object(ctx, encoder_context,
1251 1, 1, 1, 0, slice_batch);
/* Build the slice-level PAK commands on the CPU into the auxiliary batch
 * buffer: program every slice, align, terminate with MI_BATCH_BUFFER_END,
 * then detach and free the aux batchbuffer (keeping a reference to its BO,
 * which the caller consumes via MI_BATCH_BUFFER_START). */
1256 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1257 struct encode_state *encode_state,
1258 struct intel_encoder_context *encoder_context)
1260 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1261 struct intel_batchbuffer *batch;
1265 batch = mfc_context->aux_batchbuffer;
1266 batch_bo = batch->buffer;
1267 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1268 gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1271 intel_batchbuffer_align(batch, 8);
1273 BEGIN_BCS_BATCH(batch, 2);
1274 OUT_BCS_BATCH(batch, 0);
1275 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1276 ADVANCE_BCS_BATCH(batch);
/* Keep the BO alive past the batchbuffer free; ownership passes to caller. */
1278 dri_bo_reference(batch_bo);
1279 intel_batchbuffer_free(batch);
1280 mfc_context->aux_batchbuffer = NULL;
/* Bind the VME output buffer as the input surface of the hardware
 * batchbuffer-generation kernel (binding table slot BIND_IDX_VME_OUTPUT). */
1287 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1288 struct encode_state *encode_state,
1289 struct intel_encoder_context *encoder_context)
1291 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1292 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1294 assert(vme_context->vme_output.bo);
1295 mfc_context->buffer_suface_setup(ctx,
1296 &mfc_context->gpe_context,
1297 &vme_context->vme_output,
1298 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1299 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
/* Bind the aux batchbuffer surface as the kernel's output (the buffer the
 * GPU kernel fills with PAK commands), slot BIND_IDX_MFC_BATCHBUFFER. */
1303 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1304 struct encode_state *encode_state,
1305 struct intel_encoder_context *encoder_context)
1307 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1308 assert(mfc_context->aux_batchbuffer_surface.bo);
1309 mfc_context->buffer_suface_setup(ctx,
1310 &mfc_context->gpe_context,
1311 &mfc_context->aux_batchbuffer_surface,
1312 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1313 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/* Set up both surfaces (VME input, batchbuffer output) for the GPU kernel. */
1317 gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1318 struct encode_state *encode_state,
1319 struct intel_encoder_context *encoder_context)
1321 gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1322 gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/* Fill the interface descriptor remap table (IDRT) in the dynamic-state BO:
 * one gen8 interface descriptor per kernel, pointing at the kernel start
 * and the shared binding table. Descriptors are 32 bytes (asserted). */
1326 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1327 struct encode_state *encode_state,
1328 struct intel_encoder_context *encoder_context)
1330 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1331 struct gen8_interface_descriptor_data *desc;
1334 unsigned char *desc_ptr;
1336 bo = mfc_context->gpe_context.dynamic_state.bo;
1338 assert(bo->virtual);
1339 desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt_offset;
1341 desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1343 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1344 struct i965_kernel *kernel;
1345 kernel = &mfc_context->gpe_context.kernels[i];
1346 assert(sizeof(*desc) == 32);
1347 /*Setup the descritor table*/
1348 memset(desc, 0, sizeof(*desc));
1349 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6; /* 64B aligned */
1350 desc->desc3.sampler_count = 0; /* no samplers used by this kernel */
1351 desc->desc3.sampler_state_pointer = 0;
1352 desc->desc4.binding_table_entry_count = 1;
1353 desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); /* 32B aligned */
1354 desc->desc5.constant_urb_entry_read_offset = 0;
1355 desc->desc5.constant_urb_entry_read_length = 4; /* 4 x 256-bit CURBE reads -- TODO confirm units */
/* Set up constant (CURBE) data for the batchbuffer kernel.
 * NOTE(review): body is mostly out of view in this listing. */
1367 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1368 struct encode_state *encode_state,
1369 struct intel_encoder_context *encoder_context)
1371 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Size of one hardware-generated AVC PAK command: 48 bytes = 3 owords. */
1376 #define AVC_PAK_LEN_IN_BYTE 48
1377 #define AVC_PAK_LEN_IN_OWORD 3
/* Emit one CMD_MEDIA_OBJECT that asks the GPU kernel to generate PAK
 * commands for `number_mb_cmds` consecutive MBs starting at (mb_x, mb_y),
 * writing them at head_offset (in owords, hence /16) in the output buffer.
 * intra_flag, qp, slice end position and the two reference dwords are
 * passed through as inline kernel payload. */
1380 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1381 uint32_t intra_flag,
1393 uint32_t temp_value;
1394 BEGIN_BATCH(batch, 14);
1396 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1397 OUT_BATCH(batch, 0);
1398 OUT_BATCH(batch, 0);
1399 OUT_BATCH(batch, 0);
1400 OUT_BATCH(batch, 0);
1401 OUT_BATCH(batch, 0);
/* Inline data consumed by the media kernel starts here. */
1404 OUT_BATCH(batch, head_offset / 16);
1405 OUT_BATCH(batch, (intra_flag) | (qp << 16));
1406 temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1407 OUT_BATCH(batch, temp_value);
1409 OUT_BATCH(batch, number_mb_cmds);
1412 ((slice_end_y << 8) | (slice_end_x)));
1413 OUT_BATCH(batch, fwd_ref);
1414 OUT_BATCH(batch, bwd_ref);
1416 OUT_BATCH(batch, MI_NOOP);
1418 ADVANCE_BATCH(batch);
/* Split one slice into chunks of MBs and emit a media-object command per
 * chunk (hardware PAK-command generation). The chunk size is tuned from
 * the frame width; with ROI enabled a chunk is cut short as soon as the
 * per-MB QP changes so each media object carries a single QP. */
1422 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1423 struct intel_encoder_context *encoder_context,
1424 VAEncSliceParameterBufferH264 *slice_param,
1429 struct intel_batchbuffer *batch = encoder_context->base.batch;
1430 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1431 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1432 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1433 int total_mbs = slice_param->num_macroblocks;
1434 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1435 int number_mb_cmds = 128;
1436 int starting_offset = 0;
1438 int last_mb, slice_end_x, slice_end_y;
1439 int remaining_mb = total_mbs;
1440 uint32_t fwd_ref , bwd_ref, mb_flag;
1442 int number_roi_mbs, max_mb_cmds, i;
1444 last_mb = slice_param->macroblock_address + total_mbs - 1;
1445 slice_end_x = last_mb % width_in_mbs;
1446 slice_end_y = last_mb / width_in_mbs;
1448 if (slice_type == SLICE_TYPE_I) {
1453 fwd_ref = vme_context->ref_index_in_mb[0];
1454 bwd_ref = vme_context->ref_index_in_mb[1];
/* Heuristic chunk sizing: wider frames use smaller fractions of a row
 * per media object so work is spread over more commands. */
1458 if (width_in_mbs >= 100) {
1459 number_mb_cmds = width_in_mbs / 5;
1460 } else if (width_in_mbs >= 80) {
1461 number_mb_cmds = width_in_mbs / 4;
1462 } else if (width_in_mbs >= 60) {
1463 number_mb_cmds = width_in_mbs / 3;
1464 } else if (width_in_mbs >= 40) {
1465 number_mb_cmds = width_in_mbs / 2;
1467 number_mb_cmds = width_in_mbs;
1470 max_mb_cmds = number_mb_cmds;
1473 mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1474 mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1476 number_mb_cmds = max_mb_cmds;
1477 if (vme_context->roi_enabled) {
/* Stop the chunk at the first MB whose ROI QP differs from the first. */
1480 tmp_qp = *(vme_context->qp_per_mb + starting_offset);
1481 for (i = 1; i < max_mb_cmds; i++) {
1482 if (tmp_qp != *(vme_context->qp_per_mb + starting_offset + i))
1488 number_mb_cmds = number_roi_mbs;
1492 if (number_mb_cmds >= remaining_mb) {
1493 number_mb_cmds = remaining_mb;
1496 gen8_mfc_batchbuffer_emit_object_command(batch,
/* Advance output offset by the PAK bytes this chunk will produce. */
1509 head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1510 remaining_mb -= number_mb_cmds;
1511 starting_offset += number_mb_cmds;
1512 } while (remaining_mb > 0);
/* Hardware-path per-slice programming: emits slice state, headers and
 * packed data into the aux batchbuffer, reserves room for the GPU-generated
 * PAK commands (advancing slice_batch->ptr without writing), then issues
 * the media-object commands that will fill that reserved region. */
1516 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1517 struct encode_state *encode_state,
1518 struct intel_encoder_context *encoder_context,
1521 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1522 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1523 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1524 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1525 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1526 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1527 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1528 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1529 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1530 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1531 unsigned int tail_data[] = { 0x0, 0x0 };
1533 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
/* CBR: BRC QP overrides, and slice_qp_delta is kept consistent with it. */
1537 if (rate_control_mode == VA_RC_CBR) {
1538 qp = mfc_context->brc.qp_prime_y[0][slice_type];
1539 if (encode_state->slice_header_index[slice_index] == 0) {
1540 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1545 /* only support for 8-bit pixel bit-depth */
1546 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1547 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1548 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1549 assert(qp >= 0 && qp < 52);
1551 gen8_mfc_avc_slice_state(ctx,
1556 (rate_control_mode == VA_RC_CBR),
1560 if (slice_index == 0)
1561 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1563 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1565 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1566 head_offset = intel_batchbuffer_used_size(slice_batch);
/* Reserve space: the GPU kernel writes AVC_PAK_LEN_IN_BYTE per MB here. */
1568 slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1570 gen8_mfc_avc_batchbuffer_slice_command(ctx,
1578 /* Aligned for tail */
1579 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1581 mfc_context->insert_object(ctx,
1592 mfc_context->insert_object(ctx,
/* Run the media pipeline that generates the PAK batchbuffer on the GPU:
 * set up the GPE pipeline (gen8 or gen9 variant), program every slice,
 * terminate the aux batch with MI_BATCH_BUFFER_END, flush the media batch,
 * and on gen9 perform the pipeline-end sequence. */
1608 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1609 struct encode_state *encode_state,
1610 struct intel_encoder_context *encoder_context)
1612 struct i965_driver_data *i965 = i965_driver_data(ctx);
1613 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1614 struct intel_batchbuffer *batch = encoder_context->base.batch;
1617 intel_batchbuffer_start_atomic(batch, 0x4000);
1619 if (IS_GEN9(i965->intel.device_info))
1620 gen9_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1622 gen8_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1624 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1625 gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1628 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
/* Terminate the second-level (aux) batch the GPU will execute. */
1630 intel_batchbuffer_align(slice_batch, 8);
1631 BEGIN_BCS_BATCH(slice_batch, 2);
1632 OUT_BCS_BATCH(slice_batch, 0);
1633 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1634 ADVANCE_BCS_BATCH(slice_batch);
1636 BEGIN_BATCH(batch, 2);
1637 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
1638 OUT_BATCH(batch, 0);
1639 ADVANCE_BATCH(batch);
1642 intel_batchbuffer_end_atomic(batch);
1643 intel_batchbuffer_flush(batch);
1645 if (IS_GEN9(i965->intel.device_info))
1646 gen9_gpe_pipeline_end(ctx, &mfc_context->gpe_context, batch);
/* Orchestrate GPU-side batchbuffer generation: surfaces, interface
 * descriptors, constants, then the media pipeline itself. */
1650 gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1651 struct encode_state *encode_state,
1652 struct intel_encoder_context *encoder_context)
1654 gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1655 gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1656 gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1657 gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/* Hardware path: build the PAK batchbuffer on the GPU and return the BO
 * holding it. Takes an extra reference; the caller unreferences it. */
1661 gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1662 struct encode_state *encode_state,
1663 struct intel_encoder_context *encoder_context)
1665 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1667 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1668 gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1670 return mfc_context->aux_batchbuffer_surface.bo;
/* Top-level BCS programming for one AVC frame: rejects interlaced input,
 * builds the slice-level batch (software or hardware path), then emits the
 * picture-level state followed by an MI_BATCH_BUFFER_START that chains to
 * the slice batch BO. */
1674 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1675 struct encode_state *encode_state,
1676 struct intel_encoder_context *encoder_context)
1678 struct intel_batchbuffer *batch = encoder_context->base.batch;
1679 dri_bo *slice_batch_bo;
1681 if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1682 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1687 if (encoder_context->soft_batch_force)
1688 slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1690 slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1694 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1695 intel_batchbuffer_emit_mi_flush(batch);
1697 // picture level programing
1698 gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the second-level slice batch buffer. */
1700 BEGIN_BCS_BATCH(batch, 3);
1701 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1702 OUT_BCS_RELOC(batch,
1704 I915_GEM_DOMAIN_COMMAND, 0,
1706 OUT_BCS_BATCH(batch, 0);
1707 ADVANCE_BCS_BATCH(batch);
1710 intel_batchbuffer_end_atomic(batch);
/* Drop the reference taken by the software/hardware batchbuffer builder. */
1712 dri_bo_unreference(slice_batch_bo);
/* Encode one AVC picture: init the MFC context, prepare reference/coded
 * surfaces, program and kick the BCS pipeline, then (in CBR mode) run the
 * BRC post-pack loop, updating HRD state on success and logging once on an
 * unrepairable overflow/underflow.
 * Returns a VA_STATUS_* code.
 * Fix: the gen8_mfc_stop() call previously read "¤t_frame_bits_size" --
 * mojibake of "&current_frame_bits_size" (the "&curren" prefix had been
 * collapsed into the U+00A4 currency sign). Restored the address-of
 * argument so the compiler can resolve the identifier declared above. */
1717 gen8_mfc_avc_encode_picture(VADriverContextP ctx,
1718 struct encode_state *encode_state,
1719 struct intel_encoder_context *encoder_context)
1721 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1722 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1723 int current_frame_bits_size;
1727 gen8_mfc_init(ctx, encode_state, encoder_context);
1728 intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1729 /*Programing bcs pipeline*/
1730 gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1731 gen8_mfc_run(ctx, encode_state, encoder_context);
1732 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1733 gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1734 sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
1735 if (sts == BRC_NO_HRD_VIOLATION) {
1736 intel_mfc_hrd_context_update(encode_state, mfc_context);
1739 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* Already at the QP limit; warn once per context, then give up. */
1740 if (!mfc_context->hrd.violation_noted) {
1741 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1742 mfc_context->hrd.violation_noted = 1;
1744 return VA_STATUS_SUCCESS;
1751 return VA_STATUS_SUCCESS;
1759 va_to_gen8_mpeg2_picture_type[3] = {
/* Emit MFX_MPEG2_PIC_STATE from the VA MPEG-2 picture parameters: f_codes,
 * picture-coding-extension flags, picture type, frame dimensions in MBs,
 * and (QP-dependent) rate-control thresholds. Uses the first slice's
 * quantiser_scale_code to pick the threshold dword. */
1766 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1767 struct intel_encoder_context *encoder_context,
1768 struct encode_state *encode_state)
1770 struct intel_batchbuffer *batch = encoder_context->base.batch;
1771 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1772 VAEncPictureParameterBufferMPEG2 *pic_param;
1773 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1774 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1775 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1777 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1778 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1779 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1781 BEGIN_BCS_BATCH(batch, 13);
1782 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1783 OUT_BCS_BATCH(batch,
1784 (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1785 (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1786 (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1787 (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1788 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1789 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1790 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1791 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1792 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1793 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1794 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1795 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1796 OUT_BCS_BATCH(batch,
1797 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1798 va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1800 OUT_BCS_BATCH(batch,
1801 1 << 31 | /* slice concealment */
1802 (height_in_mbs - 1) << 16 |
1803 (width_in_mbs - 1));
/* Threshold dword selection by slice QP -- TODO confirm bit meanings
 * against the PRM; full comment lines are missing from this listing. */
1805 if (slice_param && slice_param->quantiser_scale_code >= 14)
1806 OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1808 OUT_BCS_BATCH(batch, 0);
1810 OUT_BCS_BATCH(batch, 0);
1811 OUT_BCS_BATCH(batch,
1812 0xFFF << 16 | /* InterMBMaxSize */
1813 0xFFF << 0 | /* IntraMBMaxSize */
1815 OUT_BCS_BATCH(batch, 0);
1816 OUT_BCS_BATCH(batch, 0);
1817 OUT_BCS_BATCH(batch, 0);
1818 OUT_BCS_BATCH(batch, 0);
1819 OUT_BCS_BATCH(batch, 0);
1820 OUT_BCS_BATCH(batch, 0);
1821 ADVANCE_BCS_BATCH(batch);
/* Load the MPEG-2 quantiser matrices into the QM state. The intra table
 * matches the ISO/IEC 13818-2 default intra matrix; the non-intra table is
 * the spec's flat default of 16. Both are uploaded as 16 dwords each. */
1825 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1827 unsigned char intra_qm[64] = {
1828 8, 16, 19, 22, 26, 27, 29, 34,
1829 16, 16, 22, 24, 27, 29, 34, 37,
1830 19, 22, 26, 27, 29, 34, 34, 38,
1831 22, 22, 26, 27, 29, 34, 37, 40,
1832 22, 26, 27, 29, 32, 35, 40, 48,
1833 26, 27, 29, 32, 35, 40, 48, 58,
1834 26, 27, 29, 34, 38, 46, 56, 69,
1835 27, 29, 35, 38, 46, 56, 69, 83
1838 unsigned char non_intra_qm[64] = {
1839 16, 16, 16, 16, 16, 16, 16, 16,
1840 16, 16, 16, 16, 16, 16, 16, 16,
1841 16, 16, 16, 16, 16, 16, 16, 16,
1842 16, 16, 16, 16, 16, 16, 16, 16,
1843 16, 16, 16, 16, 16, 16, 16, 16,
1844 16, 16, 16, 16, 16, 16, 16, 16,
1845 16, 16, 16, 16, 16, 16, 16, 16,
1846 16, 16, 16, 16, 16, 16, 16, 16
1849 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1850 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
/* Load the forward quantiser matrices (fixed-point reciprocals, 65536/q)
 * used by the encoder's forward quantisation. Non-intra is flat 65536/16
 * (0x1000). Uploaded as 32 dwords of packed 16-bit entries.
 * NOTE(review): several intra entries look inconsistent with the
 * reciprocal of the intra_qm table above (e.g. 65536/0x13 and 65536/0x18
 * in rows 4-5 where 0x1d/0x16 reciprocals would be expected) -- verify
 * against the hardware documentation before relying on exact values. */
1854 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1856 unsigned short intra_fqm[64] = {
1857 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1858 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1859 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1860 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1861 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1862 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1863 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1864 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1867 unsigned short non_intra_fqm[64] = {
1868 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1869 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1870 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1871 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1872 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1873 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1874 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1875 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1878 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1879 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
/* Emit MFC_MPEG2_SLICEGROUP_STATE for one slice group: last-group and
 * intra flags, the group QP, and the (once-loaded) bitstream pointer.
 * Several fields are placeholders (see FIXME comments in the body).
 * NOTE(review): `batch` parameter is overwritten from
 * encoder_context->base.batch. */
1883 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1884 struct intel_encoder_context *encoder_context,
1886 int next_x, int next_y,
1887 int is_fisrt_slice_group,
1888 int is_last_slice_group,
1891 struct intel_batchbuffer *batch)
1893 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1896 batch = encoder_context->base.batch;
1898 BEGIN_BCS_BATCH(batch, 8);
1900 OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1901 OUT_BCS_BATCH(batch,
1902 0 << 31 | /* MbRateCtrlFlag */
1903 !!is_last_slice_group << 19 | /* IsLastSliceGrp */
1904 1 << 17 | /* Insert Header before the first slice group data */
1905 1 << 16 | /* SliceData PresentFlag: always 1 */
1906 1 << 15 | /* TailPresentFlag: always 1 */
1907 0 << 14 | /* FirstSliceHdrDisabled: slice header for each slice */
1908 !!intra_slice << 13 | /* IntraSlice */
1909 !!intra_slice << 12 | /* IntraSliceFlag */
1911 OUT_BCS_BATCH(batch,
1917 OUT_BCS_BATCH(batch, qp); /* FIXME: SliceGroupQp */
1918 /* bitstream pointer is only loaded once for the first slice of a frame when
1919 * LoadSlicePointerFlag is 0
1921 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1922 OUT_BCS_BATCH(batch, 0); /* FIXME: */
1923 OUT_BCS_BATCH(batch, 0); /* FIXME: CorrectPoints */
1924 OUT_BCS_BATCH(batch, 0); /* FIXME: CVxxx */
1926 ADVANCE_BCS_BATCH(batch);
/* Emit one MFC_MPEG2_PAK_OBJECT for an intra MB at (x, y): intra MB type,
 * CBP-DC bits set for all three components, slice/group boundary flags,
 * and zeroed motion vectors. Returns the command length in dwords.
 * NOTE(review): `batch` parameter is overwritten from
 * encoder_context->base.batch. */
1930 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1931 struct intel_encoder_context *encoder_context,
1933 int first_mb_in_slice,
1934 int last_mb_in_slice,
1935 int first_mb_in_slice_group,
1936 int last_mb_in_slice_group,
1939 int coded_block_pattern,
1940 unsigned char target_size_in_word,
1941 unsigned char max_size_in_word,
1942 struct intel_batchbuffer *batch)
1944 int len_in_dwords = 9;
1947 batch = encoder_context->base.batch;
1949 BEGIN_BCS_BATCH(batch, len_in_dwords);
1951 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1952 OUT_BCS_BATCH(batch,
1953 0 << 24 | /* PackedMvNum */
1954 0 << 20 | /* MvFormat */
1955 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
1956 0 << 15 | /* TransformFlag: frame DCT */
1957 0 << 14 | /* FieldMbFlag */
1958 1 << 13 | /* IntraMbFlag */
1959 mb_type << 8 | /* MbType: Intra */
1960 0 << 2 | /* SkipMbFlag */
1961 0 << 0 | /* InterMbMode */
1963 OUT_BCS_BATCH(batch, y << 16 | x);
1964 OUT_BCS_BATCH(batch,
1965 max_size_in_word << 24 |
1966 target_size_in_word << 16 |
1967 coded_block_pattern << 6 | /* CBP */
1969 OUT_BCS_BATCH(batch,
1970 last_mb_in_slice << 31 |
1971 first_mb_in_slice << 30 |
1972 0 << 27 | /* EnableCoeffClamp */
1973 last_mb_in_slice_group << 26 |
1974 0 << 25 | /* MbSkipConvDisable */
1975 first_mb_in_slice_group << 24 |
1976 0 << 16 | /* MvFieldSelect */
1977 qp_scale_code << 0 |
/* Intra MBs carry no motion: all four MV slots are zero. */
1979 OUT_BCS_BATCH(batch, 0); /* MV[0][0] */
1980 OUT_BCS_BATCH(batch, 0); /* MV[1][0] */
1981 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
1982 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
1984 ADVANCE_BCS_BATCH(batch);
1986 return len_in_dwords;
1990 #define MPEG2_INTER_MV_OFFSET 48
/* Per-f_code legal motion-vector range, in half-pel units.
 * Indexed by the MPEG-2 f_code value (1..9); the initializer entries
 * are on lines not visible in this extraction. */
1992 static struct _mv_ranges
1994 int low; /* in the unit of 1/2 pixel */
1995 int high; /* in the unit of 1/2 pixel */
/*
 * Clamp a candidate motion vector (half-pel units) for the macroblock at
 * index 'pos' along one axis so that (a) the referenced 16x16 block stays
 * inside [0, display_max) pixels and (b) the vector fits the legal range
 * for the given MPEG-2 f_code (1..9) from mv_ranges[] above.
 * NOTE(review): the early-out branch body and the final return are on
 * lines missing from this extraction.
 */
2010 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
/* Reject vectors that would reference pixels outside the picture
 * (positions are MB indices, hence the *16; *2 converts to half-pel). */
2012 if (mv + pos * 16 * 2 < 0 ||
2013 mv + (pos + 1) * 16 * 2 > display_max * 2)
2016 if (f_code > 0 && f_code < 10) {
2017 if (mv < mv_ranges[f_code].low)
2018 mv = mv_ranges[f_code].low;
2020 if (mv > mv_ranges[f_code].high)
2021 mv = mv_ranges[f_code].high;
/*
 * Emit one MFC_MPEG2_PAK_OBJECT command for an INTER macroblock.
 * Reads two packed motion vectors from the VME output message ('msg', at
 * byte offset MPEG2_INTER_MV_OFFSET), halves them and clamps each
 * component with mpeg2_motion_vector() using the picture f_codes, then
 * writes the 9-dword PAK object (CBP forced to 0x3f, frame prediction).
 * Returns the command length in dwords.
 */
2028 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2029 struct encode_state *encode_state,
2030 struct intel_encoder_context *encoder_context,
2032 int width_in_mbs, int height_in_mbs,
2034 int first_mb_in_slice,
2035 int last_mb_in_slice,
2036 int first_mb_in_slice_group,
2037 int last_mb_in_slice_group,
2039 unsigned char target_size_in_word,
2040 unsigned char max_size_in_word,
2041 struct intel_batchbuffer *batch)
2043 VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2044 int len_in_dwords = 9;
2045 short *mvptr, mvx0, mvy0, mvx1, mvy1;
2048 batch = encoder_context->base.batch;
/* NOTE(review): stray ';;' below — harmless empty statement. */
2050 mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
/* VME MVs are quarter-pel-scaled; /2 yields half-pel before clamping. */
2051 mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2052 mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2053 mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2054 mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2056 BEGIN_BCS_BATCH(batch, len_in_dwords);
2058 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2059 OUT_BCS_BATCH(batch,
2060 2 << 24 | /* PackedMvNum */
2061 7 << 20 | /* MvFormat */
2062 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
2063 0 << 15 | /* TransformFlag: frame DCT */
2064 0 << 14 | /* FieldMbFlag */
2065 0 << 13 | /* IntraMbFlag */
2066 1 << 8 | /* MbType: Frame-based */
2067 0 << 2 | /* SkipMbFlag */
2068 0 << 0 | /* InterMbMode */
2070 OUT_BCS_BATCH(batch, y << 16 | x);
2071 OUT_BCS_BATCH(batch,
2072 max_size_in_word << 24 |
2073 target_size_in_word << 16 |
2074 0x3f << 6 | /* CBP */
2076 OUT_BCS_BATCH(batch,
2077 last_mb_in_slice << 31 |
2078 first_mb_in_slice << 30 |
2079 0 << 27 | /* EnableCoeffClamp */
2080 last_mb_in_slice_group << 26 |
2081 0 << 25 | /* MbSkipConvDisable */
2082 first_mb_in_slice_group << 24 |
2083 0 << 16 | /* MvFieldSelect */
2084 qp_scale_code << 0 |
/* Pack each MV as (y << 16 | x) in one dword. */
2087 OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16); /* MV[0][0] */
2088 OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16); /* MV[1][0] */
2089 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
2090 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
2092 ADVANCE_BCS_BATCH(batch);
2094 return len_in_dwords;
/*
 * Insert the app-supplied packed MPEG-2 sequence (SPS) and picture (PPS)
 * headers into the slice batch via mfc_context->insert_object().
 * Each header is emitted only if packed_header_data for its index exists;
 * its bit length comes from the matching packed_header_param buffer.
 * MPEG-2 needs no emulation-prevention bytes, hence the 0 flag.
 */
2098 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2099 struct encode_state *encode_state,
2100 struct intel_encoder_context *encoder_context,
2101 struct intel_batchbuffer *slice_batch)
2103 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2104 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2106 if (encode_state->packed_header_data[idx]) {
2107 VAEncPackedHeaderParameterBuffer *param = NULL;
2108 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2109 unsigned int length_in_bits;
2111 assert(encode_state->packed_header_param[idx]);
2112 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2113 length_in_bits = param->bit_length;
2115 mfc_context->insert_object(ctx,
2118 ALIGN(length_in_bits, 32) >> 5,
2119 length_in_bits & 0x1f,
2120 5, /* FIXME: check it */
2123 0, /* Needn't insert emulation bytes for MPEG-2 */
/* Same flow for the packed picture header (PPS). */
2127 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2129 if (encode_state->packed_header_data[idx]) {
2130 VAEncPackedHeaderParameterBuffer *param = NULL;
2131 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2132 unsigned int length_in_bits;
2134 assert(encode_state->packed_header_param[idx]);
2135 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2136 length_in_bits = param->bit_length;
2138 mfc_context->insert_object(ctx,
2141 ALIGN(length_in_bits, 32) >> 5,
2142 length_in_bits & 0x1f,
2143 5, /* FIXME: check it */
2146 0, /* Needn't insert emulation bytes for MPEG-2 */
/*
 * Program one MPEG-2 slice group into the slice batch:
 *   1. emit MFC_MPEG2_SLICEGROUP_STATE for this group;
 *   2. for the first group only, insert the packed SPS/PPS headers;
 *   3. insert a '00' section delimiter so the header byte-aligns;
 *   4. walk every macroblock of every slice element, emitting an intra
 *      or inter PAK object — for non-intra slices the choice is made by
 *      comparing the VME intra/inter RDO costs from the mapped VME
 *      output buffer;
 *   5. append the sequence-end tail delimiter (last group) or another
 *      section delimiter (more groups follow).
 */
2152 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2153 struct encode_state *encode_state,
2154 struct intel_encoder_context *encoder_context,
2156 VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2157 struct intel_batchbuffer *slice_batch)
2159 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2160 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2161 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2162 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2163 unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2164 unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2165 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2166 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2168 int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2169 unsigned int *msg = NULL;
2170 unsigned char *msg_ptr = NULL;
2172 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2173 h_start_pos = slice_param->macroblock_address % width_in_mbs;
2174 v_start_pos = slice_param->macroblock_address / width_in_mbs;
/* An MPEG-2 slice must not wrap across a macroblock row. */
2175 assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
/* Map the VME output so per-MB RDO costs and MVs can be read below. */
2177 dri_bo_map(vme_context->vme_output.bo , 0);
2178 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2180 if (next_slice_group_param) {
2181 h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2182 v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2184 h_next_start_pos = 0;
2185 v_next_start_pos = height_in_mbs;
2188 gen8_mfc_mpeg2_slicegroup_state(ctx,
2195 next_slice_group_param == NULL,
2196 slice_param->is_intra_slice,
2197 slice_param->quantiser_scale_code,
/* Packed SPS/PPS headers go in front of the very first slice group only. */
2200 if (slice_index == 0)
2201 intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2203 /* Insert '00' to make sure the header is valid */
2204 mfc_context->insert_object(ctx,
2206 (unsigned int*)section_delimiter,
2208 8, /* 8bits in the last DWORD */
2215 for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2216 /* PAK for each macroblocks */
2217 for (j = 0; j < slice_param->num_macroblocks; j++) {
2218 int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2219 int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2220 int first_mb_in_slice = (j == 0);
2221 int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2222 int first_mb_in_slice_group = (i == 0 && j == 0);
2223 int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2224 j == slice_param->num_macroblocks - 1);
/* VME message for this MB, indexed by absolute MB address. */
2226 msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2228 if (slice_param->is_intra_slice) {
2229 gen8_mfc_mpeg2_pak_object_intra(ctx,
2234 first_mb_in_slice_group,
2235 last_mb_in_slice_group,
2237 slice_param->quantiser_scale_code,
2243 int inter_rdo, intra_rdo;
/* Mode decision: pick whichever of intra/inter VME costs is lower. */
2244 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2245 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2247 if (intra_rdo < inter_rdo)
2248 gen8_mfc_mpeg2_pak_object_intra(ctx,
2253 first_mb_in_slice_group,
2254 last_mb_in_slice_group,
2256 slice_param->quantiser_scale_code,
2262 gen8_mfc_mpeg2_pak_object_inter(ctx,
2266 width_in_mbs, height_in_mbs,
2270 first_mb_in_slice_group,
2271 last_mb_in_slice_group,
2272 slice_param->quantiser_scale_code,
2282 dri_bo_unmap(vme_context->vme_output.bo);
2285 if (next_slice_group_param == NULL) { /* end of a picture */
2286 mfc_context->insert_object(ctx,
2288 (unsigned int *)tail_delimiter,
2290 8, /* 8bits in the last DWORD */
2296 } else { /* end of a slice group */
2297 mfc_context->insert_object(ctx,
2299 (unsigned int *)section_delimiter,
2301 8, /* 8bits in the last DWORD */
2311 * A batch buffer for all slices, including slice state,
2312 * slice insert object and slice pak object commands
/*
 * Build the software slice batchbuffer: programs every slice group into
 * the auxiliary batch, pads to 8-byte alignment, terminates with
 * MI_BATCH_BUFFER_END, then detaches and returns the underlying bo
 * (with an extra reference; the caller must unreference it).  The aux
 * batchbuffer wrapper is freed and cleared so it is not reused.
 */
2316 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2317 struct encode_state *encode_state,
2318 struct intel_encoder_context *encoder_context)
2320 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2321 struct intel_batchbuffer *batch;
2322 VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2326 batch = mfc_context->aux_batchbuffer;
2327 batch_bo = batch->buffer;
2329 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
/* Look one slice group ahead so the last group can emit the tail. */
2330 if (i == encode_state->num_slice_params_ext - 1)
2331 next_slice_group_param = NULL;
2333 next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2335 gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2338 intel_batchbuffer_align(batch, 8);
2340 BEGIN_BCS_BATCH(batch, 2);
2341 OUT_BCS_BATCH(batch, 0);
2342 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2343 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past the wrapper free; ownership passes to caller. */
2345 dri_bo_reference(batch_bo);
2346 intel_batchbuffer_free(batch);
2347 mfc_context->aux_batchbuffer = NULL;
/*
 * Emit the picture-level MFX state for MPEG-2 encoding, in the order the
 * hardware expects: pipe mode select, surface state, indirect object
 * base addresses, pipe/BSP buffer addresses, picture state, and the
 * QM/FQM quantisation matrices.
 */
2353 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2354 struct encode_state *encode_state,
2355 struct intel_encoder_context *encoder_context)
2357 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2359 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2360 mfc_context->set_surface_state(ctx, encoder_context);
2361 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2362 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2363 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2364 gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2365 gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2366 gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
/*
 * Top-level BCS programming for one MPEG-2 frame: first builds the
 * software slice batchbuffer, then records an atomic BCS batch that
 * flushes, programs picture-level state, and chains into the slice bo
 * with a second-level MI_BATCH_BUFFER_START.  The local reference to
 * the slice bo is dropped once it has been relocated into the batch.
 */
2370 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2371 struct encode_state *encode_state,
2372 struct intel_encoder_context *encoder_context)
2374 struct intel_batchbuffer *batch = encoder_context->base.batch;
2375 dri_bo *slice_batch_bo;
2377 slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2380 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
2381 intel_batchbuffer_emit_mi_flush(batch);
2383 // picture level programing
2384 gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the slice batchbuffer (bits 8/0: second-level, GGTT-less). */
2386 BEGIN_BCS_BATCH(batch, 4);
2387 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2388 OUT_BCS_RELOC(batch,
2390 I915_GEM_DOMAIN_COMMAND, 0,
2392 OUT_BCS_BATCH(batch, 0);
2393 OUT_BCS_BATCH(batch, 0);
2394 ADVANCE_BCS_BATCH(batch);
2397 intel_batchbuffer_end_atomic(batch);
2399 dri_bo_unreference(slice_batch_bo);
/*
 * Bind all buffers the MPEG-2 PAK stage needs into mfc_context:
 * reconstructed surface (allocated as NV12 if necessary), forward and
 * backward reference surfaces (backward falls back to forward when
 * absent; remaining slots alternate between the two), the input YUV
 * surface, and the coded output buffer (payload starts after the
 * internal header; end offset rounded down by a 4 KB guard).  Also
 * resets the coded-buffer segment header so the coded size reads as
 * unknown.  Each bo stored takes its own reference.
 */
2403 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2404 struct encode_state *encode_state,
2405 struct intel_encoder_context *encoder_context)
2407 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2408 struct object_surface *obj_surface;
2409 struct object_buffer *obj_buffer;
2410 struct i965_coded_buffer_segment *coded_buffer_segment;
2411 VAStatus vaStatus = VA_STATUS_SUCCESS;
2415 /* reconstructed surface */
2416 obj_surface = encode_state->reconstructed_object;
2417 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2418 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2419 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2420 mfc_context->surface_state.width = obj_surface->orig_width;
2421 mfc_context->surface_state.height = obj_surface->orig_height;
2422 mfc_context->surface_state.w_pitch = obj_surface->width;
2423 mfc_context->surface_state.h_pitch = obj_surface->height;
2425 /* forward reference */
2426 obj_surface = encode_state->reference_objects[0];
2428 if (obj_surface && obj_surface->bo) {
2429 mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2430 dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2432 mfc_context->reference_surfaces[0].bo = NULL;
2434 /* backward reference */
2435 obj_surface = encode_state->reference_objects[1];
2437 if (obj_surface && obj_surface->bo) {
2438 mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2439 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
/* No backward reference: reuse the forward one. */
2441 mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2443 if (mfc_context->reference_surfaces[1].bo)
2444 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
/* Fill remaining slots alternating forward/backward (i & 1). */
2447 for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2448 mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2450 if (mfc_context->reference_surfaces[i].bo)
2451 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2454 /* input YUV surface */
2455 obj_surface = encode_state->input_yuv_object;
2456 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2457 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2460 obj_buffer = encode_state->coded_buf_object;
2461 bo = obj_buffer->buffer_store->bo;
2462 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2463 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2464 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2465 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2467 /* set the internal flag to 0 to indicate the coded size is unknown */
2469 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2470 coded_buffer_segment->mapped = 0;
2471 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Entry point for encoding one MPEG-2 picture on Gen8:
 * init MFC state, bind buffers, program the BCS pipeline, then submit.
 * Always reports VA_STATUS_SUCCESS.
 */
2478 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx,
2479 struct encode_state *encode_state,
2480 struct intel_encoder_context *encoder_context)
2482 gen8_mfc_init(ctx, encode_state, encoder_context);
2483 intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2484 /*Programing bcs pipeline*/
2485 gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2486 gen8_mfc_run(ctx, encode_state, encoder_context);
2488 return VA_STATUS_SUCCESS;
2491 /* JPEG encode methods */
/*
 * Bind the buffers the JPEG PAK stage needs: the input YUV surface and
 * the coded output buffer (payload after the internal header; end
 * offset rounded down with a 4 KB guard).  Resets the coded-buffer
 * segment header so the coded size reads as unknown.  JPEG has no
 * reference or reconstructed surfaces, so nothing else is bound.
 */
2494 intel_mfc_jpeg_prepare(VADriverContextP ctx,
2495 struct encode_state *encode_state,
2496 struct intel_encoder_context *encoder_context)
2498 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2499 struct object_surface *obj_surface;
2500 struct object_buffer *obj_buffer;
2501 struct i965_coded_buffer_segment *coded_buffer_segment;
2502 VAStatus vaStatus = VA_STATUS_SUCCESS;
2505 /* input YUV surface */
2506 obj_surface = encode_state->input_yuv_object;
2507 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2508 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2511 obj_buffer = encode_state->coded_buf_object;
2512 bo = obj_buffer->buffer_store->bo;
2513 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2514 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2515 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2516 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2518 /* set the internal flag to 0 to indicate the coded size is unknown */
2520 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2521 coded_buffer_segment->mapped = 0;
2522 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit MFX_SURFACE_STATE for the JPEG input surface.  Maps the surface
 * FOURCC to an MFX surface format (the switch below overrides the
 * NV12/Y800 default computed first), and programs dimensions, pitch,
 * tiling (Y-major) and the Cb/Cr plane Y-offsets taken from the
 * object_surface.
 */
2530 gen8_mfc_jpeg_set_surface_state(VADriverContextP ctx,
2531 struct intel_encoder_context *encoder_context,
2532 struct encode_state *encode_state)
2534 struct intel_batchbuffer *batch = encoder_context->base.batch;
2535 struct object_surface *obj_surface = encode_state->input_yuv_object;
2536 unsigned int input_fourcc;
2537 unsigned int y_cb_offset;
2538 unsigned int y_cr_offset;
2539 unsigned int surface_format;
2541 assert(obj_surface);
2543 y_cb_offset = obj_surface->y_cb_offset;
2544 y_cr_offset = obj_surface->y_cr_offset;
2545 input_fourcc = obj_surface->fourcc;
/* Default mapping; refined per-FOURCC by the switch below. */
2547 surface_format = (obj_surface->fourcc == VA_FOURCC_Y800) ?
2548 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
2551 switch (input_fourcc) {
2552 case VA_FOURCC_Y800: {
2553 surface_format = MFX_SURFACE_MONOCHROME;
2556 case VA_FOURCC_NV12: {
2557 surface_format = MFX_SURFACE_PLANAR_420_8;
2560 case VA_FOURCC_UYVY: {
2561 surface_format = MFX_SURFACE_YCRCB_SWAPY;
2564 case VA_FOURCC_YUY2: {
2565 surface_format = MFX_SURFACE_YCRCB_NORMAL;
2568 case VA_FOURCC_RGBA:
2569 case VA_FOURCC_444P: {
2570 surface_format = MFX_SURFACE_R8G8B8A8_UNORM;
2575 BEGIN_BCS_BATCH(batch, 6);
2577 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2578 OUT_BCS_BATCH(batch, 0);
2579 OUT_BCS_BATCH(batch,
2580 ((obj_surface->orig_height - 1) << 18) |
2581 ((obj_surface->orig_width - 1) << 4));
2582 OUT_BCS_BATCH(batch,
2583 (surface_format << 28) | /* Surface Format */
2584 (0 << 27) | /* must be 1 for interleave U/V, hardware requirement for AVC/VC1/MPEG and 0 for JPEG */
2585 (0 << 22) | /* surface object control state, FIXME??? */
2586 ((obj_surface->width - 1) << 3) | /* pitch */
2587 (0 << 2) | /* must be 0 for interleave U/V */
2588 (1 << 1) | /* must be tiled */
2589 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2590 OUT_BCS_BATCH(batch,
2591 (0 << 16) | /* X offset for U(Cb), must be 0 */
2592 (y_cb_offset << 0)); /* Y offset for U(Cb) */
2593 OUT_BCS_BATCH(batch,
2594 (0 << 16) | /* X offset for V(Cr), must be 0 */
2595 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2598 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_JPEG_PIC_STATE.  Derives the encoder's input surface format
 * and output MCU structure from the surface FOURCC, then computes the
 * MCU grid geometry: the partial-pixel counts of the last MCU in each
 * direction and the frame size in 8x8 blocks, using the subsampling
 * factors implied by the MCU format (8x8 MCUs for YUV400/RGB, 16x16
 * for YUV420, 16x8 for YUV422H_2Y).
 */
2602 gen8_mfc_jpeg_pic_state(VADriverContextP ctx,
2603 struct intel_encoder_context *encoder_context,
2604 struct encode_state *encode_state)
2606 struct intel_batchbuffer *batch = encoder_context->base.batch;
2607 struct object_surface *obj_surface = encode_state->input_yuv_object;
2608 VAEncPictureParameterBufferJPEG *pic_param;
2609 unsigned int surface_format;
2610 unsigned int frame_width_in_blks;
2611 unsigned int frame_height_in_blks;
2612 unsigned int pixels_in_horizontal_lastMCU;
2613 unsigned int pixels_in_vertical_lastMCU;
2614 unsigned int input_surface_format;
2615 unsigned int output_mcu_format;
2616 unsigned int picture_width;
2617 unsigned int picture_height;
2619 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2620 assert(obj_surface);
2621 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2622 surface_format = obj_surface->fourcc;
2623 picture_width = pic_param->picture_width;
2624 picture_height = pic_param->picture_height;
/* FOURCC -> (hardware input format, output MCU structure). */
2626 switch (surface_format) {
2627 case VA_FOURCC_Y800: {
2628 input_surface_format = JPEG_ENC_SURFACE_Y8;
2629 output_mcu_format = JPEG_ENC_MCU_YUV400;
2632 case VA_FOURCC_NV12: {
2633 input_surface_format = JPEG_ENC_SURFACE_NV12;
2634 output_mcu_format = JPEG_ENC_MCU_YUV420;
2637 case VA_FOURCC_UYVY: {
2638 input_surface_format = JPEG_ENC_SURFACE_UYVY;
2639 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2642 case VA_FOURCC_YUY2: {
2643 input_surface_format = JPEG_ENC_SURFACE_YUY2;
2644 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2648 case VA_FOURCC_RGBA:
2649 case VA_FOURCC_444P: {
2650 input_surface_format = JPEG_ENC_SURFACE_RGB;
2651 output_mcu_format = JPEG_ENC_MCU_RGB;
/* Unknown FOURCC: fall back to NV12/YUV420. */
2655 input_surface_format = JPEG_ENC_SURFACE_NV12;
2656 output_mcu_format = JPEG_ENC_MCU_YUV420;
2662 switch (output_mcu_format) {
2664 case JPEG_ENC_MCU_YUV400:
2665 case JPEG_ENC_MCU_RGB: {
2666 pixels_in_horizontal_lastMCU = (picture_width % 8);
2667 pixels_in_vertical_lastMCU = (picture_height % 8);
2669 //H1=1,V1=1 for YUV400 and YUV444. So, compute these values accordingly
2670 frame_width_in_blks = ((picture_width + 7) / 8);
2671 frame_height_in_blks = ((picture_height + 7) / 8);
2675 case JPEG_ENC_MCU_YUV420: {
2676 if((picture_width % 2) == 0)
2677 pixels_in_horizontal_lastMCU = picture_width % 16;
/* Odd width: round up to the even width the hardware encodes. */
2679 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2681 if((picture_height % 2) == 0)
2682 pixels_in_vertical_lastMCU = picture_height % 16;
2684 pixels_in_vertical_lastMCU = ((picture_height % 16) + 1) % 16;
2686 //H1=2,V1=2 for YUV420. So, compute these values accordingly
2687 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2688 frame_height_in_blks = ((picture_height + 15) / 16) * 2;
2692 case JPEG_ENC_MCU_YUV422H_2Y: {
2693 if(picture_width % 2 == 0)
2694 pixels_in_horizontal_lastMCU = picture_width % 16;
2696 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2698 pixels_in_vertical_lastMCU = picture_height % 8;
2700 //H1=2,V1=1 for YUV422H_2Y. So, compute these values accordingly
2701 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2702 frame_height_in_blks = ((picture_height + 7) / 8);
2707 BEGIN_BCS_BATCH(batch, 3);
2709 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2711 OUT_BCS_BATCH(batch,
2712 ( pixels_in_horizontal_lastMCU << 26) | /* Pixels In Horizontal Last MCU */
2713 ( pixels_in_vertical_lastMCU << 21) | /* Pixels In Vertical Last MCU */
2714 ( input_surface_format << 8) | /* Input Surface format */
2715 ( output_mcu_format << 0)); /* Output MCU Structure */
2717 OUT_BCS_BATCH(batch,
2718 ((frame_height_in_blks - 1) << 16) | /* Frame Height In Blks Minus 1 */
2719 (JPEG_ENC_ROUND_QUANT_DEFAULT << 13) | /* Rounding Quant set to default value 0 */
2720 ((frame_width_in_blks - 1) << 0)); /* Frame Width In Blks Minus 1 */
2721 ADVANCE_BCS_BATCH(batch);
/*
 * Convert a 64-entry quantisation matrix into the 32-dword reciprocal
 * form the hardware expects: each entry becomes 65535/Q (so callers must
 * guarantee Q >= 1; gen8_mfc_jpeg_fqm_state clamps to [1,255]), and two
 * 16-bit reciprocals are packed per dword, low entry in the low half.
 * NOTE(review): as visible here the packing loop runs i++ while reading
 * reciprocal_qm[i+1], which at i==63 would read one past the array; the
 * loop-increment/j-advance lines are missing from this extraction —
 * confirm the loop actually steps i by 2 per iteration.
 */
2725 get_reciprocal_dword_qm(unsigned char *raster_qm, uint32_t *dword_qm)
2728 short reciprocal_qm[64];
2730 for(i=0; i<64; i++) {
2731 reciprocal_qm[i] = 65535/(raster_qm[i]);
2734 for(i=0; i<64; i++) {
2735 dword_qm[j] = ((reciprocal_qm[i+1] <<16) | (reciprocal_qm[i]));
/*
 * Program the JPEG forward-quantisation matrices (MFX FQM state).
 * Uses the app's VAQMatrixBufferJPEG when supplied (and caches it in
 * mfc_context->buffered_qmatrix for later frames), otherwise the
 * buffered/default matrices.  For each matrix: scale by the normalised
 * quality factor, clamp to [1,255], de-zigzag into raster order,
 * transpose to column order, convert to packed reciprocals, and send
 * via gen8_mfc_fqm_state.  The chroma matrix is sent for both Cb and Cr.
 */
2744 gen8_mfc_jpeg_fqm_state(VADriverContextP ctx,
2745 struct intel_encoder_context *encoder_context,
2746 struct encode_state *encode_state)
2748 unsigned int quality = 0;
2749 uint32_t temp, i = 0, j = 0, dword_qm[32];
2750 VAEncPictureParameterBufferJPEG *pic_param;
2751 VAQMatrixBufferJPEG *qmatrix;
2752 unsigned char raster_qm[64], column_raster_qm[64];
2753 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2755 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2756 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2757 quality = pic_param->quality;
2759 //If the app sends the qmatrix, use it, buffer it for using it with the next frames
2760 //The app can send qmatrix for the first frame and not send for the subsequent frames
2761 if(encode_state->q_matrix && encode_state->q_matrix->buffer) {
2762 qmatrix = (VAQMatrixBufferJPEG *)encode_state->q_matrix->buffer;
2764 mfc_context->buffered_qmatrix.load_lum_quantiser_matrix = 1;
2765 memcpy(mfc_context->buffered_qmatrix.lum_quantiser_matrix, qmatrix->lum_quantiser_matrix, 64 * (sizeof(unsigned char)));
2767 if(pic_param->num_components > 1) {
2768 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 1;
2769 memcpy(mfc_context->buffered_qmatrix.chroma_quantiser_matrix, qmatrix->chroma_quantiser_matrix, 64 * (sizeof(unsigned char)));
2771 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 0;
2775 //If the app doesn't send the qmatrix, use the buffered/default qmatrix
2776 qmatrix = &mfc_context->buffered_qmatrix;
2777 qmatrix->load_lum_quantiser_matrix = 1;
2778 qmatrix->load_chroma_quantiser_matrix = (pic_param->num_components > 1) ? 1 : 0;
2782 //As per the design, normalization of the quality factor and scaling of the Quantization tables
2783 //based on the quality factor needs to be done in the driver before sending the values to the HW.
2784 //But note, the driver expects the scaled quantization tables (as per below logic) to be sent as
2785 //packed header information. The packed header is written as the header of the jpeg file. This
2786 //header information is used to decode the jpeg file. So, it is the app's responsibility to send
2787 //the correct header information (See build_packed_jpeg_header_buffer() in jpegenc.c in LibVa on
2788 //how to do this). QTables can be different for different applications. If no tables are provided,
2789 //the default tables in the driver are used.
2791 //Normalization of the quality factor
2792 if (quality > 100) quality=100;
2793 if (quality == 0) quality=1;
2794 quality = (quality < 50) ? (5000/quality) : (200 - (quality*2));
2796 //Step 1. Apply Quality factor and clip to range [1, 255] for luma and chroma Quantization matrices
2797 //Step 2. HW expects the 1/Q[i] values in the qm sent, so get reciprocals
2798 //Step 3. HW also expects 32 dwords, hence combine 2 (1/Q) values into 1 dword
2799 //Step 4. Send the Quantization matrix to the HW, use gen8_mfc_fqm_state
2802 if(qmatrix->load_lum_quantiser_matrix) {
2803 //apply quality to lum_quantiser_matrix
2804 for(i=0; i < 64; i++) {
2805 temp = (qmatrix->lum_quantiser_matrix[i] * quality)/100;
2806 //clamp to range [1,255]
2807 temp = (temp > 255) ? 255 : temp;
2808 temp = (temp < 1) ? 1 : temp;
2809 qmatrix->lum_quantiser_matrix[i] = (unsigned char)temp;
2812 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2813 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2814 for (j = 0; j < 64; j++)
2815 raster_qm[zigzag_direct[j]] = qmatrix->lum_quantiser_matrix[j];
2817 //Convert the raster order(row-ordered) to the column-raster (column by column).
2818 //To be consistent with the other encoders, send it in column order.
2819 //Need to double check if our HW expects col or row raster.
2820 for (j = 0; j < 64; j++) {
2821 int row = j / 8, col = j % 8;
2822 column_raster_qm[col * 8 + row] = raster_qm[j];
2825 //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2826 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2828 //send the luma qm to the command buffer
2829 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2832 //For Chroma, if chroma exists (Cb, Cr or G, B)
2833 if(qmatrix->load_chroma_quantiser_matrix) {
2834 //apply quality to chroma_quantiser_matrix
2835 for(i=0; i < 64; i++) {
2836 temp = (qmatrix->chroma_quantiser_matrix[i] * quality)/100;
2837 //clamp to range [1,255]
2838 temp = (temp > 255) ? 255 : temp;
2839 temp = (temp < 1) ? 1 : temp;
2840 qmatrix->chroma_quantiser_matrix[i] = (unsigned char)temp;
2843 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2844 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2845 for (j = 0; j < 64; j++)
2846 raster_qm[zigzag_direct[j]] = qmatrix->chroma_quantiser_matrix[j];
2848 //Convert the raster order(row-ordered) to the column-raster (column by column).
2849 //To be consistent with the other encoders, send it in column order.
2850 //Need to double check if our HW expects col or row raster.
2851 for (j = 0; j < 64; j++) {
2852 int row = j / 8, col = j % 8;
2853 column_raster_qm[col * 8 + row] = raster_qm[j];
2857 //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2858 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2860 //send the same chroma qm to the command buffer (for both U,V or G,B)
2861 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2862 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2867 //Translation of Table K.5 into code: This method takes the huffval from the
2868 //Huffmantable buffer and converts into index for the coefficients and size tables
/* Map an AC HUFFVAL byte (run<<4 | size) to its row-major index in the
 * 162-entry code/size tables; values >= 0xF0 (ZRL and beyond) get a +1
 * offset to skip the extra EOB slot. */
2869 uint8_t map_huffval_to_index(uint8_t huff_val)
2873 if(huff_val < 0xF0) {
2874 index = (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
2876 index = 1 + (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
2883 //Implementation of Flow chart Annex C - Figure C.1
/* Expand BITS[] (count of codes per length, 1..16) into HUFFSIZE[]:
 * for each length i, emit 'bits[i-1]' entries of value i, then append a
 * terminating 0 and report the entry count through *lastK.
 * NOTE(review): the outer per-length loop and the i/j/k advance lines
 * are missing from this extraction. */
2885 generate_huffman_codesizes_table(uint8_t *bits, uint8_t *huff_size_table, uint8_t *lastK)
2887 uint8_t i=1, j=1, k=0;
2890 while(j <= (uint8_t)bits[i-1]) {
2891 huff_size_table[k] = i;
2899 huff_size_table[k] = 0;
2903 //Implementation of Flow chart Annex C - Figure C.2
/* Derive HUFFCODE[] from HUFFSIZE[]: consecutive codes of the same size
 * increment by one; when the size grows the code is left-shifted.  The
 * HW cannot represent 0xFFFF, so such a code is replaced with 0.
 * NOTE(review): the code/k increments and the size-change shift are on
 * lines missing from this extraction. */
2905 generate_huffman_codes_table(uint8_t *huff_size_table, uint16_t *huff_code_table)
2909 uint8_t si=huff_size_table[k];
2911 while(huff_size_table[k] != 0) {
2913 while(huff_size_table[k] == si) {
2915 // An huffman code can never be 0xFFFF. Replace it with 0 if 0xFFFF
2916 if(code == 0xFFFF) {
2920 huff_code_table[k] = code;
2931 //Implementation of Flow chat Annex C - Figure C.3
/* Reorder the code/size tables from generation order into symbol order:
 * each HUFFVAL is mapped to its canonical index (map_huffval_to_index)
 * and the k-th generated code/size is stored there; the reordered
 * tables are copied back over the inputs.  type 0 = DC (12 symbols),
 * otherwise AC (162 symbols). */
2933 generate_ordered_codes_table(uint8_t *huff_vals, uint8_t *huff_size_table, uint16_t *huff_code_table, uint8_t type, uint8_t lastK)
2935 uint8_t huff_val_size=0, i=0, k=0;
2937 huff_val_size = (type == 0) ? 12 : 162;
2938 uint8_t huff_si_table[huff_val_size];
2939 uint16_t huff_co_table[huff_val_size];
2941 memset(huff_si_table, 0, sizeof(huff_si_table));
2942 memset(huff_co_table, 0, sizeof(huff_co_table));
2945 i = map_huffval_to_index(huff_vals[k]);
2946 huff_co_table[i] = huff_code_table[k];
2947 huff_si_table[i] = huff_size_table[k];
2951 memcpy(huff_size_table, huff_si_table, sizeof(uint8_t)*huff_val_size);
2952 memcpy(huff_code_table, huff_co_table, sizeof(uint16_t)*huff_val_size);
2956 //This method converts the huffman table to code words which is needed by the HW
2957 //Flowcharts from Jpeg Spec Annex C - Figure C.1, Figure C.2, Figure C.3 are used here
/* Run the three Annex C steps on one DC (type 0, 12 entries) or AC
 * (type != 0, 162 entries) table from the VA huffman buffer, then pack
 * each entry for the HW as: byte0 = code length, bytes1-2 = code word. */
2959 convert_hufftable_to_codes(VAHuffmanTableBufferJPEGBaseline *huff_buffer, uint32_t *table, uint8_t type, uint8_t index)
2961 uint8_t lastK = 0, i=0;
2962 uint8_t huff_val_size = 0;
2963 uint8_t *huff_bits, *huff_vals;
2965 huff_val_size = (type == 0) ? 12 : 162;
2966 uint8_t huff_size_table[huff_val_size+1]; //The +1 for adding 0 at the end of huff_val_size
2967 uint16_t huff_code_table[huff_val_size];
2969 memset(huff_size_table, 0, sizeof(huff_size_table));
2970 memset(huff_code_table, 0, sizeof(huff_code_table));
2972 huff_bits = (type == 0) ? (huff_buffer->huffman_table[index].num_dc_codes) : (huff_buffer->huffman_table[index].num_ac_codes);
2973 huff_vals = (type == 0) ? (huff_buffer->huffman_table[index].dc_values) : (huff_buffer->huffman_table[index].ac_values);
2976 //Generation of table of Huffman code sizes
2977 generate_huffman_codesizes_table(huff_bits, huff_size_table, &lastK);
2979 //Generation of table of Huffman codes
2980 generate_huffman_codes_table(huff_size_table, huff_code_table);
2982 //Ordering procedure for encoding procedure code tables
2983 generate_ordered_codes_table(huff_vals, huff_size_table, huff_code_table, type, lastK);
2985 //HW expects Byte0: Code length; Byte1,Byte2: Code Word, Byte3: Dummy
2986 //Since IA is little-endian, &, | and << accordingly to store the values in the DWord.
2987 for(i=0; i<huff_val_size; i++) {
2989 table[i] = ((huff_size_table[i] & 0xFF) | ((huff_code_table[i] & 0xFFFF) << 8));
2994 //send the huffman table using MFC_JPEG_HUFF_TABLE_STATE
2996 gen8_mfc_jpeg_huff_table_state(VADriverContextP ctx,
2997 struct encode_state *encode_state,
2998 struct intel_encoder_context *encoder_context,
3001 VAHuffmanTableBufferJPEGBaseline *huff_buffer;
3002 struct intel_batchbuffer *batch = encoder_context->base.batch;
3004 uint32_t dc_table[12], ac_table[162];
3006 assert(encode_state->huffman_table && encode_state->huffman_table->buffer);
3007 huff_buffer = (VAHuffmanTableBufferJPEGBaseline *)encode_state->huffman_table->buffer;
3009 memset(dc_table, 0, 12);
3010 memset(ac_table, 0, 162);
3012 for (index = 0; index < num_tables; index++) {
3013 int id = va_to_gen7_jpeg_hufftable[index];
3015 if (!huff_buffer->load_huffman_table[index])
3018 //load DC table with 12 DWords
3019 convert_hufftable_to_codes(huff_buffer, dc_table, 0, index); //0 for Dc
3021 //load AC table with 162 DWords
3022 convert_hufftable_to_codes(huff_buffer, ac_table, 1, index); //1 for AC
3024 BEGIN_BCS_BATCH(batch, 176);
3025 OUT_BCS_BATCH(batch, MFC_JPEG_HUFF_TABLE_STATE | (176 - 2));
3026 OUT_BCS_BATCH(batch, id); //Huff table id
3028 //DWord 2 - 13 has DC_TABLE
3029 intel_batchbuffer_data(batch, dc_table, 12*4);
3031 //Dword 14 -175 has AC_TABLE
3032 intel_batchbuffer_data(batch, ac_table, 162*4);
3033 ADVANCE_BCS_BATCH(batch);
3038 //This method is used to compute the MCU count used for setting MFC_JPEG_SCAN_OBJECT
3039 static void get_Y_sampling_factors(uint32_t surface_format, uint8_t *h_factor, uint8_t *v_factor)
3041 switch (surface_format) {
3042 case VA_FOURCC_Y800: {
3047 case VA_FOURCC_NV12: {
3052 case VA_FOURCC_UYVY: {
3057 case VA_FOURCC_YUY2: {
3062 case VA_FOURCC_RGBA:
3063 case VA_FOURCC_444P: {
3068 default : { //May be have to insert error handling here. For now just use as below
//set MFC_JPEG_SCAN_OBJECT
/*
 * Emits the MFC_JPEG_SCAN_OBJECT command for the (single) JPEG scan:
 * MCU count derived from picture size and the surface's sampling factors,
 * plus per-component huffman table selectors, header-present/last-scan
 * flags and the restart interval.
 */
gen8_mfc_jpeg_scan_object(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
    uint32_t mcu_count, surface_format, Mx, My;
    uint8_t i, horizontal_sampling_factor, vertical_sampling_factor, huff_ac_table=0, huff_dc_table=0;
    uint8_t is_last_scan = 1; //Jpeg has only 1 scan per frame. When last scan, HW inserts EOI code.
    uint8_t head_present_flag=1; //Header has tables and app data
    uint16_t num_components, restart_interval; //Specifies number of MCUs in an ECS.
    VAEncSliceParameterBufferJPEG *slice_param;
    VAEncPictureParameterBufferJPEG *pic_param;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface = encode_state->input_yuv_object;
    assert(encode_state->slice_params_ext[0] && encode_state->slice_params_ext[0]->buffer);
    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    assert(obj_surface);
    pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[0]->buffer;
    surface_format = obj_surface->fourcc;
    get_Y_sampling_factors(surface_format, &horizontal_sampling_factor, &vertical_sampling_factor);
    // Mx = #MCUs in a row, My = #MCUs in a column (ceil of dimension / MCU size)
    Mx = (pic_param->picture_width + (horizontal_sampling_factor*8 -1))/(horizontal_sampling_factor*8);
    My = (pic_param->picture_height + (vertical_sampling_factor*8 -1))/(vertical_sampling_factor*8);
    mcu_count = (Mx * My);
    num_components = pic_param->num_components;
    restart_interval = slice_param->restart_interval;
    //Depending on number of components and values set for table selectors,
    //only those bits are set in 24:22 for AC table, 20:18 for DC table
    /* One selector bit per component: bit i set means component i uses table 1. */
    for(i=0; i<num_components; i++) {
        huff_ac_table |= ((slice_param->components[i].ac_table_selector)<<i);
        huff_dc_table |= ((slice_param->components[i].dc_table_selector)<<i);
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MFC_JPEG_SCAN_OBJECT | (3 - 2));
    OUT_BCS_BATCH(batch, mcu_count << 0); //MCU Count
    OUT_BCS_BATCH(batch,
                  (huff_ac_table << 22) | //Huffman AC Table
                  (huff_dc_table << 18) | //Huffman DC Table
                  (head_present_flag << 17) | //Head present flag
                  (is_last_scan << 16) | //Is last scan
                  (restart_interval << 0)); //Restart Interval
    ADVANCE_BCS_BATCH(batch);
/*
 * Emits an MFX_INSERT_OBJECT command so the PAK copies pre-packed header
 * bits (insert_data, length_in_dws DWords) verbatim into the bitstream.
 * data_bits_in_last_dw == 0 is treated as a full 32-bit last DWord.
 */
gen8_mfc_jpeg_pak_insert_object(struct intel_encoder_context *encoder_context, unsigned int *insert_data,
                                int length_in_dws, int data_bits_in_last_dw, int is_last_header,
                                int is_end_of_slice)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;
    BEGIN_BCS_BATCH(batch, length_in_dws + 2);
    OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 16) | //DataByteOffset 0 for JPEG Encoder
                  (0 << 15) | //HeaderLengthExcludeFrmSize 0 for JPEG Encoder
                  (data_bits_in_last_dw << 8) | //DataBitsInLastDW
                  (0 << 4) | //SkipEmulByteCount 0 for JPEG Encoder
                  (0 << 3) | //EmulationFlag 0 for JPEG Encoder
                  ((!!is_last_header) << 2) | //LastHeaderFlag
                  ((!!is_end_of_slice) << 1) | //EndOfSliceFlag
                  (1 << 0)); //BitstreamStartReset 1 for JPEG Encoder
    intel_batchbuffer_data(batch, insert_data, length_in_dws*4);
    ADVANCE_BCS_BATCH(batch);
//send the jpeg headers to HW using MFX_PAK_INSERT_OBJECT
/*
 * Inserts the application-packed JPEG header (SOI/DQT/DHT/SOF/APPn...) into
 * the bitstream.  The bit length comes from the packed header parameter
 * buffer; it is converted to whole DWords plus the valid bits of the last.
 */
gen8_mfc_jpeg_add_headers(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
    if (encode_state->packed_header_data_ext) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)(*encode_state->packed_header_data_ext)->buffer;
        unsigned int length_in_bits;
        param = (VAEncPackedHeaderParameterBuffer *)(*encode_state->packed_header_params_ext)->buffer;
        length_in_bits = param->bit_length;
        /* DWord count rounded up; low 5 bits give the tail-DWord bit count */
        gen8_mfc_jpeg_pak_insert_object(encoder_context,
                                        ALIGN(length_in_bits, 32) >> 5,
                                        length_in_bits & 0x1f,
//Initialize the buffered_qmatrix with the default qmatrix in the driver.
//If the app sends the qmatrix, this will be replaced with the one app sends.
jpeg_init_default_qmatrix(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    //Load the the QM in zigzag order. If app sends QM, it is always in zigzag order.
    /* luma table, reordered from raster to zigzag via zigzag_direct[] */
    mfc_context->buffered_qmatrix.lum_quantiser_matrix[i] = jpeg_luma_quant[zigzag_direct[i]];
    /* chroma table, same reordering */
    mfc_context->buffered_qmatrix.chroma_quantiser_matrix[i] = jpeg_chroma_quant[zigzag_direct[i]];
/* This is at the picture level */
/*
 * Programs the full per-picture JPEG PAK sequence: pipe mode select,
 * surface state, buffer addresses, picture state, quant matrices, then
 * the huffman tables, the scan object and finally the packed headers.
 */
gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
    int i, j, component, max_selector = 0;
    VAEncSliceParameterBufferJPEG *slice_param;
    gen8_mfc_pipe_mode_select(ctx, MFX_FORMAT_JPEG, encoder_context);
    gen8_mfc_jpeg_set_surface_state(ctx, encoder_context, encode_state);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_jpeg_pic_state(ctx, encoder_context, encode_state);
    //do the slice level encoding here
    gen8_mfc_jpeg_fqm_state(ctx, encoder_context, encode_state);
    //I dont think I need this for loop. Just to be consistent with other encoding logic...
    /* Find the highest huffman table selector referenced by any component;
     * that determines how many table pairs must be programmed. */
    for(i = 0; i < encode_state->num_slice_params_ext; i++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[i]->buffer);
        slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer;
        for(j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
            for(component = 0; component < slice_param->num_components; component++) {
                if(max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;
                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
    /* baseline JPEG allows at most two huffman table pairs */
    assert(max_selector < 2);
    //send the huffman table using MFC_JPEG_HUFF_TABLE
    gen8_mfc_jpeg_huff_table_state(ctx, encode_state, encoder_context, max_selector+1);
    //set MFC_JPEG_SCAN_OBJECT
    gen8_mfc_jpeg_scan_object(ctx, encode_state, encoder_context);
    //add headers using MFX_PAK_INSERT_OBJECT (it is refered as MFX_INSERT_OBJECT in this driver code)
    gen8_mfc_jpeg_add_headers(ctx, encode_state, encoder_context);
/*
 * Builds the BCS batch for one JPEG frame: open an atomic section, flush,
 * run the picture-level programming, close the section.
 */
gen8_mfc_jpeg_pipeline_programing(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    // 0x4000: batch space reserved for the whole frame's commands
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);
    // picture level programing
    gen8_mfc_jpeg_pipeline_picture_programing(ctx, encode_state, encoder_context);
    intel_batchbuffer_end_atomic(batch);
/*
 * Gen8 JPEG encode entry point: initialize common MFC state, bind the
 * per-frame surfaces/buffers, program the BCS pipeline and submit it.
 */
gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
    gen8_mfc_init(ctx, encode_state, encoder_context);
    intel_mfc_jpeg_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen8_mfc_jpeg_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);
    return VA_STATUS_SUCCESS;
/*
 * Estimates the VP8 qindex that should produce target_frame_size, by
 * scanning the per-MB bit-cost table (vp8_bits_per_mb) between the
 * picture's qindex clamp limits.  Frame/MB sizes are scaled by 512 (<< 9)
 * to keep integer precision.
 */
static int gen8_mfc_vp8_qindex_estimate(struct encode_state *encode_state,
                                        struct gen6_mfc_context *mfc_context,
                                        int target_frame_size,
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    unsigned int max_qindex = pic_param->clamp_qindex_high;
    unsigned int min_qindex = pic_param->clamp_qindex_low;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    int last_size_gap = -1;
    int per_mb_size_at_qindex;
    int target_qindex = min_qindex, i;
    /* make sure would not overflow*/
    if (target_frame_size >= (0x7fffffff >> 9))
        target_mb_size = (target_frame_size / width_in_mbs / height_in_mbs) << 9;
        target_mb_size = (target_frame_size << 9) / width_in_mbs / height_in_mbs;
    /* walk the qindex range until per-MB cost drops to the target */
    for (i = min_qindex; i <= max_qindex; i++) {
        per_mb_size_at_qindex = vp8_bits_per_mb[!is_key_frame][i];
        if (per_mb_size_at_qindex <= target_mb_size) {
            if (target_mb_size - per_mb_size_at_qindex < last_size_gap)
        last_size_gap = per_mb_size_at_qindex - target_mb_size;
    return target_qindex;
/*
 * Initializes CBR bit-rate control for VP8: splits the GOP bit budget into
 * per-frame-type targets (I frames weighted against P frames by
 * BRC_PWEIGHT), estimates starting qindexes, and seeds the HRD buffer
 * model.
 */
static void gen8_mfc_vp8_brc_init(struct encode_state *encode_state,
                                  struct intel_encoder_context* encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncMiscParameterBuffer* misc_param_hrd = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD][0]->buffer;
    VAEncMiscParameterHRD* param_hrd = (VAEncMiscParameterHRD*)misc_param_hrd->data;
    VAEncMiscParameterBuffer* misc_param_frame_rate_buffer = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate][0]->buffer;
    VAEncMiscParameterFrameRate* param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buffer->data;
    double bitrate = seq_param->bits_per_second;
    unsigned int frame_rate = param_frame_rate->framerate;
    int inum = 1, pnum = 0;
    int intra_period = seq_param->intra_period;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    int max_frame_size = (vp8_bits_per_mb[0][0] >> 9) * width_in_mbs * height_in_mbs;/* vp8_bits_per_mb table mutilpled 512 */
    pnum = intra_period - 1;
    mfc_context->brc.mode = encoder_context->rate_control_mode;
    /* GOP budget split: one I frame, pnum P frames weighted by BRC_PWEIGHT */
    mfc_context->brc.target_frame_size[0][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/frame_rate) /
                                                                (double)(inum + BRC_PWEIGHT * pnum ));
    mfc_context->brc.target_frame_size[0][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[0][SLICE_TYPE_I];
    mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
    mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
    mfc_context->brc.bits_per_frame[0] = bitrate/frame_rate;
    /* starting qindex guesses for each frame type */
    mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] = gen8_mfc_vp8_qindex_estimate(encode_state,
                                                                               mfc_context->brc.target_frame_size[0][SLICE_TYPE_I],
    mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = gen8_mfc_vp8_qindex_estimate(encode_state,
                                                                               mfc_context->brc.target_frame_size[0][SLICE_TYPE_P],
    /* HRD model: start from the app's initial fullness, capped at half the
     * buffer (the (double) cast binds to the comparison result — harmless
     * but worth noting). */
    mfc_context->hrd.buffer_size = (double)param_hrd->buffer_size;
    mfc_context->hrd.current_buffer_fullness =
        (double)(param_hrd->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
        param_hrd->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/max_frame_size;
    mfc_context->hrd.violation_noted = 0;
/*
 * Post-encode CBR update.  Given the actual coded size of the frame,
 * predicts the next qindex for the current frame type (scaling the old
 * qindex by target/predicted size), applies rounding compensation and HRD
 * fullness feedback, clamps to the picture's qindex limits, and returns
 * the HRD status (underflow/overflow handling included).
 */
static int gen8_mfc_vp8_brc_postpack(struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context,
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    int is_key_frame = !pic_param->pic_flags.bits.frame_type;
    int slicetype = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
    int qpi = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I];
    int qpp = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P];
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /*
     * x - how far we are from HRD buffer borders
     * y - how far we are from target HRD buffer fullness
     */
    double frame_size_alpha;
    unsigned int max_qindex = pic_param->clamp_qindex_high;
    unsigned int min_qindex = pic_param->clamp_qindex_low;
    qp = mfc_context->brc.qp_prime_y[0][slicetype];
    target_frame_size = mfc_context->brc.target_frame_size[0][slicetype];
    /* dampen the size correction by GOP length (capped) unless buffer is tiny */
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);
    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);
    /* scale qindex in proportion to the remaining size error */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);
    /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
    mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
    if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
        mfc_context->brc.qpf_rounding_accumulator = 0.;
    } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
        mfc_context->brc.qpf_rounding_accumulator = 0.;
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, min_qindex, max_qindex);
    /* checking wthether HRD compliance is still met */
    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    x /= mfc_context->hrd.target_buffer_fullness;
    y = mfc_context->hrd.current_buffer_fullness;
    x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
    y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    if (y < 0.01) y = 0.01;
    else if (x < -1) x = -1;
    /* smooth correction: stronger as fullness drifts from target */
    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, min_qindex, max_qindex);
    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (!is_key_frame) {
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 4)
                mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 4)
                mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qindex, max_qindex);
        BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qindex, max_qindex);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 2;
        if (qpn > max_qindex) {
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 2;
        if (qpn < min_qindex) { // < 0 (?) overflow with minQP
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
    mfc_context->brc.qp_prime_y[0][slicetype] = qpn;
/*
 * Seeds the VUI HRD fields used for CBR stream timing; non-CBR rate
 * control modes leave the context untouched.
 */
static void gen8_mfc_vp8_hrd_context_init(struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int target_bit_rate = seq_param->bits_per_second;
    // current we only support CBR mode.
    if (rate_control_mode == VA_RC_CBR) {
        mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
        /* initial CPB removal delay in 90kHz clock ticks for half a buffer */
        mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
        mfc_context->vui_hrd.i_cpb_removal_delay = 2;
        mfc_context->vui_hrd.i_frame_number = 0;
        mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/* Per-frame HRD bookkeeping: advances the coded-frame counter. */
static void gen8_mfc_vp8_hrd_context_update(struct encode_state *encode_state,
                                            struct gen6_mfc_context *mfc_context)
    mfc_context->vui_hrd.i_frame_number++;
/*
 * Check whether the parameters related with CBR are updated and decide whether
 * it needs to reinitialize the configuration related with CBR.
 * Currently it will check the following parameters:
 * gop_configuration(intra_period, ip_period, intra_idr_period)
 */
/* Returns whether bitrate/framerate/intra_period changed since the last
 * call (CBR only) and records the new values for the next comparison. */
static bool gen8_mfc_vp8_brc_updated_check(struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    double cur_fps, cur_bitrate;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncMiscParameterBuffer *misc_param_frame_rate_buf = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate][0]->buffer;
    VAEncMiscParameterFrameRate *param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buf->data;
    unsigned int frame_rate = param_frame_rate->framerate;
    /* only CBR tracks these parameters */
    if (rate_control_mode != VA_RC_CBR) {
    cur_bitrate = seq_param->bits_per_second;
    cur_fps = frame_rate;
    if ((cur_bitrate == mfc_context->brc.saved_bps) &&
        (cur_fps == mfc_context->brc.saved_fps) &&
        (seq_param->intra_period == mfc_context->brc.saved_intra_period)) {
        /* the parameters related with CBR are not updaetd */
    /* remember the new settings for the next check */
    mfc_context->brc.saved_intra_period = seq_param->intra_period;
    mfc_context->brc.saved_fps = cur_fps;
    mfc_context->brc.saved_bps = cur_bitrate;
/*
 * Pre-encode BRC hook: when CBR parameters changed (or on first use),
 * (re)initializes the BRC targets and the HRD context.
 */
static void gen8_mfc_vp8_brc_prepare(struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    if (rate_control_mode == VA_RC_CBR) {
        /* this hook is VP8-only; MPEG-2 must not reach it */
        assert(encoder_context->codec != CODEC_MPEG2);
        brc_updated = gen8_mfc_vp8_brc_updated_check(encode_state, encoder_context);
        /*Programing bit rate control */
        gen8_mfc_vp8_brc_init(encode_state, encoder_context);
        /*Programing HRD control */
        gen8_mfc_vp8_hrd_context_init(encode_state, encoder_context);
/*
 * Resets per-frame VP8 entropy state: header-update positions, skip/intra
 * probabilities, mode and MV probability tables (key-frame vs inter-frame
 * defaults), then uploads the default coefficient probabilities into the
 * stream-in BO consumed by the PAK.
 */
static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
                               VAEncPictureParameterBufferVP8 *pic_param,
                               VAQMatrixBufferVP8 *q_matrix)
    int is_key_frame = !pic_param->pic_flags.bits.frame_type;
    unsigned char *coeff_probs_stream_in_buffer;
    mfc_context->vp8_state.frame_header_lf_update_pos = 0;
    mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
    mfc_context->vp8_state.frame_header_token_update_pos = 0;
    mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
    mfc_context->vp8_state.prob_skip_false = 255;
    memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
    memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
    /* key frames: key-frame mode probability tables, intra forced */
    memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
    memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
    mfc_context->vp8_state.prob_intra = 255;
    mfc_context->vp8_state.prob_last = 128;
    mfc_context->vp8_state.prob_gf = 128;
    /* inter frames: generic mode probability tables */
    memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
    memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
    mfc_context->vp8_state.prob_intra = 63;
    mfc_context->vp8_state.prob_last = 128;
    mfc_context->vp8_state.prob_gf = 128;
    /* skip-false probability depends on the base quantizer index */
    mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
    dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
    coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
    assert(coeff_probs_stream_in_buffer);
    memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
    dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
/* Placeholder for inter-frame probability adaptation; nothing beyond the
 * defaults set in vp8_enc_state_init() is updated yet. */
static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
                                 VAQMatrixBufferVP8 *q_matrix)
    /*some other probabilities need to be updated*/
3622 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
3623 VAEncPictureParameterBufferVP8 *pic_param,
3624 VAQMatrixBufferVP8 *q_matrix,
3625 struct gen6_mfc_context *mfc_context,
3626 struct intel_encoder_context *encoder_context);
/*
 * Bit-packs the VP8 uncompressed frame header (binarize_vp8_frame_header
 * fills vp8_frame_header / frame_header_bit_count) and copies it into the
 * frame header BO; the temporary CPU-side buffer is freed after the copy.
 */
static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context,
                                          struct gen6_mfc_context *mfc_context)
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
    unsigned char *frame_header_buffer;
    binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context, encoder_context);
    dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
    frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
    assert(frame_header_buffer);
    /* round the header bit count up to whole bytes */
    memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
    free(mfc_context->vp8_state.vp8_frame_header);
    dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
3647 #define MAX_VP8_FRAME_HEADER_SIZE 0x2000
3648 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
/*
 * Per-frame setup for the VP8 PAK path: releases last frame's buffers,
 * applies the BRC-chosen qindex (CBR), (re)allocates every scratch and
 * stream buffer, and binarizes the frame header.
 */
static void gen8_mfc_vp8_init(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = 0;
    int height_in_mbs = 0;
    int slice_batchbuffer_size;
    int is_key_frame, slice_type, rate_control_mode;
    VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
    /* NOTE(review): width_in_mbs is computed from frame_height — this looks
     * like a copy-paste slip (frame_width is presumably intended); it would
     * break every non-square resolution.  Verify and fix. */
    width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
    height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
    is_key_frame = !pic_param->pic_flags.bits.frame_type;
    slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
    rate_control_mode = encoder_context->rate_control_mode;
    /* under CBR the BRC-chosen qindex overrides the app's quant indices */
    if (rate_control_mode == VA_RC_CBR) {
        q_matrix->quantization_index[0] = mfc_context->brc.qp_prime_y[0][slice_type];
        for (i = 1; i < 4; i++)
            q_matrix->quantization_index[i] = q_matrix->quantization_index[0];
        for (i = 0; i < 5; i++)
            q_matrix->quantization_index_delta[i] = 0;
    slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
                             (SLICE_HEADER + SLICE_TAIL);
    /*Encode common setup for MFC*/
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;
    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;
    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;
    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
    /* drop last frame's DMV buffers and reference surfaces */
    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
        if ( mfc_context->direct_mv_buffers[i].bo != NULL)
            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    /* row-store / status scratch buffers, sized from the MB dimensions */
    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * 64 * 16,
    mfc_context->intra_row_store_scratch_buffer.bo = bo;
    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * height_in_mbs * 16,
    mfc_context->macroblock_status_buffer.bo = bo;
    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;
    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;
    if (mfc_context->aux_batchbuffer) {
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);
        mfc_context->aux_batchbuffer = NULL;
    /* fresh aux batchbuffer exposed as a 16-byte-block surface */
    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.pitch = 16;
    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
    mfc_context->aux_batchbuffer_surface.size_block = 16;
    gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
    /* alloc vp8 encoding buffers*/
    dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      MAX_VP8_FRAME_HEADER_SIZE,
    mfc_context->vp8_state.frame_header_bo = bo;
    /* 8 token partitions laid out back-to-back, 384 bytes per MB each */
    mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 384 * 9;
    for(i = 0; i < 8; i++) {
        mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 384 * (i + 1);
    dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      mfc_context->vp8_state.intermediate_buffer_max_size,
    mfc_context->vp8_state.intermediate_bo = bo;
    dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * height_in_mbs * 16,
    mfc_context->vp8_state.stream_out_bo = bo;
    dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      sizeof(vp8_default_coef_probs),
    mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
    dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      VP8_TOKEN_STATISTICS_BUFFER_SIZE,
    mfc_context->vp8_state.token_statistics_bo = bo;
    dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * 16 * 64,
    mfc_context->vp8_state.mpc_row_store_bo = bo;
    vp8_enc_state_init(mfc_context, pic_param, q_matrix);
    vp8_enc_frame_header_binarize(encode_state, encoder_context, mfc_context);
/*
 * Binds the per-frame resources for VP8 PAK: the reconstructed surface
 * (routed to pre- or post-deblocking output depending on loop filter
 * level), the reference frames, the input YUV surface, and the coded
 * buffer that receives the final bitstream.
 */
intel_mfc_vp8_prepare(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    /* reconstructed surface */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    /* loop filter off -> write the pre-deblocking output, else post */
    if (pic_param->loop_filter_level[0] == 0) {
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;
    /* set vp8 reference frames */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        obj_surface = encode_state->reference_objects[i];
        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(mfc_context->reference_surfaces[i].bo);
            mfc_context->reference_surfaces[i].bo = NULL;
    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
    /* coded buffer: bitstream starts after the driver's header segment */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
    /* the final VP8 frame is assembled directly into the coded buffer */
    dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
    mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
    mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
    dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
    /* set the internal flag to 0 to indicate the coded size is unknown */
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
/*
 * gen8_mfc_vp8_encoder_cfg: emit the 30-dword MFX_VP8_ENCODER_CFG command
 * that configures the VP8 PAK pass (rate-control flags, per-MB bit limits,
 * scaled frame dimensions, and the frame-header bin-buffer pointers
 * produced by the software header binarization).
 */
3890 gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx,
3891 struct encode_state *encode_state,
3892 struct intel_encoder_context *encoder_context)
3894 struct intel_batchbuffer *batch = encoder_context->base.batch;
3895 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3896 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3897 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3899 BEGIN_BCS_BATCH(batch, 30);
3900 OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
/* DW1: pass-control flags */
3902 OUT_BCS_BATCH(batch,
3903 0 << 9 | /* compressed bitstream output disable */
3904 1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
3905 1 << 6 | /* RC initial pass */
3906 0 << 4 | /* update segment feature data flag */
3907 1 << 3 | /* bitstream statistics output enable */
3908 1 << 2 | /* token statistics output enable */
3909 0 << 1 | /* final bitstream output disable */
3912 OUT_BCS_BATCH(batch, 0); /*DW2*/
/* DW3: per-macroblock bit budget caps */
3914 OUT_BCS_BATCH(batch,
3915 0xfff << 16 | /* max intra mb bit count limit */
3916 0xfff << 0 /* max inter mb bit count limit */
3919 OUT_BCS_BATCH(batch, 0); /*DW4*/
3920 OUT_BCS_BATCH(batch, 0); /*DW5*/
3921 OUT_BCS_BATCH(batch, 0); /*DW6*/
3922 OUT_BCS_BATCH(batch, 0); /*DW7*/
3923 OUT_BCS_BATCH(batch, 0); /*DW8*/
3924 OUT_BCS_BATCH(batch, 0); /*DW9*/
3925 OUT_BCS_BATCH(batch, 0); /*DW10*/
3926 OUT_BCS_BATCH(batch, 0); /*DW11*/
3927 OUT_BCS_BATCH(batch, 0); /*DW12*/
3928 OUT_BCS_BATCH(batch, 0); /*DW13*/
3929 OUT_BCS_BATCH(batch, 0); /*DW14*/
3930 OUT_BCS_BATCH(batch, 0); /*DW15*/
3931 OUT_BCS_BATCH(batch, 0); /*DW16*/
3932 OUT_BCS_BATCH(batch, 0); /*DW17*/
3933 OUT_BCS_BATCH(batch, 0); /*DW18*/
3934 OUT_BCS_BATCH(batch, 0); /*DW19*/
3935 OUT_BCS_BATCH(batch, 0); /*DW20*/
3936 OUT_BCS_BATCH(batch, 0); /*DW21*/
3938 OUT_BCS_BATCH(batch,
3939 pic_param->pic_flags.bits.show_frame << 23 |
3940 pic_param->pic_flags.bits.version << 20
/* Scaled frame size: 2-bit scale factor packed above the 14-bit dimension,
 * height in the upper half-word, width in the lower. */
3943 OUT_BCS_BATCH(batch,
3944 (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
3945 (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
/* Frame-header bin buffer layout, filled in by
 * vp8_enc_frame_header_binarize(): total bit count plus the update
 * positions the hardware patches during the RC pass. */
3949 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
3952 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
3955 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/
3958 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
3961 OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */
3964 OUT_BCS_BATCH(batch, 0);
3966 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfc_vp8_pic_state: emit the 38-dword MFX_VP8_PIC_STATE command.
 * Packs frame dimensions (in MBs), VP8 picture flags, loop-filter levels,
 * quantizer indices and deltas, segment/skip/intra probabilities, the
 * Y/UV mode probabilities and MV probabilities maintained in
 * mfc_context->vp8_state, and the loop-filter reference/mode deltas.
 */
3970 gen8_mfc_vp8_pic_state(VADriverContextP ctx,
3971 struct encode_state *encode_state,
3972 struct intel_encoder_context *encoder_context)
3974 struct intel_batchbuffer *batch = encoder_context->base.batch;
3975 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3976 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3977 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3978 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3981 log2num = pic_param->pic_flags.bits.num_token_partitions;
3983 /*update mode and token probs*/
3984 vp8_enc_state_update(mfc_context, q_matrix);
3986 BEGIN_BCS_BATCH(batch, 38);
3987 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
/* DW1: frame size in macroblocks, minus one. */
3988 OUT_BCS_BATCH(batch,
3989 (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
3990 (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
3992 OUT_BCS_BATCH(batch,
3994 pic_param->sharpness_level << 16 |
3995 pic_param->pic_flags.bits.sign_bias_alternate << 13 |
3996 pic_param->pic_flags.bits.sign_bias_golden << 12 |
3997 pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
3998 pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
3999 pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
4000 pic_param->pic_flags.bits.segmentation_enabled << 8 |
4001 !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
4002 (pic_param->pic_flags.bits.version / 2) << 4 |
4003 (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
4004 !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
/* Per-segment loop-filter levels, one byte each. */
4006 OUT_BCS_BATCH(batch,
4007 pic_param->loop_filter_level[3] << 24 |
4008 pic_param->loop_filter_level[2] << 16 |
4009 pic_param->loop_filter_level[1] << 8 |
4010 pic_param->loop_filter_level[0] << 0);
/* Per-segment quantizer indices, one byte each. */
4012 OUT_BCS_BATCH(batch,
4013 q_matrix->quantization_index[3] << 24 |
4014 q_matrix->quantization_index[2] << 16 |
4015 q_matrix->quantization_index[1] << 8 |
4016 q_matrix->quantization_index[0] << 0);
/* Quantizer deltas packed as sign/magnitude nibble pairs: the sign bit is
 * extracted as bit 15 of the value viewed as unsigned short, placed one
 * nibble above the 4-bit absolute value. */
4018 OUT_BCS_BATCH(batch,
4019 ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
4020 abs(q_matrix->quantization_index_delta[4]) << 24 |
4021 ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
4022 abs(q_matrix->quantization_index_delta[3]) << 16 |
4023 ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
4024 abs(q_matrix->quantization_index_delta[2]) << 8 |
4025 ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
4026 abs(q_matrix->quantization_index_delta[1]) << 0);
4028 OUT_BCS_BATCH(batch,
4029 ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
4030 abs(q_matrix->quantization_index_delta[0]) << 0);
4032 OUT_BCS_BATCH(batch,
4033 pic_param->clamp_qindex_high << 8 |
4034 pic_param->clamp_qindex_low << 0);
/* DW8..DW18 written as all-ones — presumably coefficient-probability
 * update masks; TODO(review) confirm against the PRM. */
4036 for (i = 8; i < 19; i++) {
4037 OUT_BCS_BATCH(batch, 0xffffffff);
4040 OUT_BCS_BATCH(batch,
4041 mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
4042 mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
4043 mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);
4045 OUT_BCS_BATCH(batch,
4046 mfc_context->vp8_state.prob_skip_false << 24 |
4047 mfc_context->vp8_state.prob_intra << 16 |
4048 mfc_context->vp8_state.prob_last << 8 |
4049 mfc_context->vp8_state.prob_gf << 0);
4051 OUT_BCS_BATCH(batch,
4052 mfc_context->vp8_state.y_mode_probs[3] << 24 |
4053 mfc_context->vp8_state.y_mode_probs[2] << 16 |
4054 mfc_context->vp8_state.y_mode_probs[1] << 8 |
4055 mfc_context->vp8_state.y_mode_probs[0] << 0);
4057 OUT_BCS_BATCH(batch,
4058 mfc_context->vp8_state.uv_mode_probs[2] << 16 |
4059 mfc_context->vp8_state.uv_mode_probs[1] << 8 |
4060 mfc_context->vp8_state.uv_mode_probs[0] << 0);
4062 /* MV update value, DW23-DW32 */
/* Each mv_probs row has 19 entries; the 20th byte of the final dword is
 * forced to 0 so the packing never reads past the array. */
4063 for (i = 0; i < 2; i++) {
4064 for (j = 0; j < 20; j += 4) {
4065 OUT_BCS_BATCH(batch,
4066 (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
4067 mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
4068 mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
4069 mfc_context->vp8_state.mv_probs[i][j + 0] << 0);
/* Loop-filter deltas are masked to 7 bits (sign/magnitude form). */
4073 OUT_BCS_BATCH(batch,
4074 (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
4075 (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
4076 (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
4077 (pic_param->ref_lf_delta[0] & 0x7f) << 0);
4079 OUT_BCS_BATCH(batch,
4080 (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
4081 (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
4082 (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
4083 (pic_param->mode_lf_delta[0] & 0x7f) << 0);
4085 OUT_BCS_BATCH(batch, 0);
4086 OUT_BCS_BATCH(batch, 0);
4087 OUT_BCS_BATCH(batch, 0);
4089 ADVANCE_BCS_BATCH(batch);
/*
 * OUT_VP8_BUFFER: emit one buffer-address entry for
 * MFX_VP8_BSP_BUF_BASE_ADDR_STATE — a relocation at the given offset plus
 * padding dwords.  NOTE(review): presumably emits zero dwords when bo is
 * NULL (the guard is not visible in this chunk) — verify.
 */
4092 #define OUT_VP8_BUFFER(bo, offset) \
4094 OUT_BCS_RELOC(batch, \
4096 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
4099 OUT_BCS_BATCH(batch, 0); \
4100 OUT_BCS_BATCH(batch, 0); \
4101 OUT_BCS_BATCH(batch, 0);
/*
 * gen8_mfc_vp8_bsp_buf_base_addr_state: emit the 32-dword
 * MFX_VP8_BSP_BUF_BASE_ADDR_STATE command, binding all bitstream-processing
 * buffers: frame header, intermediate (token partition) buffer and its
 * per-partition offsets, final frame output, stream-out, coefficient
 * probability stream-in, token statistics and the MPC row store.
 */
4104 gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
4105 struct encode_state *encode_state,
4106 struct intel_encoder_context *encoder_context)
4108 struct intel_batchbuffer *batch = encoder_context->base.batch;
4109 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4111 BEGIN_BCS_BATCH(batch, 32);
4112 OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
4114 OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
/* Intermediate buffer plus the offsets of up to 8 token partitions. */
4116 OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
4117 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
4118 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
4119 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
4120 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
4121 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
4122 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
4123 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
4124 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
4125 OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
/* Final frame lands in the coded buffer, past its header. */
4127 OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
4128 OUT_BCS_BATCH(batch, 0);
4130 OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
4131 OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
4132 OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
4133 OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
4135 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfc_vp8_pipeline_picture_programing: emit all picture-level MFX
 * state for a VP8 frame, in the order the hardware expects — pipe mode
 * select, surface state, indirect object base, pipe/BSP buffer addresses,
 * the VP8-specific BSP buffer addresses, picture state, and encoder config.
 */
4139 gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
4140 struct encode_state *encode_state,
4141 struct intel_encoder_context *encoder_context)
4143 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4145 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
4146 mfc_context->set_surface_state(ctx, encoder_context);
4147 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
4148 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
4149 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
4150 gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
4151 gen8_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
4152 gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
/* Lookup tables translating VME intra prediction mode indices into the
 * corresponding VP8 PAK mode encodings: one table for whole-MB (16x16)
 * modes, one for 4x4 sub-block modes.  (Initializer values are defined
 * below; not all lines are visible in this chunk.) */
4155 static const unsigned char
4156 vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
4163 static const unsigned char
4164 vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
/*
 * gen8_mfc_vp8_intra_mb_mode_map: translate a packed VME intra prediction
 * mode word into the VP8 PAK encoding.  For whole-MB modes the low 2 bits
 * select an entry of vp8_intra_mb_mode_map; for 4x4 luma each of the 8
 * nibbles is remapped through vp8_intra_block_mode_map.
 * NOTE(review): the branch selecting between the two paths (is_luma_4x4)
 * is not fully visible in this chunk.
 */
4176 static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
4178 unsigned int i, pak_pred_mode = 0;
4179 unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks' intra modes */
4182 pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
/* Remap each 4-bit sub-block mode and repack it into the same nibble. */
4184 for (i = 0; i < 8; i++) {
4185 vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
4186 assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
4187 pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
4188 pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
4192 return pak_pred_mode;
/*
 * gen8_mfc_vp8_pak_object_intra: emit one 7-dword MFX_VP8_PAK_OBJECT for
 * an intra macroblock, translating the VME output message (msg) into PAK
 * mode fields.  x/y are the MB coordinates packed into DW4.
 */
4195 gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
4196 struct intel_encoder_context *encoder_context,
4199 struct intel_batchbuffer *batch)
4201 unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
4202 unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
4203 unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];
4206 batch = encoder_context->base.batch;
/* msg[0] bits 5:4 carry the VME intra MB mode (0 = 16x16, 2 = 4x4). */
4208 vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
4209 assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
4210 pak_intra_mb_mode = (vme_intra_mb_mode >> 1);
4212 vme_luma_pred_mode[0] = msg[1];
4213 vme_luma_pred_mode[1] = msg[2];
4214 vme_chroma_pred_mode = msg[3] & 0x3;
/* Two luma words cover the 16 sub-blocks (8 nibbles each); chroma always
 * uses the whole-MB mapping. */
4216 pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
4217 pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
4218 pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);
4220 BEGIN_BCS_BATCH(batch, 7);
4222 OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
4223 OUT_BCS_BATCH(batch, 0);
4224 OUT_BCS_BATCH(batch, 0);
4225 OUT_BCS_BATCH(batch,
4226 (0 << 20) | /* mv format: intra mb */
4227 (0 << 18) | /* Segment ID */
4228 (0 << 17) | /* disable coeff clamp */
4229 (1 << 13) | /* intra mb flag */
4230 (0 << 11) | /* refer picture select: last frame */
4231 (pak_intra_mb_mode << 8) | /* mb type */
4232 (pak_chroma_pred_mode << 4) | /* mb uv mode */
4233 (0 << 2) | /* skip mb flag: disable */
4236 OUT_BCS_BATCH(batch, (y << 16) | x);
4237 OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
4238 OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);
4240 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfc_vp8_pak_object_inter: emit one 7-dword MFX_VP8_PAK_OBJECT for
 * an inter macroblock.  Only 16x16 inter prediction is supported; the VME
 * motion vector is doubled (VP8 stores luma MVs at 1/8-pel granularity vs
 * the VME 1/4-pel) and replicated for the whole MB.
 */
4244 gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
4245 struct intel_encoder_context *encoder_context,
4249 struct intel_batchbuffer *batch)
4254 batch = encoder_context->base.batch;
4256 /* only support inter_16x16 now */
4257 assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
4258 /* for inter_16x16, all 16 MVs should be same,
4259 * and move mv to the vme mb start address to make sure offset is 64 bytes aligned
4260 * as vp8 spec, all vp8 luma motion vectors are stored doubled
/* Double each 16-bit MV component (x in the low half, y in the high half)
 * in place. */
4262 msg[0] = (((msg[AVC_INTER_MV_OFFSET/4] & 0xffff0000) << 1) | ((msg[AVC_INTER_MV_OFFSET/4] << 1) & 0xffff));
4264 for (i = 1; i < 16; i++) {
4268 BEGIN_BCS_BATCH(batch, 7);
4270 OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
4271 OUT_BCS_BATCH(batch,
4272 (0 << 29) | /* enable inline mv data: disable */
4274 OUT_BCS_BATCH(batch,
4276 OUT_BCS_BATCH(batch,
4277 (4 << 20) | /* mv format: inter */
4278 (0 << 18) | /* Segment ID */
4279 (0 << 17) | /* coeff clamp: disable */
4280 (0 << 13) | /* intra mb flag: inter mb */
4281 (0 << 11) | /* refer picture select: last frame */
4282 (0 << 8) | /* mb type: 16x16 */
4283 (0 << 4) | /* mb uv mode: dc_pred */
4284 (0 << 2) | /* skip mb flag: disable */
4287 OUT_BCS_BATCH(batch, (y << 16) | x);
4290 OUT_BCS_BATCH(batch, 0x8);
4291 OUT_BCS_BATCH(batch, 0x8);
4293 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfc_vp8_pak_pipeline: walk the VME output buffer one macroblock at
 * a time and emit a PAK object per MB.  Key frames are all-intra; for
 * inter frames each MB is coded intra or inter by comparing the VME RDO
 * costs.
 */
4297 gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
4298 struct encode_state *encode_state,
4299 struct intel_encoder_context *encoder_context,
4300 struct intel_batchbuffer *slice_batch)
4302 struct gen6_vme_context *vme_context = encoder_context->vme_context;
4303 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
4304 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4305 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
4306 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
4307 unsigned int *msg = NULL;
4308 unsigned char *msg_ptr = NULL;
4309 unsigned int i, offset, is_intra_frame;
/* VP8 frame_type 0 is a key (intra) frame. */
4311 is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4313 dri_bo_map(vme_context->vme_output.bo , 1);
4314 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
4316 for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
4317 int h_pos = i % width_in_mbs;
4318 int v_pos = i / width_in_mbs;
/* Each MB has one fixed-size message in the VME output buffer. */
4319 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
4321 if (is_intra_frame) {
4322 gen8_mfc_vp8_pak_object_intra(ctx,
4328 int inter_rdo, intra_rdo;
4329 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
4330 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
/* Choose whichever prediction has the lower rate-distortion cost. */
4332 if (intra_rdo < inter_rdo) {
4333 gen8_mfc_vp8_pak_object_intra(ctx,
4339 offset = i * vme_context->vme_output.size_block;
4340 gen8_mfc_vp8_pak_object_inter(ctx,
4350 dri_bo_unmap(vme_context->vme_output.bo);
4354 * A batch buffer for vp8 pak object commands
/*
 * Build the PAK-object batch in the context's aux batchbuffer, terminate
 * it with MI_BATCH_BUFFER_END, and hand the underlying bo to the caller
 * (caller owns the returned reference and must unreference it).
 */
4357 gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
4358 struct encode_state *encode_state,
4359 struct intel_encoder_context *encoder_context)
4361 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4362 struct intel_batchbuffer *batch;
4365 batch = mfc_context->aux_batchbuffer;
4366 batch_bo = batch->buffer;
4368 gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
/* Pad to a qword boundary before the end-of-batch marker. */
4370 intel_batchbuffer_align(batch, 8);
4372 BEGIN_BCS_BATCH(batch, 2);
4373 OUT_BCS_BATCH(batch, 0);
4374 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
4375 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past the batchbuffer wrapper teardown. */
4377 dri_bo_reference(batch_bo);
4378 intel_batchbuffer_free(batch);
4379 mfc_context->aux_batchbuffer = NULL;
/*
 * gen8_mfc_vp8_pipeline_programing: assemble the full VP8 PAK submission —
 * picture-level state in the main BCS batch, then a second-level jump
 * (MI_BATCH_BUFFER_START) into the software-built PAK-object batch.
 */
4385 gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
4386 struct encode_state *encode_state,
4387 struct intel_encoder_context *encoder_context)
4389 struct intel_batchbuffer *batch = encoder_context->base.batch;
4390 dri_bo *slice_batch_bo;
4392 slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
4395 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
4396 intel_batchbuffer_emit_mi_flush(batch);
4398 // picture level programing
4399 gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain into the second-level (slice) batch built above. */
4401 BEGIN_BCS_BATCH(batch, 4);
4402 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
4403 OUT_BCS_RELOC(batch,
4405 I915_GEM_DOMAIN_COMMAND, 0,
4407 OUT_BCS_BATCH(batch, 0);
4408 OUT_BCS_BATCH(batch, 0);
4409 ADVANCE_BCS_BATCH(batch);
4412 intel_batchbuffer_end_atomic(batch);
/* The relocation above holds its own reference; drop ours. */
4414 dri_bo_unreference(slice_batch_bo);
/*
 * gen8_mfc_calc_vp8_coded_buffer_size: read the hardware token-statistics
 * buffer after PAK completes and compute the total coded frame size in
 * bytes, storing it into the coded-buffer segment header.  Entry 0 holds
 * the first-partition bit count; entries 1..partition_num hold the token
 * partition bit counts.
 */
4417 static int gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
4418 struct encode_state *encode_state,
4419 struct intel_encoder_context *encoder_context)
4421 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4422 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4423 unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4424 unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
4426 int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;
4428 first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;
4430 dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
4432 vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
/* Statistics are bit counts; round each partition up to whole bytes. */
4433 first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;
4435 for (i = 1; i <= partition_num; i++)
4436 token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;
4438 /*coded_bytes includes P0~P8 partitions bytes + uncompressed data bytes + partition_size bytes in bitstream + 3 extra bytes */
4439 /*it seems the last partition size in vp8 status buffer is smaller than reality. so add 3 extra bytes */
4440 vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;
4442 dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
/* Publish the size through the coded-buffer segment header so that
 * vaMapBuffer() consumers can read it. */
4444 dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
4445 struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
4446 coded_buffer_segment->base.size = vp8_coded_bytes;
4447 dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
4449 return vp8_coded_bytes;
/*
 * gen8_mfc_vp8_encode_picture: top-level VP8 PAK entry point — init the
 * per-frame buffers, bind surfaces, program and run the BCS pipeline, then
 * compute the coded size.  Under CBR, brc_postpack checks HRD conformance
 * (presumably triggering a re-encode loop whose construct is not visible
 * in this chunk).
 */
4453 gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
4454 struct encode_state *encode_state,
4455 struct intel_encoder_context *encoder_context)
4457 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4458 unsigned int rate_control_mode = encoder_context->rate_control_mode;
4459 int current_frame_bits_size;
4462 gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
4463 intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
4464 /*Programming the bcs pipeline*/
4465 gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
4466 gen8_mfc_run(ctx, encode_state, encoder_context);
/* Coded size is returned in bytes; BRC works in bits. */
4467 current_frame_bits_size = 8 * gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
4469 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
4470 sts = gen8_mfc_vp8_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
4471 if (sts == BRC_NO_HRD_VIOLATION) {
4472 gen8_mfc_vp8_hrd_context_update(encode_state, mfc_context);
4474 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* QP is already clamped at its limit: the violation cannot be repaired,
 * so warn once and accept the frame. */
4475 if (!mfc_context->hrd.violation_noted) {
4476 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
4477 mfc_context->hrd.violation_noted = 1;
4479 return VA_STATUS_SUCCESS;
4483 return VA_STATUS_SUCCESS;
/*
 * gen8_mfc_context_destroy: release every buffer object and sub-context
 * owned by the MFC context — shared codec buffers, reference surfaces,
 * the GPE context, batchbuffers, and all VP8-specific state buffers.
 * Each pointer is NULLed after unreferencing to guard against reuse.
 */
4487 gen8_mfc_context_destroy(void *context)
4489 struct gen6_mfc_context *mfc_context = context;
4492 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
4493 mfc_context->post_deblocking_output.bo = NULL;
4495 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
4496 mfc_context->pre_deblocking_output.bo = NULL;
4498 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
4499 mfc_context->uncompressed_picture_source.bo = NULL;
4501 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
4502 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
4504 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
4505 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
4506 mfc_context->direct_mv_buffers[i].bo = NULL;
4509 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
4510 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
4512 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
4513 mfc_context->macroblock_status_buffer.bo = NULL;
4515 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
4516 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
4518 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
4519 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
4522 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
4523 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
4524 mfc_context->reference_surfaces[i].bo = NULL;
4527 gen8_gpe_context_destroy(&mfc_context->gpe_context);
4529 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
4530 mfc_context->mfc_batchbuffer_surface.bo = NULL;
4532 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
4533 mfc_context->aux_batchbuffer_surface.bo = NULL;
4535 if (mfc_context->aux_batchbuffer)
4536 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
4538 mfc_context->aux_batchbuffer = NULL;
/* VP8-specific state buffers. */
4540 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
4541 mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
4543 dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
4544 mfc_context->vp8_state.final_frame_bo = NULL;
4546 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
4547 mfc_context->vp8_state.frame_header_bo = NULL;
4549 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
4550 mfc_context->vp8_state.intermediate_bo = NULL;
4552 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
4553 mfc_context->vp8_state.mpc_row_store_bo = NULL;
4555 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
4556 mfc_context->vp8_state.stream_out_bo = NULL;
4558 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
4559 mfc_context->vp8_state.token_statistics_bo = NULL;
/*
 * gen8_mfc_pipeline: dispatch one encode call to the codec-specific
 * encode_picture routine based on the VA profile.  Unsupported profiles
 * return VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 */
4564 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
4566 struct encode_state *encode_state,
4567 struct intel_encoder_context *encoder_context)
4572 case VAProfileH264ConstrainedBaseline:
4573 case VAProfileH264Main:
4574 case VAProfileH264High:
4575 case VAProfileH264MultiviewHigh:
4576 case VAProfileH264StereoHigh:
4577 vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
4580 /* FIXME: add for other profile */
4581 case VAProfileMPEG2Simple:
4582 case VAProfileMPEG2Main:
4583 vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
4586 case VAProfileJPEGBaseline:
/* JPEG needs the default quantization tables loaded before encoding. */
4587 jpeg_init_default_qmatrix(ctx, encoder_context);
4588 vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);
4591 case VAProfileVP8Version0_3:
4592 vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
4596 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
4603 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4605 struct i965_driver_data *i965 = i965_driver_data(ctx);
4606 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
4608 assert(mfc_context);
4609 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
4611 mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
4612 mfc_context->gpe_context.curbe_size = 32 * 4;
4613 mfc_context->gpe_context.sampler_size = 0;
4615 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
4616 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
4617 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
4618 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
4619 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
4621 if (IS_GEN9(i965->intel.device_info)) {
4622 gen8_gpe_load_kernels(ctx,
4623 &mfc_context->gpe_context,
4627 gen8_gpe_load_kernels(ctx,
4628 &mfc_context->gpe_context,
4633 mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
4634 mfc_context->set_surface_state = gen8_mfc_surface_state;
4635 mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
4636 mfc_context->avc_img_state = gen8_mfc_avc_img_state;
4637 mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
4638 mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
4639 mfc_context->insert_object = gen8_mfc_avc_insert_object;
4640 mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
4642 encoder_context->mfc_context = mfc_context;
4643 encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
4644 encoder_context->mfc_pipeline = gen8_mfc_pipeline;
4646 if (encoder_context->codec == CODEC_VP8)
4647 encoder_context->mfc_brc_prepare = gen8_mfc_vp8_brc_prepare;
4649 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;