2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
45 #include <va/va_enc_jpeg.h>
46 #include "vp8_probs.h"
/* Surface-state layout helpers: each media surface-state entry is padded to
 * the Gen8 entry size; the binding table starts after MAX_MEDIA_SURFACES_GEN6
 * padded entries, with one 32-bit slot per surface index. */
48 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
/* When non-zero, the per-MB batch is built on the CPU instead of by the GPU kernel. */
52 #define MFC_SOFTWARE_BATCH 0
/* True when the GPU stepping is B0 or later (revision-gated workarounds). */
55 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
57 //Zigzag scan order of the Luma and Chroma components
58 //Note: Jpeg Spec ISO/IEC 10918-1, Figure A.6 shows the zigzag order differently.
59 //The Spec is trying to show the zigzag pattern with number positions. The below
60 //table will use the pattern shown by A.6 and map the position of the elements in the array
61 static const uint32_t zigzag_direct[64] = {
62 0, 1, 8, 16, 9, 2, 3, 10,
63 17, 24, 32, 25, 18, 11, 4, 5,
64 12, 19, 26, 33, 40, 48, 41, 34,
65 27, 20, 13, 6, 7, 14, 21, 28,
66 35, 42, 49, 56, 57, 50, 43, 36,
67 29, 22, 15, 23, 30, 37, 44, 51,
68 58, 59, 52, 45, 38, 31, 39, 46,
69 53, 60, 61, 54, 47, 55, 62, 63
72 //Default Luminance quantization table
73 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.1
/* Stored in raster order; reordered through zigzag_direct before being
 * programmed into the hardware quantizer state. */
74 static const uint8_t jpeg_luma_quant[64] = {
75 16, 11, 10, 16, 24, 40, 51, 61,
76 12, 12, 14, 19, 26, 58, 60, 55,
77 14, 13, 16, 24, 40, 57, 69, 56,
78 14, 17, 22, 29, 51, 87, 80, 62,
79 18, 22, 37, 56, 68, 109, 103, 77,
80 24, 35, 55, 64, 81, 104, 113, 92,
81 49, 64, 78, 87, 103, 121, 120, 101,
82 72, 92, 95, 98, 112, 100, 103, 99
85 //Default Chroma quantization table
86 //Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.2
/* Raster order, same handling as jpeg_luma_quant above. */
87 static const uint8_t jpeg_chroma_quant[64] = {
88 17, 18, 24, 47, 99, 99, 99, 99,
89 18, 21, 26, 66, 99, 99, 99, 99,
90 24, 26, 56, 99, 99, 99, 99, 99,
91 47, 66, 99, 99, 99, 99, 99, 99,
92 99, 99, 99, 99, 99, 99, 99, 99,
93 99, 99, 99, 99, 99, 99, 99, 99,
94 99, 99, 99, 99, 99, 99, 99, 99,
95 99, 99, 99, 99, 99, 99, 99, 99
/* Maps the two VA-API JPEG Huffman table slots to gen7+ hardware table ids.
 * NOTE(review): the initializer body is not visible in this chunk — confirm
 * the two entries against the full file before relying on them. */
99 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Gen8 media-kernel binary (4 dwords per instruction) used to build the
 * AVC PAK batch buffer on the GPU. */
104 static const uint32_t gen8_mfc_batchbuffer_avc[][4] = {
105 #include "shaders/utils/mfc_batchbuffer_hsw.g8b"
/* Gen9 build of the same batch-buffer-construction kernel. */
108 static const uint32_t gen9_mfc_batchbuffer_avc[][4] = {
109 #include "shaders/utils/mfc_batchbuffer_hsw.g9b"
/* Kernel descriptor table for Gen8: name, interface id, binary and size. */
112 static struct i965_kernel gen8_mfc_kernels[] = {
114 "MFC AVC INTRA BATCHBUFFER ",
115 MFC_BATCHBUFFER_AVC_INTRA,
116 gen8_mfc_batchbuffer_avc,
117 sizeof(gen8_mfc_batchbuffer_avc),
/* Kernel descriptor table for Gen9 — mirrors gen8_mfc_kernels above. */
122 static struct i965_kernel gen9_mfc_kernels[] = {
124 "MFC AVC INTRA BATCHBUFFER ",
125 MFC_BATCHBUFFER_AVC_INTRA,
126 gen9_mfc_batchbuffer_avc,
127 sizeof(gen9_mfc_batchbuffer_avc),
/* Flat AVC quantizer matrix: 64 bytes of 16 (four 8-bit values packed per
 * dword), used when no scaling matrix is supplied. */
132 static const uint32_t qm_flat[16] = {
133 0x10101010, 0x10101010, 0x10101010, 0x10101010,
134 0x10101010, 0x10101010, 0x10101010, 0x10101010,
135 0x10101010, 0x10101010, 0x10101010, 0x10101010,
136 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward-quantizer matrix: 64 entries of 0x1000 (two 16-bit values
 * packed per dword), the reciprocal form of qm_flat. */
139 static const uint32_t fqm_flat[32] = {
140 0x10001000, 0x10001000, 0x10001000, 0x10001000,
141 0x10001000, 0x10001000, 0x10001000, 0x10001000,
142 0x10001000, 0x10001000, 0x10001000, 0x10001000,
143 0x10001000, 0x10001000, 0x10001000, 0x10001000,
144 0x10001000, 0x10001000, 0x10001000, 0x10001000,
145 0x10001000, 0x10001000, 0x10001000, 0x10001000,
146 0x10001000, 0x10001000, 0x10001000, 0x10001000,
147 0x10001000, 0x10001000, 0x10001000, 0x10001000
/* Macroblock inter-mode field encodings used when parsing VME output. */
150 #define INTER_MODE_MASK 0x03
151 #define INTER_8X8 0x03
152 #define INTER_16X8 0x01
153 #define INTER_8X16 0x02
154 #define SUBMB_SHAPE_MASK 0x00FF00
155 #define INTER_16X16 0x00
/* MV-count fields pre-shifted into bits 22:20 of the MB command dword. */
157 #define INTER_MV8 (4 << 20)
158 #define INTER_MV32 (6 << 20)
/* Emit MFX_PIPE_MODE_SELECT (5 dwords): put the MFX engine in encode mode
 * for the given standard and route the reconstructed frame to the pre- or
 * post-deblocking output according to which buffer is allocated.
 * NOTE(review): this chunk is missing source lines (gaps in the embedded
 * numbering) — e.g. the 'standard_select' parameter and some OUT_BCS_BATCH
 * opens are not visible here; do not treat this text as complete. */
162 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
164 struct intel_encoder_context *encoder_context)
166 struct intel_batchbuffer *batch = encoder_context->base.batch;
167 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Only the four formats this backend can PAK are legal here. */
169 assert(standard_select == MFX_FORMAT_MPEG2 ||
170 standard_select == MFX_FORMAT_AVC ||
171 standard_select == MFX_FORMAT_JPEG ||
172 standard_select == MFX_FORMAT_VP8);
174 BEGIN_BCS_BATCH(batch, 5);
176 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
178 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
179 (MFD_MODE_VLD << 15) | /* VLD mode */
180 (0 << 10) | /* Stream-Out Enable */
181 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
182 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
183 (0 << 6) | /* frame statistics stream-out enable*/
184 (0 << 5) | /* not in stitch mode */
185 (1 << 4) | /* encoding mode */
186 (standard_select << 0)); /* standard select: avc or mpeg2 or jpeg*/
188 (0 << 7) | /* expand NOA bus flag */
189 (0 << 6) | /* disable slice-level clock gating */
190 (0 << 5) | /* disable clock gating for NOA */
191 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
192 (0 << 3) | /* terminate if AVC mbdata error occurs */
193 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
/* DW3-4 are reserved/zero for encode. */
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
199 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE (6 dwords) describing the reconstructed surface:
 * NV12 (planar 4:2:0, interleaved U/V), Y-major tiled, with dimensions and
 * pitch taken from mfc_context->surface_state. */
203 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
205 struct intel_batchbuffer *batch = encoder_context->base.batch;
206 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
208 BEGIN_BCS_BATCH(batch, 6);
210 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
211 OUT_BCS_BATCH(batch, 0);
/* DW2: height/width are programmed minus one, per hardware convention. */
213 ((mfc_context->surface_state.height - 1) << 18) |
214 ((mfc_context->surface_state.width - 1) << 4));
216 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
217 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
218 (0 << 22) | /* surface object control state, FIXME??? */
219 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
220 (0 << 2) | /* must be 0 for interleave U/V */
221 (1 << 1) | /* must be tiled */
222 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
224 (0 << 16) | /* must be 0 for interleave U/V */
225 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
226 OUT_BCS_BATCH(batch, 0);
228 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): program the indirect MV
 * object (VME output, skipped for JPEG which has no VME stage) and the
 * PAK-BSE output object where the compressed bitstream is written.
 * NOTE(review): some lines are missing from this chunk (e.g. the vme_size
 * declaration and several else branches); treat the visible control flow
 * as partial. */
232 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
233 struct intel_encoder_context *encoder_context)
235 struct i965_driver_data *i965 = i965_driver_data(ctx);
236 struct intel_batchbuffer *batch = encoder_context->base.batch;
237 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
238 struct gen6_vme_context *vme_context = encoder_context->vme_context;
240 unsigned int bse_offset;
242 BEGIN_BCS_BATCH(batch, 26);
244 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
245 /* the DW1-3 is for the MFX indirect bitstream offset */
246 OUT_BCS_BATCH(batch, 0);
247 OUT_BCS_BATCH(batch, 0);
248 OUT_BCS_BATCH(batch, 0);
250 /* the DW4-5 is the MFX upper bound */
251 if (encoder_context->codec == CODEC_VP8) {
252 OUT_BCS_RELOC64(batch,
253 mfc_context->mfc_indirect_pak_bse_object.bo,
254 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
255 mfc_context->mfc_indirect_pak_bse_object.end_offset);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
261 if(encoder_context->codec != CODEC_JPEG) {
262 vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
263 /* the DW6-10 is for MFX Indirect MV Object Base Address */
264 OUT_BCS_RELOC64(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
265 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
266 OUT_BCS_RELOC64(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
268 /* No VME for JPEG */
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
271 OUT_BCS_BATCH(batch, 0);
272 OUT_BCS_BATCH(batch, 0);
273 OUT_BCS_BATCH(batch, 0);
276 /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
277 OUT_BCS_BATCH(batch, 0);
278 OUT_BCS_BATCH(batch, 0);
279 OUT_BCS_BATCH(batch, 0);
280 OUT_BCS_BATCH(batch, 0);
281 OUT_BCS_BATCH(batch, 0);
283 /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
284 OUT_BCS_BATCH(batch, 0);
285 OUT_BCS_BATCH(batch, 0);
286 OUT_BCS_BATCH(batch, 0);
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
290 /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* JPEG writes the entropy-coded data at a non-zero offset; other codecs start at 0. */
291 bse_offset = (encoder_context->codec == CODEC_JPEG) ? (mfc_context->mfc_indirect_pak_bse_object.offset) : 0;
292 OUT_BCS_RELOC64(batch,
293 mfc_context->mfc_indirect_pak_bse_object.bo,
294 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
296 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
298 OUT_BCS_RELOC64(batch,
299 mfc_context->mfc_indirect_pak_bse_object.bo,
300 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
301 mfc_context->mfc_indirect_pak_bse_object.end_offset);
303 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (16 dwords): frame-level AVC encode parameters —
 * MB counts, entropy mode, transform flags, and conformance limits — derived
 * from the VA picture parameter buffer and the cached surface dimensions. */
307 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
308 struct intel_encoder_context *encoder_context)
310 struct intel_batchbuffer *batch = encoder_context->base.batch;
311 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
312 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
314 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
315 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
317 BEGIN_BCS_BATCH(batch, 16);
319 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
320 /*DW1. MB setting of frame */
322 ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
324 ((height_in_mbs - 1) << 16) |
325 ((width_in_mbs - 1) << 0));
328 (0 << 24) | /* Second Chroma QP Offset */
329 (0 << 16) | /* Chroma QP Offset */
330 (0 << 14) | /* Max-bit conformance Intra flag */
331 (0 << 13) | /* Max Macroblock size conformance Inter flag */
332 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
333 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
334 (0 << 8) | /* FIXME: Image Structure */
335 (0 << 0) ); /* Current Decoded Image Frame Store ID, reserved in Encode mode */
337 (0 << 16) | /* Minimum Frame size */
338 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
339 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
340 (0 << 13) | /* CABAC 0 word insertion test enable */
341 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
342 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
343 (0 << 8) | /* FIXME: MbMvFormatFlag */
344 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
345 (0 << 6) | /* Only valid for VLD decoding mode */
346 (0 << 5) | /* Constrained Intra Prediction Flag, from PPS */
347 (0 << 4) | /* Direct 8x8 inference flag */
348 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
349 (1 << 2) | /* Frame MB only flag */
350 (0 << 1) | /* MBAFF mode is in active */
351 (0 << 0)); /* Field picture flag */
352 /* DW5 Trellis quantization */
353 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
354 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
355 (0xBB8 << 16) | /* InterMbMaxSz */
356 (0xEE8) ); /* IntraMbMaxSz */
357 OUT_BCS_BATCH(batch, 0); /* Reserved */
359 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
360 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
361 /* DW10. Bit setting for MB */
362 OUT_BCS_BATCH(batch, 0x8C000000);
363 OUT_BCS_BATCH(batch, 0x00010000);
365 OUT_BCS_BATCH(batch, 0);
366 OUT_BCS_BATCH(batch, 0x02010100);
367 /* DW14. For short format */
368 OUT_BCS_BATCH(batch, 0);
369 OUT_BCS_BATCH(batch, 0);
371 ADVANCE_BCS_BATCH(batch);
/* Emit one MFX_QM_STATE command (18 dwords): copies up to 16 dwords of
 * quantizer-matrix data into a local buffer and streams it after the
 * qm_type selector. The local copy pads short matrices to the full
 * 16-dword payload the command requires. */
375 gen8_mfc_qm_state(VADriverContextP ctx,
379 struct intel_encoder_context *encoder_context)
381 struct intel_batchbuffer *batch = encoder_context->base.batch;
382 unsigned int qm_buffer[16];
384 assert(qm_length <= 16);
385 assert(sizeof(*qm) == 4);
386 memcpy(qm_buffer, qm, qm_length * 4);
388 BEGIN_BCS_BATCH(batch, 18);
389 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
390 OUT_BCS_BATCH(batch, qm_type << 0);
391 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
392 ADVANCE_BCS_BATCH(batch);
/* Program the four AVC quantizer matrices (4x4 intra/inter, 8x8
 * intra/inter). Uses the flat matrix when neither SPS nor PPS declares a
 * scaling matrix, otherwise pulls lists 0/3 (4x4) and 0/1 (8x8) from the
 * VA IQ-matrix buffer. */
396 gen8_mfc_avc_qm_state(VADriverContextP ctx,
397 struct encode_state *encode_state,
398 struct intel_encoder_context *encoder_context)
400 const unsigned int *qm_4x4_intra;
401 const unsigned int *qm_4x4_inter;
402 const unsigned int *qm_8x8_intra;
403 const unsigned int *qm_8x8_inter;
404 VAEncSequenceParameterBufferH264 *pSeqParameter =
405 (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
406 VAEncPictureParameterBufferH264 *pPicParameter =
407 (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
409 if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
410 && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
411 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
413 VAIQMatrixBufferH264 *qm;
414 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
415 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
416 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
417 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
418 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
419 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
/* 4x4 lists are 48 bytes (12 dwords); 8x8 lists are 64 bytes (16 dwords). */
422 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
423 gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
424 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
425 gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/* Emit one MFX_FQM_STATE command (34 dwords): same pattern as
 * gen8_mfc_qm_state but with a 32-dword forward-quantizer payload of
 * 16-bit reciprocal entries. */
429 gen8_mfc_fqm_state(VADriverContextP ctx,
433 struct intel_encoder_context *encoder_context)
435 struct intel_batchbuffer *batch = encoder_context->base.batch;
436 unsigned int fqm_buffer[32];
438 assert(fqm_length <= 32);
439 assert(sizeof(*fqm) == 4);
440 memcpy(fqm_buffer, fqm, fqm_length * 4);
442 BEGIN_BCS_BATCH(batch, 34);
443 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
444 OUT_BCS_BATCH(batch, fqm_type << 0);
445 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
446 ADVANCE_BCS_BATCH(batch);
/*
 * Build a forward-quantizer matrix from an AVC scaling list.
 *
 * qm:  len*len input scaling-list bytes (must all be non-zero; a zero entry
 *      would divide by zero).
 * fqm: len*len output entries, each 65536/qm in 16-bit fixed point. The
 *      output is the TRANSPOSE of the input (fqm[i][j] <- qm[j][i]), which
 *      converts the raster-order VA scaling list into the column order the
 *      MFX_FQM_STATE payload expects.
 *
 * NOTE(review): when a scaling-list entry is 1, 65536 truncates to 0 in the
 * uint16_t output — this matches the long-standing upstream behavior, so it
 * is preserved here rather than "fixed".
 */
static void
gen8_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int i, j;

    for (i = 0; i < len; i++) {
        for (j = 0; j < len; j++)
            fqm[i * len + j] = (1 << 16) / qm[j * len + i];
    }
}
/* Program the four AVC forward-quantizer matrices. Flat case mirrors
 * gen8_mfc_avc_qm_state; otherwise each scaling list is transposed and
 * converted to 16-bit reciprocals via gen8_mfc_avc_fill_fqm before upload.
 * NOTE(review): the declaration of the local 'fqm' staging array is on a
 * line missing from this chunk. */
459 gen8_mfc_avc_fqm_state(VADriverContextP ctx,
460 struct encode_state *encode_state,
461 struct intel_encoder_context *encoder_context)
463 VAEncSequenceParameterBufferH264 *pSeqParameter =
464 (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
465 VAEncPictureParameterBufferH264 *pPicParameter =
466 (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
468 if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
469 && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
470 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
471 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
472 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
473 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
477 VAIQMatrixBufferH264 *qm;
478 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
479 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* Three 4x4 intra lists (Y, Cb, Cr) packed back to back = 24 dwords. */
481 for (i = 0; i < 3; i++)
482 gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
483 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
485 for (i = 3; i < 6; i++)
486 gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
487 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
489 gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
490 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
492 gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
493 gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/* Emit MFX_INSERT_OBJECT: push raw header/tail bits (e.g. SPS/PPS/SEI NAL
 * units) into the output bitstream. data_bits_in_last_dw==0 is treated as a
 * full 32-bit final dword. 'batch' may be passed NULL-ish by callers and is
 * then replaced with the encoder's default BCS batch.
 * Note: 'lenght_in_dws' misspelling is the upstream parameter name — code
 * identifiers are left untouched here. */
498 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
499 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
500 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
501 struct intel_batchbuffer *batch)
504 batch = encoder_context->base.batch;
506 if (data_bits_in_last_dw == 0)
507 data_bits_in_last_dw = 32;
509 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
511 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
513 (0 << 16) | /* always start at offset 0 */
514 (data_bits_in_last_dw << 8) |
515 (skip_emul_byte_count << 4) |
516 (!!emulation_flag << 3) |
517 ((!!is_last_header) << 2) |
518 ((!!is_end_of_slice) << 1) |
519 (0 << 0)); /* FIXME: ??? */
520 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
522 ADVANCE_BCS_BATCH(batch);
/* Per-frame MFC (re)initialization: derive the MB dimensions from the
 * codec-specific sequence/picture parameters, release all buffers held from
 * the previous frame, and allocate fresh scratch/row-store buffers plus the
 * auxiliary slice batch buffer sized for this frame.
 * NOTE(review): locals 'i' and 'bo' and several dri_bo_alloc argument lines
 * are on lines missing from this chunk. */
526 static void gen8_mfc_init(VADriverContextP ctx,
527 struct encode_state *encode_state,
528 struct intel_encoder_context *encoder_context)
530 struct i965_driver_data *i965 = i965_driver_data(ctx);
531 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
534 int width_in_mbs = 0;
535 int height_in_mbs = 0;
536 int slice_batchbuffer_size;
538 if (encoder_context->codec == CODEC_H264 ||
539 encoder_context->codec == CODEC_H264_MVC) {
540 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
541 width_in_mbs = pSequenceParameter->picture_width_in_mbs;
542 height_in_mbs = pSequenceParameter->picture_height_in_mbs;
543 } else if (encoder_context->codec == CODEC_MPEG2) {
544 VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
546 assert(encoder_context->codec == CODEC_MPEG2);
548 width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
549 height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
551 assert(encoder_context->codec == CODEC_JPEG);
552 VAEncPictureParameterBufferJPEG *pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
554 width_in_mbs = ALIGN(pic_param->picture_width, 16) / 16;
555 height_in_mbs = ALIGN(pic_param->picture_height, 16) / 16;
/* 64 bytes per MB plus fixed slack, plus header/tail room per slice. */
558 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
559 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
561 /*Encode common setup for MFC*/
562 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
563 mfc_context->post_deblocking_output.bo = NULL;
565 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
566 mfc_context->pre_deblocking_output.bo = NULL;
568 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
569 mfc_context->uncompressed_picture_source.bo = NULL;
571 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
572 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
574 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
575 if (mfc_context->direct_mv_buffers[i].bo != NULL)
576 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
577 mfc_context->direct_mv_buffers[i].bo = NULL;
580 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
581 if (mfc_context->reference_surfaces[i].bo != NULL)
582 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
583 mfc_context->reference_surfaces[i].bo = NULL;
/* Fresh per-frame scratch buffers below; old ones are dropped first. */
586 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
587 bo = dri_bo_alloc(i965->intel.bufmgr,
592 mfc_context->intra_row_store_scratch_buffer.bo = bo;
594 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
595 bo = dri_bo_alloc(i965->intel.bufmgr,
597 width_in_mbs * height_in_mbs * 16,
600 mfc_context->macroblock_status_buffer.bo = bo;
602 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
603 bo = dri_bo_alloc(i965->intel.bufmgr,
605 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
608 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
610 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
611 bo = dri_bo_alloc(i965->intel.bufmgr,
613 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
616 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
618 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
619 mfc_context->mfc_batchbuffer_surface.bo = NULL;
621 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
622 mfc_context->aux_batchbuffer_surface.bo = NULL;
624 if (mfc_context->aux_batchbuffer)
625 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
/* The aux batch doubles as a surface: 16-byte blocks for the media kernel. */
627 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
628 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
629 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
630 mfc_context->aux_batchbuffer_surface.pitch = 16;
631 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
632 mfc_context->aux_batchbuffer_surface.size_block = 16;
634 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): wire up every frame-level
 * buffer — pre/post deblocking outputs, source picture, MB status,
 * row-store scratch buffers, and the 16 reference picture slots. Each
 * address triple is followed by a MOCS dword; absent buffers emit zeros. */
638 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
639 struct intel_encoder_context *encoder_context)
641 struct i965_driver_data *i965 = i965_driver_data(ctx);
642 struct intel_batchbuffer *batch = encoder_context->base.batch;
643 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
646 BEGIN_BCS_BATCH(batch, 61);
648 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
650 /* the DW1-3 is for pre_deblocking */
651 if (mfc_context->pre_deblocking_output.bo)
652 OUT_BCS_RELOC64(batch, mfc_context->pre_deblocking_output.bo,
653 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
656 OUT_BCS_BATCH(batch, 0);
657 OUT_BCS_BATCH(batch, 0); /* pre output addr */
660 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
661 /* the DW4-6 is for the post_deblocking */
663 if (mfc_context->post_deblocking_output.bo)
664 OUT_BCS_RELOC64(batch, mfc_context->post_deblocking_output.bo,
665 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
666 0); /* post output addr */
668 OUT_BCS_BATCH(batch, 0);
669 OUT_BCS_BATCH(batch, 0);
672 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
674 /* the DW7-9 is for the uncompressed_picture */
675 OUT_BCS_RELOC64(batch, mfc_context->uncompressed_picture_source.bo,
676 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
677 0); /* uncompressed data */
679 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
681 /* the DW10-12 is for the mb status */
682 OUT_BCS_RELOC64(batch, mfc_context->macroblock_status_buffer.bo,
683 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
684 0); /* StreamOut data*/
686 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
688 /* the DW13-15 is for the intra_row_store_scratch */
689 OUT_BCS_RELOC64(batch, mfc_context->intra_row_store_scratch_buffer.bo,
690 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
693 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
695 /* the DW16-18 is for the deblocking filter */
696 OUT_BCS_RELOC64(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
697 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
700 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
702 /* the DW 19-50 is for Reference pictures*/
703 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
704 if ( mfc_context->reference_surfaces[i].bo != NULL) {
705 OUT_BCS_RELOC64(batch, mfc_context->reference_surfaces[i].bo,
706 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
709 OUT_BCS_BATCH(batch, 0);
710 OUT_BCS_BATCH(batch, 0);
/* DW51: single MOCS dword shared by all reference surfaces. */
715 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
717 /* The DW 52-54 is for the MB status buffer */
718 OUT_BCS_RELOC64(batch, mfc_context->macroblock_status_buffer.bo,
719 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
720 0); /* Macroblock status buffer*/
722 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
724 /* the DW 55-57 is the ILDB buffer */
725 OUT_BCS_BATCH(batch, 0);
726 OUT_BCS_BATCH(batch, 0);
727 OUT_BCS_BATCH(batch, 0);
729 /* the DW 58-60 is the second ILDB buffer */
730 OUT_BCS_BATCH(batch, 0);
731 OUT_BCS_BATCH(batch, 0);
732 OUT_BCS_BATCH(batch, 0);
734 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer addresses for
 * the reference frames, the write MV buffer for the current frame, and the
 * POC list (i/2 pattern fills ascending POC pairs for the references). */
738 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
739 struct intel_encoder_context *encoder_context)
741 struct i965_driver_data *i965 = i965_driver_data(ctx);
742 struct intel_batchbuffer *batch = encoder_context->base.batch;
743 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
747 BEGIN_BCS_BATCH(batch, 71);
749 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
751 /* Reference frames and Current frames */
752 /* the DW1-32 is for the direct MV for reference */
753 for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
754 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
755 OUT_BCS_RELOC64(batch, mfc_context->direct_mv_buffers[i].bo,
756 I915_GEM_DOMAIN_INSTRUCTION, 0,
759 OUT_BCS_BATCH(batch, 0);
760 OUT_BCS_BATCH(batch, 0);
/* DW33: MOCS for the reference direct-MV buffers. */
764 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
766 /* the DW34-36 is the MV for the current reference */
767 OUT_BCS_RELOC64(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
768 I915_GEM_DOMAIN_INSTRUCTION, 0,
771 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: two dwords per reference, then current-frame POCs. */
774 for(i = 0; i < 32; i++) {
775 OUT_BCS_BATCH(batch, i/2);
777 OUT_BCS_BATCH(batch, 0);
778 OUT_BCS_BATCH(batch, 0);
780 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): only the BSD/MPC row-store
 * scratch buffer is programmed; the MPR row store and bitplane read buffer
 * are unused by the encoder and left zero. */
785 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
786 struct intel_encoder_context *encoder_context)
788 struct i965_driver_data *i965 = i965_driver_data(ctx);
789 struct intel_batchbuffer *batch = encoder_context->base.batch;
790 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
792 BEGIN_BCS_BATCH(batch, 10);
794 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
795 OUT_BCS_RELOC64(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
796 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
798 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
800 /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
801 OUT_BCS_BATCH(batch, 0);
802 OUT_BCS_BATCH(batch, 0);
803 OUT_BCS_BATCH(batch, 0);
805 /* the DW7-9 is for Bitplane Read Buffer Base Address */
806 OUT_BCS_BATCH(batch, 0);
807 OUT_BCS_BATCH(batch, 0);
808 OUT_BCS_BATCH(batch, 0);
810 ADVANCE_BCS_BATCH(batch);
/* Frame-level AVC programming sequence: issue every MFX state command in
 * the order the hardware requires (mode select, surface, indirect objects,
 * buffer addresses, image state, QM/FQM, direct mode, ref-idx). Codec-
 * overridable steps go through the mfc_context vtable. */
814 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
815 struct encode_state *encode_state,
816 struct intel_encoder_context *encoder_context)
818 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
820 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
821 mfc_context->set_surface_state(ctx, encoder_context);
822 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
823 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
824 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
825 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
826 mfc_context->avc_qm_state(ctx, encode_state, encoder_context);
827 mfc_context->avc_fqm_state(ctx, encode_state, encoder_context);
828 gen8_mfc_avc_directmode_state(ctx, encoder_context);
829 intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
/* Submit the accumulated BCS batch to the kernel; the actual encode runs
 * asynchronously on the GPU after the flush. Always reports success —
 * completion/errors are observed later via the coded buffer. */
833 static VAStatus gen8_mfc_run(VADriverContextP ctx,
834 struct encode_state *encode_state,
835 struct intel_encoder_context *encoder_context)
837 struct intel_batchbuffer *batch = encoder_context->base.batch;
839 intel_batchbuffer_flush(batch); //run the pipeline
841 return VA_STATUS_SUCCESS;
/* Read back the encoded size: map the coded buffer, convert the segment's
 * byte size to bits for the caller, and unmap. Mapping waits for the GPU,
 * so this also serves as the end-of-frame synchronization point. */
846 gen8_mfc_stop(VADriverContextP ctx,
847 struct encode_state *encode_state,
848 struct intel_encoder_context *encoder_context,
849 int *encoded_bits_size)
851 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
852 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
853 VACodedBufferSegment *coded_buffer_segment;
855 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
856 assert(vaStatus == VA_STATUS_SUCCESS);
857 *encoded_bits_size = coded_buffer_segment->size * 8;
858 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
860 return VA_STATUS_SUCCESS;
865 gen8_mfc_avc_slice_state(VADriverContextP ctx,
866 VAEncPictureParameterBufferH264 *pic_param,
867 VAEncSliceParameterBufferH264 *slice_param,
868 struct encode_state *encode_state,
869 struct intel_encoder_context *encoder_context,
870 int rate_control_enable,
872 struct intel_batchbuffer *batch)
874 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
875 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
876 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
877 int beginmb = slice_param->macroblock_address;
878 int endmb = beginmb + slice_param->num_macroblocks;
879 int beginx = beginmb % width_in_mbs;
880 int beginy = beginmb / width_in_mbs;
881 int nextx = endmb % width_in_mbs;
882 int nexty = endmb / width_in_mbs;
883 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
884 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
886 unsigned char correct[6], grow, shrink;
888 int weighted_pred_idc = 0;
889 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
890 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
891 int num_ref_l0 = 0, num_ref_l1 = 0;
894 batch = encoder_context->base.batch;
896 if (slice_type == SLICE_TYPE_I) {
897 luma_log2_weight_denom = 0;
898 chroma_log2_weight_denom = 0;
899 } else if (slice_type == SLICE_TYPE_P) {
900 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
901 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
903 if (slice_param->num_ref_idx_active_override_flag)
904 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
905 } else if (slice_type == SLICE_TYPE_B) {
906 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
907 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
908 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
910 if (slice_param->num_ref_idx_active_override_flag) {
911 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
912 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
915 if (weighted_pred_idc == 2) {
916 /* 8.4.3 - Derivation process for prediction weights (8-279) */
917 luma_log2_weight_denom = 5;
918 chroma_log2_weight_denom = 5;
922 maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
923 maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
925 for (i = 0; i < 6; i++)
926 correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
928 grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
929 (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
930 shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
931 (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
933 BEGIN_BCS_BATCH(batch, 11);;
935 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
936 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
941 (chroma_log2_weight_denom << 8) |
942 (luma_log2_weight_denom << 0));
945 (weighted_pred_idc << 30) |
946 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
947 (slice_param->disable_deblocking_filter_idc << 27) |
948 (slice_param->cabac_init_idc << 24) |
949 (qp<<16) | /*Slice Quantization Parameter*/
950 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
951 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
953 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
955 slice_param->macroblock_address );
956 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
958 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
959 (1 << 30) | /*ResetRateControlCounter*/
960 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
961 (4 << 24) | /*RC Stable Tolerance, middle level*/
962 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
963 (0 << 22) | /*QP mode, don't modfiy CBP*/
964 (0 << 21) | /*MB Type Direct Conversion Enabled*/
965 (0 << 20) | /*MB Type Skip Conversion Enabled*/
966 (last_slice << 19) | /*IsLastSlice*/
967 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
968 (1 << 17) | /*HeaderPresentFlag*/
969 (1 << 16) | /*SliceData PresentFlag*/
970 (1 << 15) | /*TailPresentFlag*/
971 (1 << 13) | /*RBSP NAL TYPE*/
972 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
973 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
975 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
976 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
986 OUT_BCS_BATCH(batch, 0);
988 ADVANCE_BCS_BATCH(batch);
/* Dword offsets into one VME output record (and a mask) used when choosing
 * between intra and inter coding per macroblock:
 *   AVC_INTRA_RDO_OFFSET / AVC_INTER_RDO_OFFSET - where the intra/inter RDO
 *     costs live (low 16 bits each, see AVC_RDO_MASK),
 *   AVC_INTER_MSG_OFFSET - start of the inter PAK message within the record,
 *   AVC_INTER_MV_OFFSET  - byte offset of the motion vectors in the record. */
991 #define AVC_INTRA_RDO_OFFSET 4
992 #define AVC_INTER_RDO_OFFSET 10
993 #define AVC_INTER_MSG_OFFSET 8
994 #define AVC_INTER_MV_OFFSET 48
995 #define AVC_RDO_MASK 0xFFFF
/*
 * Emit one MFC_AVC_PAK_OBJECT command for an intra-coded macroblock.
 *
 * (x, y) is the macroblock position in MB units, end_mb flags the last MB
 * of the slice, qp is the per-MB quantiser and msg points at this MB's VME
 * output record.  target_mb_size/max_mb_size feed the size-control dword.
 * Returns the command length in dwords (always 12 here).
 *
 * NOTE(review): the incoming batch parameter is immediately overwritten
 * with encoder_context->base.batch below, so the caller's batch argument
 * is effectively ignored.
 */
998 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
999 int qp,unsigned int *msg,
1000 struct intel_encoder_context *encoder_context,
1001 unsigned char target_mb_size, unsigned char max_mb_size,
1002 struct intel_batchbuffer *batch)
1004 int len_in_dwords = 12;
1005 unsigned int intra_msg;
1006 #define INTRA_MSG_FLAG (1 << 13)
1007 #define INTRA_MBTYPE_MASK (0x1F0000)
1009 batch = encoder_context->base.batch;
1011 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* Build the PAK message dword from the VME output: keep the low/flag bits,
 * force the intra flag, and shift the MB-type field down by 8 bits into
 * the position the PAK command expects. */
1013 intra_msg = msg[0] & 0xC0FF;
1014 intra_msg |= INTRA_MSG_FLAG;
1015 intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1016 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1017 OUT_BCS_BATCH(batch, 0);
1018 OUT_BCS_BATCH(batch, 0);
1019 OUT_BCS_BATCH(batch,
1020 (0 << 24) | /* PackedMvNum, Debug*/
1021 (0 << 20) | /* No motion vector */
1022 (1 << 19) | /* CbpDcY */
1023 (1 << 18) | /* CbpDcU */
1024 (1 << 17) | /* CbpDcV */
1027 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
1028 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1029 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
1031 /*Stuff for Intra MB*/
1032 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
1033 OUT_BCS_BATCH(batch, msg[2]);
1034 OUT_BCS_BATCH(batch, msg[3]&0xFF);
1036 /*MaxSizeInWord and TargetSzieInWord*/
1037 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1038 (target_mb_size << 16) );
1040 OUT_BCS_BATCH(batch, 0);
1042 ADVANCE_BCS_BATCH(batch);
1044 return len_in_dwords;
/*
 * Emit one MFC_AVC_PAK_OBJECT command for an inter-coded macroblock.
 *
 * msg points at this MB's VME output record; offset is the byte offset of
 * the MB's motion vectors in the VME output buffer (passed to the hardware
 * as an indirect MV pointer).  Before emitting the command, the VME motion
 * vectors (stored per 4x4 sub-block) are compacted in place into the MV
 * layout the PAK command expects for 8x16 / 16x8 / 8x8 partitions.
 * Returns the command length in dwords (always 12 here).
 *
 * NOTE(review): as in the intra variant, the batch parameter is replaced
 * by encoder_context->base.batch.
 */
1048 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1049 unsigned int *msg, unsigned int offset,
1050 struct intel_encoder_context *encoder_context,
1051 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1052 struct intel_batchbuffer *batch)
1054 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1055 int len_in_dwords = 12;
1056 unsigned int inter_msg = 0;
1058 batch = encoder_context->base.batch;
1060 #define MSG_MV_OFFSET 4
1061 unsigned int *mv_ptr;
1062 mv_ptr = msg + MSG_MV_OFFSET;
1063 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1064 * to convert them to be compatible with the format of AVC_PAK
1067 if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1068 /* MV[0] and MV[2] are replicated */
1069 mv_ptr[4] = mv_ptr[0];
1070 mv_ptr[5] = mv_ptr[1];
1071 mv_ptr[2] = mv_ptr[8];
1072 mv_ptr[3] = mv_ptr[9];
1073 mv_ptr[6] = mv_ptr[8];
1074 mv_ptr[7] = mv_ptr[9];
1075 } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1076 /* MV[0] and MV[1] are replicated */
1077 mv_ptr[2] = mv_ptr[0];
1078 mv_ptr[3] = mv_ptr[1];
1079 mv_ptr[4] = mv_ptr[16];
1080 mv_ptr[5] = mv_ptr[17];
1081 mv_ptr[6] = mv_ptr[24];
1082 mv_ptr[7] = mv_ptr[25];
1083 } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1084 !(msg[1] & SUBMB_SHAPE_MASK)) {
1085 /* Don't touch MV[0] or MV[1] */
1086 mv_ptr[2] = mv_ptr[8];
1087 mv_ptr[3] = mv_ptr[9];
1088 mv_ptr[4] = mv_ptr[16];
1089 mv_ptr[5] = mv_ptr[17];
1090 mv_ptr[6] = mv_ptr[24];
1091 mv_ptr[7] = mv_ptr[25];
1095 BEGIN_BCS_BATCH(batch, len_in_dwords);
1097 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1101 if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1102 if (msg[1] & SUBMB_SHAPE_MASK)
1105 OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
1106 OUT_BCS_BATCH(batch, offset);
/* Assemble the PAK message dword: keep the mode/flag bits from the VME
 * message, select 8-MV mode (upgraded to 32-MV for 8x8 with sub-shapes),
 * and set the CbpDcY/U/V bits. */
1107 inter_msg = msg[0] & (0x1F00FFFF);
1108 inter_msg |= INTER_MV8;
1109 inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1110 if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1111 (msg[1] & SUBMB_SHAPE_MASK)) {
1112 inter_msg |= INTER_MV32;
1115 OUT_BCS_BATCH(batch, inter_msg);
1117 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
1118 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
/* B slices carry an extra field-select nibble in the "last MB" dword. */
1120 if ( slice_type == SLICE_TYPE_B) {
1121 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
1123 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1126 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1129 inter_msg = msg[1] >> 8;
1130 /*Stuff for Inter MB*/
1131 OUT_BCS_BATCH(batch, inter_msg);
1132 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1133 OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1135 /*MaxSizeInWord and TargetSzieInWord*/
1136 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1137 (target_mb_size << 16) );
1139 OUT_BCS_BATCH(batch, 0x0);
1141 ADVANCE_BCS_BATCH(batch);
1143 return len_in_dwords;
/*
 * Program all commands for one AVC slice into slice_batch (software/CPU
 * PAK path): slice state, stream headers (first slice only), packed user
 * data, then one PAK object per macroblock, ending with the tail data
 * insertion.  For each MB the intra-vs-inter decision is made by comparing
 * the RDO costs the VME kernel wrote into its output record.
 */
1147 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1148 struct encode_state *encode_state,
1149 struct intel_encoder_context *encoder_context,
1151 struct intel_batchbuffer *slice_batch)
1153 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1154 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1155 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1156 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1157 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1158 unsigned int *msg = NULL, offset = 0;
1159 unsigned char *msg_ptr = NULL;
1160 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1161 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1162 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1164 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1165 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1166 unsigned int tail_data[] = { 0x0, 0x0 };
1167 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1168 int is_intra = slice_type == SLICE_TYPE_I;
/* Under BRC the QP comes from the rate controller, not from the slice
 * parameter; the slice_qp_delta is rewritten so the emitted slice header
 * matches the QP actually used. */
1173 if (rate_control_mode != VA_RC_CQP) {
1174 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1175 if (encode_state->slice_header_index[slice_index] == 0) {
1176 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1181 /* only support for 8-bit pixel bit-depth */
1182 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1183 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1184 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1185 assert(qp >= 0 && qp < 52);
1187 gen8_mfc_avc_slice_state(ctx,
1190 encode_state, encoder_context,
1191 (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
1193 if ( slice_index == 0)
1194 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1196 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
/* Map the VME output so the per-MB messages can be read on the CPU. */
1198 dri_bo_map(vme_context->vme_output.bo , 1);
1199 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1202 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1204 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1207 for (i = pSliceParameter->macroblock_address;
1208 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1209 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1210 x = i % width_in_mbs;
1211 y = i / width_in_mbs;
1212 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
/* ROI mode overrides the QP per macroblock. */
1213 if (vme_context->roi_enabled) {
1214 qp_mb = *(vme_context->qp_per_mb + i);
1220 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1222 int inter_rdo, intra_rdo;
1223 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1224 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1225 offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
/* Pick whichever coding mode the VME found cheaper for this MB. */
1226 if (intra_rdo < inter_rdo) {
1227 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1229 msg += AVC_INTER_MSG_OFFSET;
1230 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1235 dri_bo_unmap(vme_context->vme_output.bo);
/* Append tail data: 2 dwords for the last slice of the frame, 1 otherwise. */
1238 mfc_context->insert_object(ctx, encoder_context,
1240 2, 1, 1, 0, slice_batch);
1242 mfc_context->insert_object(ctx, encoder_context,
1244 1, 1, 1, 0, slice_batch);
/*
 * Build the slice-level command stream on the CPU ("software" PAK path):
 * program every slice into the auxiliary batchbuffer, terminate it with
 * MI_BATCH_BUFFER_END, and hand its BO back to the caller.  An extra
 * reference is taken on the BO before the batch wrapper is freed, so the
 * BO outlives the wrapper (the return statement itself is outside the
 * visible lines — presumably returns batch_bo; verify against full file).
 */
1249 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1250 struct encode_state *encode_state,
1251 struct intel_encoder_context *encoder_context)
1253 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1254 struct intel_batchbuffer *batch;
1258 batch = mfc_context->aux_batchbuffer;
1259 batch_bo = batch->buffer;
1260 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1261 gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1264 intel_batchbuffer_align(batch, 8);
1266 BEGIN_BCS_BATCH(batch, 2);
1267 OUT_BCS_BATCH(batch, 0);
1268 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1269 ADVANCE_BCS_BATCH(batch);
1271 dri_bo_reference(batch_bo);
1272 intel_batchbuffer_free(batch);
1273 mfc_context->aux_batchbuffer = NULL;
/*
 * Bind the VME output buffer as the input surface of the hardware
 * batchbuffer-generation kernel (binding table slot BIND_IDX_VME_OUTPUT).
 */
1280 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1281 struct encode_state *encode_state,
1282 struct intel_encoder_context *encoder_context)
1284 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1285 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1287 assert(vme_context->vme_output.bo);
1288 mfc_context->buffer_suface_setup(ctx,
1289 &mfc_context->gpe_context,
1290 &vme_context->vme_output,
1291 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1292 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
/*
 * Bind the auxiliary batchbuffer surface as the output of the hardware
 * batchbuffer-generation kernel (slot BIND_IDX_MFC_BATCHBUFFER); the
 * kernel writes the PAK object commands into it.
 */
1296 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1297 struct encode_state *encode_state,
1298 struct intel_encoder_context *encoder_context)
1300 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1301 assert(mfc_context->aux_batchbuffer_surface.bo);
1302 mfc_context->buffer_suface_setup(ctx,
1303 &mfc_context->gpe_context,
1304 &mfc_context->aux_batchbuffer_surface,
1305 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1306 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/* Set up both surfaces (VME output in, aux batchbuffer out) for the
 * hardware batchbuffer-generation kernel. */
1310 gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1311 struct encode_state *encode_state,
1312 struct intel_encoder_context *encoder_context)
1314 gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1315 gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * Fill the interface descriptor remap table (IDRT) for the GPE context:
 * one 32-byte gen8_interface_descriptor_data entry per kernel, pointing
 * at the kernel start, the shared binding table, and 4 constant-URB read
 * lengths.  (The dri_bo map/unmap and loop-advance lines are outside the
 * visible extract.)
 */
1319 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1320 struct encode_state *encode_state,
1321 struct intel_encoder_context *encoder_context)
1323 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1324 struct gen8_interface_descriptor_data *desc;
1327 unsigned char *desc_ptr;
1329 bo = mfc_context->gpe_context.idrt.bo;
1331 assert(bo->virtual);
1332 desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt.offset;
1334 desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1336 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1337 struct i965_kernel *kernel;
1338 kernel = &mfc_context->gpe_context.kernels[i];
1339 assert(sizeof(*desc) == 32);
1340 /* Set up the descriptor table entry for this kernel. */
1341 memset(desc, 0, sizeof(*desc));
/* Kernel start pointer is in 64-byte (cacheline) units, hence >> 6. */
1342 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1343 desc->desc3.sampler_count = 0;
1344 desc->desc3.sampler_state_pointer = 0;
1345 desc->desc4.binding_table_entry_count = 1;
1346 desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1347 desc->desc5.constant_urb_entry_read_offset = 0;
1348 desc->desc5.constant_urb_entry_read_length = 4;
/* Set up the constant (curbe) data for the batchbuffer-generation kernel.
 * The body is largely outside the visible extract; only the context fetch
 * is shown here. */
1360 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1361 struct encode_state *encode_state,
1362 struct intel_encoder_context *encoder_context)
1364 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Size of one generated AVC PAK object command: 48 bytes = 12 dwords,
 * i.e. 3 owords (16-byte units). */
1369 #define AVC_PAK_LEN_IN_BYTE 48
1370 #define AVC_PAK_LEN_IN_OWORD 3
/*
 * Emit one CMD_MEDIA_OBJECT (14 dwords) that asks the batchbuffer kernel
 * to generate PAK commands for number_mb_cmds macroblocks starting at
 * (mb_x, mb_y), writing them at head_offset (in 16-byte owords) into the
 * output surface.  qp, the intra flag, the slice-end position and the
 * forward/backward reference indices are passed as inline data.
 * (Several parameter declarations fall outside the visible extract.)
 */
1373 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1374 uint32_t intra_flag,
1386 uint32_t temp_value;
1387 BEGIN_BATCH(batch, 14);
1389 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1390 OUT_BATCH(batch, 0);
1391 OUT_BATCH(batch, 0);
1392 OUT_BATCH(batch, 0);
1393 OUT_BATCH(batch, 0);
1394 OUT_BATCH(batch, 0);
/* Inline data: head_offset is converted from bytes to owords here. */
1397 OUT_BATCH(batch, head_offset / 16);
1398 OUT_BATCH(batch, (intra_flag) | (qp << 16));
1399 temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1400 OUT_BATCH(batch, temp_value);
1402 OUT_BATCH(batch, number_mb_cmds);
1405 ((slice_end_y << 8) | (slice_end_x)));
1406 OUT_BATCH(batch, fwd_ref);
1407 OUT_BATCH(batch, bwd_ref);
1409 OUT_BATCH(batch, MI_NOOP);
1411 ADVANCE_BATCH(batch);
/*
 * Drive the batchbuffer-generation kernel over one slice: split the
 * slice's macroblocks into chunks and emit one CMD_MEDIA_OBJECT per
 * chunk.  The chunk size is a heuristic based on the picture width; in
 * ROI mode a chunk is additionally cut short at the first MB whose QP
 * differs, so each media object covers a uniform-QP run.
 */
1415 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1416 struct intel_encoder_context *encoder_context,
1417 VAEncSliceParameterBufferH264 *slice_param,
1422 struct intel_batchbuffer *batch = encoder_context->base.batch;
1423 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1424 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1425 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1426 int total_mbs = slice_param->num_macroblocks;
1427 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1428 int number_mb_cmds = 128;
1429 int starting_offset = 0;
1431 int last_mb, slice_end_x, slice_end_y;
1432 int remaining_mb = total_mbs;
1433 uint32_t fwd_ref , bwd_ref, mb_flag;
1435 int number_roi_mbs, max_mb_cmds, i;
1437 last_mb = slice_param->macroblock_address + total_mbs - 1;
1438 slice_end_x = last_mb % width_in_mbs;
1439 slice_end_y = last_mb / width_in_mbs;
/* I slices carry no references; P/B take the VME-selected ref indices. */
1441 if (slice_type == SLICE_TYPE_I) {
1446 fwd_ref = vme_context->ref_index_in_mb[0];
1447 bwd_ref = vme_context->ref_index_in_mb[1];
/* Width-based heuristic for how many MBs one media object handles. */
1451 if (width_in_mbs >= 100) {
1452 number_mb_cmds = width_in_mbs / 5;
1453 } else if (width_in_mbs >= 80) {
1454 number_mb_cmds = width_in_mbs / 4;
1455 } else if (width_in_mbs >= 60) {
1456 number_mb_cmds = width_in_mbs / 3;
1457 } else if (width_in_mbs >= 40) {
1458 number_mb_cmds = width_in_mbs / 2;
1460 number_mb_cmds = width_in_mbs;
1463 max_mb_cmds = number_mb_cmds;
1466 mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1467 mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1469 number_mb_cmds = max_mb_cmds;
/* In ROI mode, stop the chunk at the first QP change so the whole media
 * object uses a single QP value. */
1470 if (vme_context->roi_enabled) {
1473 tmp_qp = *(vme_context->qp_per_mb + starting_offset);
1474 for (i = 1; i < max_mb_cmds; i++) {
1475 if (tmp_qp != *(vme_context->qp_per_mb + starting_offset + i))
1481 number_mb_cmds = number_roi_mbs;
1485 if (number_mb_cmds >= remaining_mb) {
1486 number_mb_cmds = remaining_mb;
1489 gen8_mfc_batchbuffer_emit_object_command(batch,
/* Advance by the chunk just emitted (48 bytes of PAK commands per MB). */
1502 head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1503 remaining_mb -= number_mb_cmds;
1504 starting_offset += number_mb_cmds;
1505 } while (remaining_mb > 0);
/*
 * Program one AVC slice for the hardware ("GPU-generated") PAK path:
 * slice state, headers and packed data go into the aux batchbuffer on the
 * CPU, then space for the per-MB PAK commands is reserved (the kernel
 * fills it in later via gen8_mfc_avc_batchbuffer_slice_command), followed
 * by the aligned tail data.
 */
1509 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1510 struct encode_state *encode_state,
1511 struct intel_encoder_context *encoder_context,
1514 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1515 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1516 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1517 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1518 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1519 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1520 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1521 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1522 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1523 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1524 unsigned int tail_data[] = { 0x0, 0x0 };
1526 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
/* Under BRC the QP comes from the rate controller; rewrite the delta so
 * the slice header matches (same logic as the software path). */
1530 if (rate_control_mode != VA_RC_CQP) {
1531 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1532 if (encode_state->slice_header_index[slice_index] == 0) {
1533 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1538 /* only support for 8-bit pixel bit-depth */
1539 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1540 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1541 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1542 assert(qp >= 0 && qp < 52);
1544 gen8_mfc_avc_slice_state(ctx,
1549 (rate_control_mode != VA_RC_CQP),
1553 if (slice_index == 0)
1554 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1556 intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1558 intel_batchbuffer_align(slice_batch, 64); /* aligned by an Cache-line */
1559 head_offset = intel_batchbuffer_used_size(slice_batch);
/* Reserve room in the batch for the kernel-generated PAK commands:
 * 48 bytes per macroblock of this slice. */
1561 slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1563 gen8_mfc_avc_batchbuffer_slice_command(ctx,
1571 /* Aligned for tail */
1572 intel_batchbuffer_align(slice_batch, 64); /* aligned by Cache-line */
1574 mfc_context->insert_object(ctx,
1585 mfc_context->insert_object(ctx,
/*
 * Run the GPE media pipeline that generates the PAK batchbuffer on the
 * GPU: pipeline setup (Gen8 or Gen9 variant), per-slice programming,
 * then batch termination (MI_BATCH_BUFFER_END in the aux batch,
 * CMD_MEDIA_STATE_FLUSH in the render batch) and flush.  The aux
 * batchbuffer wrapper is freed here; its BO lives on via the reference
 * held by aux_batchbuffer_surface.
 */
1601 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1602 struct encode_state *encode_state,
1603 struct intel_encoder_context *encoder_context)
1605 struct i965_driver_data *i965 = i965_driver_data(ctx);
1606 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1607 struct intel_batchbuffer *batch = encoder_context->base.batch;
1610 intel_batchbuffer_start_atomic(batch, 0x4000);
1612 if (IS_GEN9(i965->intel.device_info))
1613 gen9_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1615 gen8_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1617 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1618 gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1621 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1623 intel_batchbuffer_align(slice_batch, 8);
1624 BEGIN_BCS_BATCH(slice_batch, 2);
1625 OUT_BCS_BATCH(slice_batch, 0);
1626 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1627 ADVANCE_BCS_BATCH(slice_batch);
1629 BEGIN_BATCH(batch, 2);
1630 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
1631 OUT_BATCH(batch, 0);
1632 ADVANCE_BATCH(batch);
1634 intel_batchbuffer_free(slice_batch);
1635 mfc_context->aux_batchbuffer = NULL;
1638 if (IS_GEN9(i965->intel.device_info))
1639 gen9_gpe_pipeline_end(ctx, &mfc_context->gpe_context, batch);
1641 intel_batchbuffer_end_atomic(batch);
1642 intel_batchbuffer_flush(batch);
/* Build the AVC PAK batchbuffer on the GPU: surfaces, interface
 * descriptors, kernel constants, then run the media pipeline. */
1647 gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1648 struct encode_state *encode_state,
1649 struct intel_encoder_context *encoder_context)
1651 gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1652 gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1653 gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1654 gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/*
 * Hardware PAK path: build the slice batchbuffer with the GPU kernel and
 * return its BO.  The caller owns the extra reference taken here and must
 * dri_bo_unreference() it.
 */
1658 gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1659 struct encode_state *encode_state,
1660 struct intel_encoder_context *encoder_context)
1662 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1664 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1665 gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1667 return mfc_context->aux_batchbuffer_surface.bo;
/*
 * Program the whole AVC BCS pipeline for one frame: build the slice-level
 * batch (software or hardware path depending on soft_batch_force), then
 * emit picture-level state followed by a second-level MI_BATCH_BUFFER_START
 * chaining to the slice batch BO.  Interlaced input is rejected up front.
 */
1671 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1672 struct encode_state *encode_state,
1673 struct intel_encoder_context *encoder_context)
1675 struct intel_batchbuffer *batch = encoder_context->base.batch;
1676 dri_bo *slice_batch_bo;
1678 if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1679 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1684 if (encoder_context->soft_batch_force)
1685 slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1687 slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1691 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1692 intel_batchbuffer_emit_mi_flush(batch);
1694 // picture level programing
1695 gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the slice batch as a second-level batch buffer. */
1697 BEGIN_BCS_BATCH(batch, 3);
1698 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1699 OUT_BCS_RELOC64(batch,
1701 I915_GEM_DOMAIN_COMMAND, 0,
1703 ADVANCE_BCS_BATCH(batch);
1706 intel_batchbuffer_end_atomic(batch);
/* Drop the reference returned by the batchbuffer builder. */
1708 dri_bo_unreference(slice_batch_bo);
/*
 * Top-level AVC encode entry point for one picture: init the MFC state,
 * prepare reference/reconstructed surfaces, program and run the BCS
 * pipeline.  Under CBR/VBR the resulting bitstream size is fed back into
 * the BRC; on an unrepairable HRD violation (over/underflow at the QP
 * limit) a warning is printed once and the frame is accepted as-is.
 * Returns a VA_STATUS_* code.
 */
1713 gen8_mfc_avc_encode_picture(VADriverContextP ctx,
1714 struct encode_state *encode_state,
1715 struct intel_encoder_context *encoder_context)
1717 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1718 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1719 int current_frame_bits_size;
1723 gen8_mfc_init(ctx, encode_state, encoder_context);
1724 intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1725 /*Programing bcs pipeline*/
1726 gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1727 gen8_mfc_run(ctx, encode_state, encoder_context);
1728 if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
/* Fixed: "&current_frame_bits_size" had been mangled into the HTML
 * entity for '¤' ("&curren;") by a bad text conversion. */
1729 gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1730 sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
1731 if (sts == BRC_NO_HRD_VIOLATION) {
1732 intel_mfc_hrd_context_update(encode_state, mfc_context);
1735 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1736 if (!mfc_context->hrd.violation_noted) {
1737 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1738 mfc_context->hrd.violation_noted = 1;
1740 return VA_STATUS_SUCCESS;
1747 return VA_STATUS_SUCCESS;
1755 va_to_gen8_mpeg2_picture_type[3] = {
/*
 * Emit MFX_MPEG2_PIC_STATE (13 dwords) from the VAAPI MPEG-2 picture
 * parameters: f_codes, picture-coding-extension flags, picture type and
 * frame dimensions in MBs.  The first slice's quantiser_scale_code is
 * peeked to pick a non-zero inter/intra MB-rate tuning dword for very
 * coarse quantisers.
 */
1762 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1763 struct intel_encoder_context *encoder_context,
1764 struct encode_state *encode_state)
1766 struct intel_batchbuffer *batch = encoder_context->base.batch;
1767 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1768 VAEncPictureParameterBufferMPEG2 *pic_param;
1769 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1770 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1771 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1773 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1774 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1775 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1777 BEGIN_BCS_BATCH(batch, 13);
1778 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1779 OUT_BCS_BATCH(batch,
1780 (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1781 (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1782 (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1783 (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1784 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1785 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1786 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1787 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1788 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1789 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1790 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1791 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1792 OUT_BCS_BATCH(batch,
1793 0 << 14 | /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1794 va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1796 OUT_BCS_BATCH(batch,
1797 1 << 31 | /* slice concealment */
1798 (height_in_mbs - 1) << 16 |
1799 (width_in_mbs - 1));
/* For coarse quantisers use a non-zero MB-rate control tuning dword. */
1801 if (slice_param && slice_param->quantiser_scale_code >= 14)
1802 OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1804 OUT_BCS_BATCH(batch, 0);
1806 OUT_BCS_BATCH(batch, 0);
1807 OUT_BCS_BATCH(batch,
1808 0xFFF << 16 | /* InterMBMaxSize */
1809 0xFFF << 0 | /* IntraMBMaxSize */
1811 OUT_BCS_BATCH(batch, 0);
1812 OUT_BCS_BATCH(batch, 0);
1813 OUT_BCS_BATCH(batch, 0);
1814 OUT_BCS_BATCH(batch, 0);
1815 OUT_BCS_BATCH(batch, 0);
1816 OUT_BCS_BATCH(batch, 0);
1817 ADVANCE_BCS_BATCH(batch);
/*
 * Load the MPEG-2 quantiser matrices: the default intra matrix from the
 * MPEG-2 spec and a flat (all-16) non-intra matrix, each uploaded as
 * 16 dwords via gen8_mfc_qm_state.
 */
1821 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1823 unsigned char intra_qm[64] = {
1824 8, 16, 19, 22, 26, 27, 29, 34,
1825 16, 16, 22, 24, 27, 29, 34, 37,
1826 19, 22, 26, 27, 29, 34, 34, 38,
1827 22, 22, 26, 27, 29, 34, 37, 40,
1828 22, 26, 27, 29, 32, 35, 40, 48,
1829 26, 27, 29, 32, 35, 40, 48, 58,
1830 26, 27, 29, 34, 38, 46, 56, 69,
1831 27, 29, 35, 38, 46, 56, 69, 83
1834 unsigned char non_intra_qm[64] = {
1835 16, 16, 16, 16, 16, 16, 16, 16,
1836 16, 16, 16, 16, 16, 16, 16, 16,
1837 16, 16, 16, 16, 16, 16, 16, 16,
1838 16, 16, 16, 16, 16, 16, 16, 16,
1839 16, 16, 16, 16, 16, 16, 16, 16,
1840 16, 16, 16, 16, 16, 16, 16, 16,
1841 16, 16, 16, 16, 16, 16, 16, 16,
1842 16, 16, 16, 16, 16, 16, 16, 16
1845 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1846 gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
/*
 * Load the forward (reciprocal) quantiser matrices used by the encoder:
 * each entry is 65536/qm so the hardware can quantise by multiplication.
 * The non-intra table is flat (65536/16 = 0x1000).  Uploaded as 32 dwords
 * each via gen8_mfc_fqm_state.
 * NOTE(review): a few intra entries (e.g. 65536/0x13 in rows 4-5) do not
 * mirror the intra_qm table above exactly — looks like transcription
 * quirks carried from the original source; verify against the default
 * MPEG-2 intra matrix before changing.
 */
1850 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1852 unsigned short intra_fqm[64] = {
1853 65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1854 65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1855 65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1856 65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1857 65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1858 65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1859 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1860 65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1863 unsigned short non_intra_fqm[64] = {
1864 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1865 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1866 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1867 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1868 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1869 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1870 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1871 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1874 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1875 gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
/*
 * Emit MFC_MPEG2_SLICEGROUP_STATE (8 dwords) for one slice group:
 * last-group / intra flags, the group QP, and the indirect PAK-BSE
 * bitstream offset.  The bitstream pointer is only loaded once per frame
 * because the pic state sets LoadSlicePointerFlag to 0.
 */
1879 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1880 struct intel_encoder_context *encoder_context,
1882 int next_x, int next_y,
1883 int is_fisrt_slice_group,
1884 int is_last_slice_group,
1887 struct intel_batchbuffer *batch)
1889 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1892 batch = encoder_context->base.batch;
1894 BEGIN_BCS_BATCH(batch, 8);
1896 OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1897 OUT_BCS_BATCH(batch,
1898 0 << 31 | /* MbRateCtrlFlag */
1899 !!is_last_slice_group << 19 | /* IsLastSliceGrp */
1900 1 << 17 | /* Insert Header before the first slice group data */
1901 1 << 16 | /* SliceData PresentFlag: always 1 */
1902 1 << 15 | /* TailPresentFlag: always 1 */
1903 0 << 14 | /* FirstSliceHdrDisabled: slice header for each slice */
1904 !!intra_slice << 13 | /* IntraSlice */
1905 !!intra_slice << 12 | /* IntraSliceFlag */
1907 OUT_BCS_BATCH(batch,
1913 OUT_BCS_BATCH(batch, qp); /* FIXME: SliceGroupQp */
1914 /* bitstream pointer is only loaded once for the first slice of a frame when
1915 * LoadSlicePointerFlag is 0
1917 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1918 OUT_BCS_BATCH(batch, 0); /* FIXME: */
1919 OUT_BCS_BATCH(batch, 0); /* FIXME: CorrectPoints */
1920 OUT_BCS_BATCH(batch, 0); /* FIXME: CVxxx */
1922 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFC_MPEG2_PAK_OBJECT (9 dwords) for an intra MPEG-2
 * macroblock at (x, y): MB type/flags, position, size control + CBP,
 * slice/slice-group boundary flags with the quantiser scale code, and
 * four zero motion-vector dwords.  Returns the command length in dwords.
 * NOTE(review): the batch parameter is replaced with
 * encoder_context->base.batch, same as the AVC PAK helpers.
 */
1926 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1927 struct intel_encoder_context *encoder_context,
1929 int first_mb_in_slice,
1930 int last_mb_in_slice,
1931 int first_mb_in_slice_group,
1932 int last_mb_in_slice_group,
1935 int coded_block_pattern,
1936 unsigned char target_size_in_word,
1937 unsigned char max_size_in_word,
1938 struct intel_batchbuffer *batch)
1940 int len_in_dwords = 9;
1943 batch = encoder_context->base.batch;
1945 BEGIN_BCS_BATCH(batch, len_in_dwords);
1947 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1948 OUT_BCS_BATCH(batch,
1949 0 << 24 | /* PackedMvNum */
1950 0 << 20 | /* MvFormat */
1951 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
1952 0 << 15 | /* TransformFlag: frame DCT */
1953 0 << 14 | /* FieldMbFlag */
1954 1 << 13 | /* IntraMbFlag */
1955 mb_type << 8 | /* MbType: Intra */
1956 0 << 2 | /* SkipMbFlag */
1957 0 << 0 | /* InterMbMode */
1959 OUT_BCS_BATCH(batch, y << 16 | x);
1960 OUT_BCS_BATCH(batch,
1961 max_size_in_word << 24 |
1962 target_size_in_word << 16 |
1963 coded_block_pattern << 6 | /* CBP */
1965 OUT_BCS_BATCH(batch,
1966 last_mb_in_slice << 31 |
1967 first_mb_in_slice << 30 |
1968 0 << 27 | /* EnableCoeffClamp */
1969 last_mb_in_slice_group << 26 |
1970 0 << 25 | /* MbSkipConvDisable */
1971 first_mb_in_slice_group << 24 |
1972 0 << 16 | /* MvFieldSelect */
1973 qp_scale_code << 0 |
/* Intra MBs carry no motion; all four MV dwords are zero. */
1975 OUT_BCS_BATCH(batch, 0); /* MV[0][0] */
1976 OUT_BCS_BATCH(batch, 0); /* MV[1][0] */
1977 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
1978 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
1980 ADVANCE_BCS_BATCH(batch);
1982 return len_in_dwords;
1986 #define MPEG2_INTER_MV_OFFSET 48 /* byte offset of the MV pair inside a VME output record — TODO confirm against VME kernel layout */
/* Per-f_code clamp bounds for MPEG-2 motion vectors; the table body
 * (initializer for indices 1..9) is on lines elided from this extract. */
1988 static struct _mv_ranges
1990 int low; /* in the unit of 1/2 pixel */
1991 int high; /* in the unit of 1/2 pixel */
/*
 * Clamp a half-pel motion-vector component so that (a) the 16-pixel
 * macroblock prediction stays inside the picture and (b) the value is
 * representable with the given f_code (valid range 1..9).
 * mv: half-pel MV component; pos: MB coordinate on this axis;
 * display_max: picture extent in pixels on this axis.
 */
2006 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
/* out-of-picture check done in half-pel units, hence the "* 2";
 * the taken branch (presumably mv = 0 / early return) is elided */
2008 if (mv + pos * 16 * 2 < 0 ||
2009 mv + (pos + 1) * 16 * 2 > display_max * 2)
2012 if (f_code > 0 && f_code < 10) {
2013 if (mv < mv_ranges[f_code].low)
2014 mv = mv_ranges[f_code].low;
2016 if (mv > mv_ranges[f_code].high)
2017 mv = mv_ranges[f_code].high;
/*
 * Emit one MFC_MPEG2_PAK_OBJECT command (9 DWORDs) for an INTER
 * (frame-predicted) macroblock, pulling the motion vectors out of the
 * VME output record and clamping them via mpeg2_motion_vector().
 * Returns the command length in DWORDs.
 */
2024 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2025 struct encode_state *encode_state,
2026 struct intel_encoder_context *encoder_context,
2028 int width_in_mbs, int height_in_mbs,
2030 int first_mb_in_slice,
2031 int last_mb_in_slice,
2032 int first_mb_in_slice_group,
2033 int last_mb_in_slice_group,
2035 unsigned char target_size_in_word,
2036 unsigned char max_size_in_word,
2037 struct intel_batchbuffer *batch)
2039 VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2040 int len_in_dwords = 9;
2041 short *mvptr, mvx0, mvy0, mvx1, mvy1;
2044 batch = encoder_context->base.batch;
/* NOTE(review): stray double semicolon at end of this line (harmless but should be cleaned up) */
2046 mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
/* VME MVs are divided by 2 before clamping; f_code[0][*] = forward,
 * f_code[1][*] = backward, horizontal component per T.81-style [r][s] layout */
2047 mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2048 mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2049 mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2050 mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2052 BEGIN_BCS_BATCH(batch, len_in_dwords);
2054 OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2055 OUT_BCS_BATCH(batch,
2056 2 << 24 | /* PackedMvNum */
2057 7 << 20 | /* MvFormat */
2058 7 << 17 | /* CbpDcY/CbpDcU/CbpDcV */
2059 0 << 15 | /* TransformFlag: frame DCT */
2060 0 << 14 | /* FieldMbFlag */
2061 0 << 13 | /* IntraMbFlag */
2062 1 << 8 | /* MbType: Frame-based */
2063 0 << 2 | /* SkipMbFlag */
2064 0 << 0 | /* InterMbMode */
2066 OUT_BCS_BATCH(batch, y << 16 | x);
2067 OUT_BCS_BATCH(batch,
2068 max_size_in_word << 24 |
2069 target_size_in_word << 16 |
2070 0x3f << 6 | /* CBP */
2072 OUT_BCS_BATCH(batch,
2073 last_mb_in_slice << 31 |
2074 first_mb_in_slice << 30 |
2075 0 << 27 | /* EnableCoeffClamp */
2076 last_mb_in_slice_group << 26 |
2077 0 << 25 | /* MbSkipConvDisable */
2078 first_mb_in_slice_group << 24 |
2079 0 << 16 | /* MvFieldSelect */
2080 qp_scale_code << 0 |
/* pack x in low 16 bits, y in high 16 bits of each MV DWORD */
2083 OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16); /* MV[0][0] */
2084 OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16); /* MV[1][0] */
2085 OUT_BCS_BATCH(batch, 0); /* MV[0][1] */
2086 OUT_BCS_BATCH(batch, 0); /* MV[1][1] */
2088 ADVANCE_BCS_BATCH(batch);
2090 return len_in_dwords;
/*
 * Insert the application-supplied packed SPS (sequence header) and PPS
 * (picture header) bitstreams into the slice batch via the MFC
 * insert_object hook. MPEG-2 needs no emulation-prevention bytes, so
 * insertion is a straight bit copy. Each header is optional: it is only
 * emitted when the app attached packed_header_data for its index.
 */
2094 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2095 struct encode_state *encode_state,
2096 struct intel_encoder_context *encoder_context,
2097 struct intel_batchbuffer *slice_batch)
2099 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2100 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2102 if (encode_state->packed_header_data[idx]) {
2103 VAEncPackedHeaderParameterBuffer *param = NULL;
2104 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2105 unsigned int length_in_bits;
2107 assert(encode_state->packed_header_param[idx]);
2108 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2109 length_in_bits = param->bit_length;
2111 mfc_context->insert_object(ctx,
/* whole DWORDs to copy, then the residual bit count in the last DWORD */
2114 ALIGN(length_in_bits, 32) >> 5,
2115 length_in_bits & 0x1f,
2116 5, /* FIXME: check it */
2119 0, /* Needn't insert emulation bytes for MPEG-2 */
/* same procedure for the packed picture header (PPS slot) */
2123 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2125 if (encode_state->packed_header_data[idx]) {
2126 VAEncPackedHeaderParameterBuffer *param = NULL;
2127 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2128 unsigned int length_in_bits;
2130 assert(encode_state->packed_header_param[idx]);
2131 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2132 length_in_bits = param->bit_length;
2134 mfc_context->insert_object(ctx,
2137 ALIGN(length_in_bits, 32) >> 5,
2138 length_in_bits & 0x1f,
2139 5, /* FIXME: check it */
2142 0, /* Needn't insert emulation bytes for MPEG-2 */
/*
 * Program one MPEG-2 slice group into the slice batch:
 *   1. slice-group state command,
 *   2. packed SPS/PPS headers (first group of the picture only),
 *   3. one PAK object per macroblock (intra, or intra/inter chosen by
 *      comparing the VME RDO costs),
 *   4. a section or tail delimiter depending on whether this is the
 *      last group of the picture.
 * Macroblock data is read from the mapped VME output BO, one
 * size_block-sized record per MB.
 */
2148 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2149 struct encode_state *encode_state,
2150 struct intel_encoder_context *encoder_context,
2152 VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2153 struct intel_batchbuffer *slice_batch)
2155 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2156 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2157 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2158 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2159 unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2160 unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2161 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2162 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2164 int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2165 unsigned int *msg = NULL;
2166 unsigned char *msg_ptr = NULL;
2168 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2169 h_start_pos = slice_param->macroblock_address % width_in_mbs;
2170 v_start_pos = slice_param->macroblock_address / width_in_mbs;
/* MPEG-2 slices may not wrap across macroblock rows */
2171 assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2173 dri_bo_map(vme_context->vme_output.bo , 0);
2174 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
/* compute where the next slice group begins; end-of-picture if none */
2176 if (next_slice_group_param) {
2177 h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2178 v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2180 h_next_start_pos = 0;
2181 v_next_start_pos = height_in_mbs;
2184 gen8_mfc_mpeg2_slicegroup_state(ctx,
2191 next_slice_group_param == NULL,
2192 slice_param->is_intra_slice,
2193 slice_param->quantiser_scale_code,
2196 if (slice_index == 0)
2197 intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2199 /* Insert '00' to make sure the header is valid */
2200 mfc_context->insert_object(ctx,
2202 (unsigned int*)section_delimiter,
2204 8, /* 8bits in the last DWORD */
2211 for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2212 /* PAK for each macroblocks */
2213 for (j = 0; j < slice_param->num_macroblocks; j++) {
2214 int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2215 int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2216 int first_mb_in_slice = (j == 0);
2217 int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2218 int first_mb_in_slice_group = (i == 0 && j == 0);
2219 int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2220 j == slice_param->num_macroblocks - 1);
/* VME output record for this macroblock */
2222 msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2224 if (slice_param->is_intra_slice) {
2225 gen8_mfc_mpeg2_pak_object_intra(ctx,
2230 first_mb_in_slice_group,
2231 last_mb_in_slice_group,
2233 slice_param->quantiser_scale_code,
/* inter slice: pick intra/inter per MB from the VME RDO costs.
 * NOTE(review): offsets reuse the AVC_*_RDO_OFFSET constants — presumably
 * the MPEG-2 VME kernel shares that record layout; confirm. */
2239 int inter_rdo, intra_rdo;
2240 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2241 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2243 if (intra_rdo < inter_rdo)
2244 gen8_mfc_mpeg2_pak_object_intra(ctx,
2249 first_mb_in_slice_group,
2250 last_mb_in_slice_group,
2252 slice_param->quantiser_scale_code,
2258 gen8_mfc_mpeg2_pak_object_inter(ctx,
2262 width_in_mbs, height_in_mbs,
2266 first_mb_in_slice_group,
2267 last_mb_in_slice_group,
2268 slice_param->quantiser_scale_code,
2278 dri_bo_unmap(vme_context->vme_output.bo);
2281 if (next_slice_group_param == NULL) { /* end of a picture */
2282 mfc_context->insert_object(ctx,
2284 (unsigned int *)tail_delimiter,
2286 8, /* 8bits in the last DWORD */
2292 } else { /* end of a slice group */
2293 mfc_context->insert_object(ctx,
2295 (unsigned int *)section_delimiter,
2297 8, /* 8bits in the last DWORD */
2307 * A batch buffer for all slices, including slice state,
2308 * slice insert object and slice pak object commands
/*
 * Build the per-slice command stream in the auxiliary batch buffer and
 * hand back its BO. Ownership: the returned dri_bo carries a reference
 * taken here; the caller must dri_bo_unreference() it. The aux batch
 * itself is freed and detached from the context.
 */
2312 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2313 struct encode_state *encode_state,
2314 struct intel_encoder_context *encoder_context)
2316 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2317 struct intel_batchbuffer *batch;
2318 VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2322 batch = mfc_context->aux_batchbuffer;
2323 batch_bo = batch->buffer;
/* peek at the following slice group so each group knows where the next starts */
2325 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2326 if (i == encode_state->num_slice_params_ext - 1)
2327 next_slice_group_param = NULL;
2329 next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2331 gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
/* pad to QWORD alignment, then terminate the second-level batch */
2334 intel_batchbuffer_align(batch, 8);
2336 BEGIN_BCS_BATCH(batch, 2);
2337 OUT_BCS_BATCH(batch, 0);
2338 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2339 ADVANCE_BCS_BATCH(batch);
/* keep the BO alive past intel_batchbuffer_free(); caller unreferences it */
2341 dri_bo_reference(batch_bo);
2342 intel_batchbuffer_free(batch);
2343 mfc_context->aux_batchbuffer = NULL;
/*
 * Emit all picture-level MFX state for an MPEG-2 encode: pipe mode,
 * surfaces, indirect-object base, buffer addresses, picture state and
 * the (inverse) quantizer matrices. Slice-level commands are emitted
 * separately via the slice batch.
 */
2349 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2350 struct encode_state *encode_state,
2351 struct intel_encoder_context *encoder_context)
2353 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2355 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2356 mfc_context->set_surface_state(ctx, encoder_context);
2357 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2358 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2359 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2360 gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2361 gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2362 gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
/*
 * Top-level BCS programming for one MPEG-2 frame: build the second-level
 * slice batch, then atomically emit flush + picture state + a chained
 * MI_BATCH_BUFFER_START into the slice batch. The slice BO reference
 * returned by the builder is dropped once it is relocated into the batch.
 */
2366 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2367 struct encode_state *encode_state,
2368 struct intel_encoder_context *encoder_context)
2370 struct intel_batchbuffer *batch = encoder_context->base.batch;
2371 dri_bo *slice_batch_bo;
2373 slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2376 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
2377 intel_batchbuffer_emit_mi_flush(batch);
2379 // picture level programming
2380 gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* chain to the second-level slice batch (48-bit address: reloc + upper DW) */
2382 BEGIN_BCS_BATCH(batch, 4);
2383 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2384 OUT_BCS_RELOC64(batch,
2386 I915_GEM_DOMAIN_COMMAND, 0,
2388 OUT_BCS_BATCH(batch, 0);
2389 ADVANCE_BCS_BATCH(batch);
2392 intel_batchbuffer_end_atomic(batch);
2394 dri_bo_unreference(slice_batch_bo);
/*
 * Bind all buffer objects needed for an MPEG-2 encode pass:
 * reconstructed surface, forward/backward references, input YUV and the
 * coded (output) buffer. Every BO stored in mfc_context gets its own
 * reference; they are presumably released by the context teardown /
 * gen8_mfc_init on elided lines — confirm.
 */
2398 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2399 struct encode_state *encode_state,
2400 struct intel_encoder_context *encoder_context)
2402 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2403 struct object_surface *obj_surface;
2404 struct object_buffer *obj_buffer;
2405 struct i965_coded_buffer_segment *coded_buffer_segment;
2406 VAStatus vaStatus = VA_STATUS_SUCCESS;
2410 /* reconstructed surface */
2411 obj_surface = encode_state->reconstructed_object;
2412 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2413 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2414 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2415 mfc_context->surface_state.width = obj_surface->orig_width;
2416 mfc_context->surface_state.height = obj_surface->orig_height;
2417 mfc_context->surface_state.w_pitch = obj_surface->width;
2418 mfc_context->surface_state.h_pitch = obj_surface->height;
2420 /* forward reference */
2421 obj_surface = encode_state->reference_objects[0];
2423 if (obj_surface && obj_surface->bo) {
2424 mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2425 dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2427 mfc_context->reference_surfaces[0].bo = NULL;
2429 /* backward reference */
2430 obj_surface = encode_state->reference_objects[1];
2432 if (obj_surface && obj_surface->bo) {
2433 mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2434 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
/* no backward reference (P picture): reuse the forward reference */
2436 mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2438 if (mfc_context->reference_surfaces[1].bo)
2439 dri_bo_reference(mfc_context->reference_surfaces[1].bo);
/* fill the remaining reference slots by alternating fwd/bwd */
2442 for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2443 mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2445 if (mfc_context->reference_surfaces[i].bo)
2446 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2449 /* input YUV surface */
2450 obj_surface = encode_state->input_yuv_object;
2451 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2452 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* coded (output) buffer: bitstream lands after the driver's header segment */
2455 obj_buffer = encode_state->coded_buf_object;
2456 bo = obj_buffer->buffer_store->bo;
2457 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2458 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2459 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2460 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2462 /* set the internal flag to 0 to indicate the coded size is unknown */
2464 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2465 coded_buffer_segment->mapped = 0;
2466 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Entry point for encoding one MPEG-2 picture: (re)initialize the MFC
 * context, bind buffers, program the BCS pipeline, then submit.
 */
2473 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx,
2474 struct encode_state *encode_state,
2475 struct intel_encoder_context *encoder_context)
2477 gen8_mfc_init(ctx, encode_state, encoder_context);
2478 intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2479 /* Programming the BCS pipeline */
2480 gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2481 gen8_mfc_run(ctx, encode_state, encoder_context);
2483 return VA_STATUS_SUCCESS;
2486 /* JPEG encode methods */
/*
 * Bind the buffer objects for a JPEG encode: input YUV surface and the
 * coded (output) buffer. JPEG needs no reference or reconstructed
 * surfaces, so this is the trimmed-down analogue of
 * intel_mfc_mpeg2_prepare().
 */
2489 intel_mfc_jpeg_prepare(VADriverContextP ctx,
2490 struct encode_state *encode_state,
2491 struct intel_encoder_context *encoder_context)
2493 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2494 struct object_surface *obj_surface;
2495 struct object_buffer *obj_buffer;
2496 struct i965_coded_buffer_segment *coded_buffer_segment;
2497 VAStatus vaStatus = VA_STATUS_SUCCESS;
2500 /* input YUV surface */
2501 obj_surface = encode_state->input_yuv_object;
2502 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2503 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* coded (output) buffer: bitstream written after the driver header segment */
2506 obj_buffer = encode_state->coded_buf_object;
2507 bo = obj_buffer->buffer_store->bo;
2508 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2509 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2510 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2511 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2513 /* set the internal flag to 0 to indicate the coded size is unknown */
2515 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2516 coded_buffer_segment->mapped = 0;
2517 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit MFX_SURFACE_STATE (6 DWORDs) for the JPEG input surface,
 * selecting the surface format from the fourcc and programming the
 * Cb/Cr plane Y-offsets.
 *
 * NOTE(review): surface_format is first set by the ternary below and then
 * unconditionally reassigned in the switch — the ternary looks redundant
 * (perhaps a remnant); the switch value is what is actually emitted.
 */
2525 gen8_mfc_jpeg_set_surface_state(VADriverContextP ctx,
2526 struct intel_encoder_context *encoder_context,
2527 struct encode_state *encode_state)
2529 struct intel_batchbuffer *batch = encoder_context->base.batch;
2530 struct object_surface *obj_surface = encode_state->input_yuv_object;
2531 unsigned int input_fourcc;
2532 unsigned int y_cb_offset;
2533 unsigned int y_cr_offset;
2534 unsigned int surface_format;
2536 assert(obj_surface);
2538 y_cb_offset = obj_surface->y_cb_offset;
2539 y_cr_offset = obj_surface->y_cr_offset;
2540 input_fourcc = obj_surface->fourcc;
2542 surface_format = (obj_surface->fourcc == VA_FOURCC_Y800) ?
2543 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
2546 switch (input_fourcc) {
2547 case VA_FOURCC_Y800: {
2548 surface_format = MFX_SURFACE_MONOCHROME;
2551 case VA_FOURCC_NV12: {
2552 surface_format = MFX_SURFACE_PLANAR_420_8;
2555 case VA_FOURCC_UYVY: {
2556 surface_format = MFX_SURFACE_YCRCB_SWAPY;
2559 case VA_FOURCC_YUY2: {
2560 surface_format = MFX_SURFACE_YCRCB_NORMAL;
2563 case VA_FOURCC_RGBA:
2564 case VA_FOURCC_444P: {
2565 surface_format = MFX_SURFACE_R8G8B8A8_UNORM;
2570 BEGIN_BCS_BATCH(batch, 6);
2572 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2573 OUT_BCS_BATCH(batch, 0);
2574 OUT_BCS_BATCH(batch,
2575 ((obj_surface->orig_height - 1) << 18) |
2576 ((obj_surface->orig_width - 1) << 4));
2577 OUT_BCS_BATCH(batch,
2578 (surface_format << 28) | /* Surface Format */
2579 (0 << 27) | /* must be 1 for interleave U/V, hardware requirement for AVC/VC1/MPEG and 0 for JPEG */
2580 (0 << 22) | /* surface object control state, FIXME??? */
2581 ((obj_surface->width - 1) << 3) | /* pitch */
2582 (0 << 2) | /* must be 0 for interleave U/V */
2583 (1 << 1) | /* must be tiled */
2584 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2585 OUT_BCS_BATCH(batch,
2586 (0 << 16) | /* X offset for U(Cb), must be 0 */
2587 (y_cb_offset << 0)); /* Y offset for U(Cb) */
2588 OUT_BCS_BATCH(batch,
2589 (0 << 16) | /* X offset for V(Cr), must be 0 */
2590 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2593 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_JPEG_PIC_STATE (3 DWORDs). From the input fourcc it picks the
 * HW input surface format and output MCU structure, then derives the
 * frame dimensions in 8x8 blocks and the pixel counts of the partial
 * last MCU in each direction, per the MCU geometry (H1/V1 sampling
 * factors) of the chosen output format.
 */
2597 gen8_mfc_jpeg_pic_state(VADriverContextP ctx,
2598 struct intel_encoder_context *encoder_context,
2599 struct encode_state *encode_state)
2601 struct intel_batchbuffer *batch = encoder_context->base.batch;
2602 struct object_surface *obj_surface = encode_state->input_yuv_object;
2603 VAEncPictureParameterBufferJPEG *pic_param;
2604 unsigned int surface_format;
2605 unsigned int frame_width_in_blks;
2606 unsigned int frame_height_in_blks;
2607 unsigned int pixels_in_horizontal_lastMCU;
2608 unsigned int pixels_in_vertical_lastMCU;
2609 unsigned int input_surface_format;
2610 unsigned int output_mcu_format;
2611 unsigned int picture_width;
2612 unsigned int picture_height;
2614 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2615 assert(obj_surface);
2616 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2617 surface_format = obj_surface->fourcc;
2618 picture_width = pic_param->picture_width;
2619 picture_height = pic_param->picture_height;
/* map input fourcc -> (HW input format, output MCU structure) */
2621 switch (surface_format) {
2622 case VA_FOURCC_Y800: {
2623 input_surface_format = JPEG_ENC_SURFACE_Y8;
2624 output_mcu_format = JPEG_ENC_MCU_YUV400;
2627 case VA_FOURCC_NV12: {
2628 input_surface_format = JPEG_ENC_SURFACE_NV12;
2629 output_mcu_format = JPEG_ENC_MCU_YUV420;
2632 case VA_FOURCC_UYVY: {
2633 input_surface_format = JPEG_ENC_SURFACE_UYVY;
2634 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2637 case VA_FOURCC_YUY2: {
2638 input_surface_format = JPEG_ENC_SURFACE_YUY2;
2639 output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
2643 case VA_FOURCC_RGBA:
2644 case VA_FOURCC_444P: {
2645 input_surface_format = JPEG_ENC_SURFACE_RGB;
2646 output_mcu_format = JPEG_ENC_MCU_RGB;
/* default: fall back to NV12 / YUV420 */
2650 input_surface_format = JPEG_ENC_SURFACE_NV12;
2651 output_mcu_format = JPEG_ENC_MCU_YUV420;
/* MCU geometry depends on the output format's sampling factors */
2657 switch (output_mcu_format) {
2659 case JPEG_ENC_MCU_YUV400:
2660 case JPEG_ENC_MCU_RGB: {
2661 pixels_in_horizontal_lastMCU = (picture_width % 8);
2662 pixels_in_vertical_lastMCU = (picture_height % 8);
2664 //H1=1,V1=1 for YUV400 and YUV444. So, compute these values accordingly
2665 frame_width_in_blks = ((picture_width + 7) / 8);
2666 frame_height_in_blks = ((picture_height + 7) / 8);
2670 case JPEG_ENC_MCU_YUV420: {
/* odd dimensions are rounded up to even before taking the modulo,
 * matching the 2x2 chroma subsampling of a 16x16 MCU */
2671 if((picture_width % 2) == 0)
2672 pixels_in_horizontal_lastMCU = picture_width % 16;
2674 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2676 if((picture_height % 2) == 0)
2677 pixels_in_vertical_lastMCU = picture_height % 16;
2679 pixels_in_vertical_lastMCU = ((picture_height % 16) + 1) % 16;
2681 //H1=2,V1=2 for YUV420. So, compute these values accordingly
2682 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2683 frame_height_in_blks = ((picture_height + 15) / 16) * 2;
2687 case JPEG_ENC_MCU_YUV422H_2Y: {
2688 if(picture_width % 2 == 0)
2689 pixels_in_horizontal_lastMCU = picture_width % 16;
2691 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
2693 pixels_in_vertical_lastMCU = picture_height % 8;
2695 //H1=2,V1=1 for YUV422H_2Y. So, compute these values accordingly
2696 frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2697 frame_height_in_blks = ((picture_height + 7) / 8);
2702 BEGIN_BCS_BATCH(batch, 3);
2704 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2706 OUT_BCS_BATCH(batch,
2707 ( pixels_in_horizontal_lastMCU << 26) | /* Pixels In Horizontal Last MCU */
2708 ( pixels_in_vertical_lastMCU << 21) | /* Pixels In Vertical Last MCU */
2709 ( input_surface_format << 8) | /* Input Surface format */
2710 ( output_mcu_format << 0)); /* Output MCU Structure */
2712 OUT_BCS_BATCH(batch,
2713 ((frame_height_in_blks - 1) << 16) | /* Frame Height In Blks Minus 1 */
2714 (JPEG_ENC_ROUND_QUANT_DEFAULT << 13) | /* Rounding Quant set to default value 0 */
2715 ((frame_width_in_blks - 1) << 0)); /* Frame Width In Blks Minus 1 */
2716 ADVANCE_BCS_BATCH(batch);
/*
 * Convert a 64-entry quantization matrix into the 32-DWORD reciprocal
 * form the MFX FQM state expects: each entry becomes 65535/Q, and two
 * 16-bit reciprocals are packed per DWORD (even index in the low half).
 *
 * NOTE(review): raster_qm entries must be non-zero — callers clamp them
 * to [1,255] first — or the division faults.
 * NOTE(review): the counter updates (presumably j++ and i advancing by 2
 * in the packing loop) are on lines elided from this extract — confirm.
 */
2720 get_reciprocal_dword_qm(unsigned char *raster_qm, uint32_t *dword_qm)
2723 short reciprocal_qm[64];
2725 for(i=0; i<64; i++) {
2726 reciprocal_qm[i] = 65535/(raster_qm[i]);
2729 for(i=0; i<64; i++) {
2730 dword_qm[j] = ((reciprocal_qm[i+1] <<16) | (reciprocal_qm[i]));
/*
 * Program the JPEG forward-quantization matrices (luma + chroma):
 *   1. take the app's VAQMatrixBufferJPEG if present (and buffer it for
 *      later frames), else use the buffered/default matrices;
 *   2. scale by the normalized quality factor, clamped to [1,255];
 *   3. de-zigzag into raster order, transpose to column order;
 *   4. convert to packed reciprocals and emit via gen8_mfc_fqm_state.
 */
2739 gen8_mfc_jpeg_fqm_state(VADriverContextP ctx,
2740 struct intel_encoder_context *encoder_context,
2741 struct encode_state *encode_state)
2743 unsigned int quality = 0;
2744 uint32_t temp, i = 0, j = 0, dword_qm[32];
2745 VAEncPictureParameterBufferJPEG *pic_param;
2746 VAQMatrixBufferJPEG *qmatrix;
2747 unsigned char raster_qm[64], column_raster_qm[64];
2748 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2750 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2751 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2752 quality = pic_param->quality;
2754 //If the app sends the qmatrix, use it, buffer it for using it with the next frames
2755 //The app can send qmatrix for the first frame and not send for the subsequent frames
2756 if(encode_state->q_matrix && encode_state->q_matrix->buffer) {
2757 qmatrix = (VAQMatrixBufferJPEG *)encode_state->q_matrix->buffer;
2759 mfc_context->buffered_qmatrix.load_lum_quantiser_matrix = 1;
2760 memcpy(mfc_context->buffered_qmatrix.lum_quantiser_matrix, qmatrix->lum_quantiser_matrix, 64 * (sizeof(unsigned char)));
2762 if(pic_param->num_components > 1) {
2763 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 1;
2764 memcpy(mfc_context->buffered_qmatrix.chroma_quantiser_matrix, qmatrix->chroma_quantiser_matrix, 64 * (sizeof(unsigned char)));
2766 mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 0;
2770 //If the app doesn't send the qmatrix, use the buffered/default qmatrix
2771 qmatrix = &mfc_context->buffered_qmatrix;
2772 qmatrix->load_lum_quantiser_matrix = 1;
2773 qmatrix->load_chroma_quantiser_matrix = (pic_param->num_components > 1) ? 1 : 0;
2777 //As per the design, normalization of the quality factor and scaling of the Quantization tables
2778 //based on the quality factor needs to be done in the driver before sending the values to the HW.
2779 //But note, the driver expects the scaled quantization tables (as per below logic) to be sent as
2780 //packed header information. The packed header is written as the header of the jpeg file. This
2781 //header information is used to decode the jpeg file. So, it is the app's responsibility to send
2782 //the correct header information (See build_packed_jpeg_header_buffer() in jpegenc.c in LibVa on
2783 //how to do this). QTables can be different for different applications. If no tables are provided,
2784 //the default tables in the driver are used.
2786 //Normalization of the quality factor (standard IJG mapping to a scale percentage)
2787 if (quality > 100) quality=100;
2788 if (quality == 0) quality=1;
2789 quality = (quality < 50) ? (5000/quality) : (200 - (quality*2));
2791 //Step 1. Apply Quality factor and clip to range [1, 255] for luma and chroma Quantization matrices
2792 //Step 2. HW expects the 1/Q[i] values in the qm sent, so get reciprocals
2793 //Step 3. HW also expects 32 dwords, hence combine 2 (1/Q) values into 1 dword
2794 //Step 4. Send the Quantization matrix to the HW, use gen8_mfc_fqm_state
2797 if(qmatrix->load_lum_quantiser_matrix) {
2798 //apply quality to lum_quantiser_matrix
2799 for(i=0; i < 64; i++) {
2800 temp = (qmatrix->lum_quantiser_matrix[i] * quality)/100;
2801 //clamp to range [1,255]
2802 temp = (temp > 255) ? 255 : temp;
2803 temp = (temp < 1) ? 1 : temp;
2804 qmatrix->lum_quantiser_matrix[i] = (unsigned char)temp;
2807 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2808 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2809 for (j = 0; j < 64; j++)
2810 raster_qm[zigzag_direct[j]] = qmatrix->lum_quantiser_matrix[j];
2812 //Convert the raster order(row-ordered) to the column-raster (column by column).
2813 //To be consistent with the other encoders, send it in column order.
2814 //Need to double check if our HW expects col or row raster.
2815 for (j = 0; j < 64; j++) {
2816 int row = j / 8, col = j % 8;
2817 column_raster_qm[col * 8 + row] = raster_qm[j];
2820 //Convert the raster QM to reciprocals. HW expects values in reciprocal.
2821 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2823 //send the luma qm to the command buffer
2824 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2827 //For Chroma, if chroma exists (Cb, Cr or G, B)
2828 if(qmatrix->load_chroma_quantiser_matrix) {
2829 //apply quality to chroma_quantiser_matrix
2830 for(i=0; i < 64; i++) {
2831 temp = (qmatrix->chroma_quantiser_matrix[i] * quality)/100;
2832 //clamp to range [1,255]
2833 temp = (temp > 255) ? 255 : temp;
2834 temp = (temp < 1) ? 1 : temp;
2835 qmatrix->chroma_quantiser_matrix[i] = (unsigned char)temp;
2838 //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2839 //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2840 for (j = 0; j < 64; j++)
2841 raster_qm[zigzag_direct[j]] = qmatrix->chroma_quantiser_matrix[j];
2843 //Convert the raster order(row-ordered) to the column-raster (column by column).
2844 //To be consistent with the other encoders, send it in column order.
2845 //Need to double check if our HW expects col or row raster.
2846 for (j = 0; j < 64; j++) {
2847 int row = j / 8, col = j % 8;
2848 column_raster_qm[col * 8 + row] = raster_qm[j];
2852 //Convert the raster QM to reciprocals. HW expects values in reciprocal.
2853 get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2855 //send the same chroma qm to the command buffer (for both U,V or G,B)
2856 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2857 gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2862 //Translation of Table K.5 into code: This method takes the huffval from the
2863 //Huffmantable buffer and converts into index for the coefficients and size tables
/*
 * Map a JPEG HUFFVAL byte (run in the high nibble, size in the low
 * nibble) to an index of the form run*10 + size; values >= 0xF0 (the
 * ZRL row of Table K.5) are shifted up by one to make room for ZRL.
 */
2864 uint8_t map_huffval_to_index(uint8_t huff_val)
2868 if(huff_val < 0xF0) {
2869 index = (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
2871 index = 1 + (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
2878 //Implementation of Flow chart Annex C - Figure C.1
/*
 * Expand the 16-entry BITS array (codes per length) into a flat list of
 * code sizes, zero-terminated; *lastK receives the number of codes.
 * NOTE(review): loop increments and the i/j outer loop are on lines
 * elided from this extract — verify against the full source.
 */
2880 generate_huffman_codesizes_table(uint8_t *bits, uint8_t *huff_size_table, uint8_t *lastK)
2882 uint8_t i=1, j=1, k=0;
2885 while(j <= (uint8_t)bits[i-1]) {
2886 huff_size_table[k] = i;
/* terminating zero marks the end of the size list (per Figure C.1) */
2894 huff_size_table[k] = 0;
2898 //Implementation of Flow chart Annex C - Figure C.2
/*
 * Derive the canonical Huffman code words from the zero-terminated
 * code-size list: codes of equal length are consecutive, and the code
 * is left-shifted when the size increases (shift on elided lines).
 */
2900 generate_huffman_codes_table(uint8_t *huff_size_table, uint16_t *huff_code_table)
2904 uint8_t si=huff_size_table[k];
2906 while(huff_size_table[k] != 0) {
2908 while(huff_size_table[k] == si) {
2910 // An huffman code can never be 0xFFFF. Replace it with 0 if 0xFFFF
2911 if(code == 0xFFFF) {
2915 huff_code_table[k] = code;
2926 //Implementation of Flow chart Annex C - Figure C.3
/*
 * Reorder the size/code tables from generation order into symbol order:
 * each HUFFVAL symbol's code is stored at the index produced by
 * map_huffval_to_index(). type 0 = DC (12 symbols), else AC (162).
 * huff_size_table / huff_code_table are rewritten in place.
 */
2928 generate_ordered_codes_table(uint8_t *huff_vals, uint8_t *huff_size_table, uint16_t *huff_code_table, uint8_t type, uint8_t lastK)
2930 uint8_t huff_val_size=0, i=0, k=0;
2932 huff_val_size = (type == 0) ? 12 : 162;
2933 uint8_t huff_si_table[huff_val_size];
2934 uint16_t huff_co_table[huff_val_size];
2936 memset(huff_si_table, 0, sizeof(huff_si_table));
2937 memset(huff_co_table, 0, sizeof(huff_co_table));
/* scatter each code/size pair to its symbol-ordered slot */
2940 i = map_huffval_to_index(huff_vals[k]);
2941 huff_co_table[i] = huff_code_table[k];
2942 huff_si_table[i] = huff_size_table[k];
2946 memcpy(huff_size_table, huff_si_table, sizeof(uint8_t)*huff_val_size);
2947 memcpy(huff_code_table, huff_co_table, sizeof(uint16_t)*huff_val_size);
2951 //This method converts the huffman table to code words which is needed by the HW
2952 //Flowcharts from Jpeg Spec Annex C - Figure C.1, Figure C.2, Figure C.3 are used here
/*
 * Build the HW-format code table for one Huffman table:
 * type 0 = DC (12 entries), else AC (162); index selects the table pair
 * inside the VA huffman buffer. Output DWORD layout: byte 0 = code
 * length, bytes 1-2 = code word, byte 3 unused.
 */
2954 convert_hufftable_to_codes(VAHuffmanTableBufferJPEGBaseline *huff_buffer, uint32_t *table, uint8_t type, uint8_t index)
2956 uint8_t lastK = 0, i=0;
2957 uint8_t huff_val_size = 0;
2958 uint8_t *huff_bits, *huff_vals;
2960 huff_val_size = (type == 0) ? 12 : 162;
2961 uint8_t huff_size_table[huff_val_size+1]; //The +1 for adding 0 at the end of huff_val_size
2962 uint16_t huff_code_table[huff_val_size];
2964 memset(huff_size_table, 0, sizeof(huff_size_table));
2965 memset(huff_code_table, 0, sizeof(huff_code_table));
2967 huff_bits = (type == 0) ? (huff_buffer->huffman_table[index].num_dc_codes) : (huff_buffer->huffman_table[index].num_ac_codes);
2968 huff_vals = (type == 0) ? (huff_buffer->huffman_table[index].dc_values) : (huff_buffer->huffman_table[index].ac_values);
2971 //Generation of table of Huffman code sizes
2972 generate_huffman_codesizes_table(huff_bits, huff_size_table, &lastK);
2974 //Generation of table of Huffman codes
2975 generate_huffman_codes_table(huff_size_table, huff_code_table);
2977 //Ordering procedure for encoding procedure code tables
2978 generate_ordered_codes_table(huff_vals, huff_size_table, huff_code_table, type, lastK);
2980 //HW expects Byte0: Code length; Byte1,Byte2: Code Word, Byte3: Dummy
2981 //Since IA is little-endian, use &, | and << accordingly to store the values in the DWord.
2982 for(i=0; i<huff_val_size; i++) {
2984 table[i] = ((huff_size_table[i] & 0xFF) | ((huff_code_table[i] & 0xFFFF) << 8));
2989 //send the huffman table using MFC_JPEG_HUFF_TABLE_STATE
//Emits one 176-DWord MFC_JPEG_HUFF_TABLE_STATE command per loaded table:
//DW0 header, DW1 table id, DW2-13 DC codes, DW14-175 AC codes.
//num_tables (trailing parameter, line lost in extraction) comes from the
//caller as max_selector+1, i.e. 1 or 2.
2991 gen8_mfc_jpeg_huff_table_state(VADriverContextP ctx,
2992 struct encode_state *encode_state,
2993 struct intel_encoder_context *encoder_context,
2996 VAHuffmanTableBufferJPEGBaseline *huff_buffer;
2997 struct intel_batchbuffer *batch = encoder_context->base.batch;
2999 uint32_t dc_table[12], ac_table[162];
3001 assert(encode_state->huffman_table && encode_state->huffman_table->buffer);
3002 huff_buffer = (VAHuffmanTableBufferJPEGBaseline *)encode_state->huffman_table->buffer;
//FIX: the original passed 12 and 162 as the memset length, clearing only
//12/162 BYTES of these uint32_t arrays (48/648 bytes). Use sizeof so the
//whole arrays are zeroed.
3004 memset(dc_table, 0, sizeof(dc_table));
3005 memset(ac_table, 0, sizeof(ac_table));
3007 for (index = 0; index < num_tables; index++) {
3008 int id = va_to_gen7_jpeg_hufftable[index];
//Skip tables the app did not ask to (re)load.
3010 if (!huff_buffer->load_huffman_table[index])
3013 //load DC table with 12 DWords
3014 convert_hufftable_to_codes(huff_buffer, dc_table, 0, index); //0 for Dc
3016 //load AC table with 162 DWords
3017 convert_hufftable_to_codes(huff_buffer, ac_table, 1, index); //1 for AC
3019 BEGIN_BCS_BATCH(batch, 176);
3020 OUT_BCS_BATCH(batch, MFC_JPEG_HUFF_TABLE_STATE | (176 - 2));
3021 OUT_BCS_BATCH(batch, id); //Huff table id
3023 //DWord 2 - 13 has DC_TABLE
3024 intel_batchbuffer_data(batch, dc_table, 12*4);
3026 //Dword 14 -175 has AC_TABLE
3027 intel_batchbuffer_data(batch, ac_table, 162*4);
3028 ADVANCE_BCS_BATCH(batch);
3033 //This method is used to compute the MCU count used for setting MFC_JPEG_SCAN_OBJECT
//Returns (via out-params) the luma horizontal/vertical sampling factors for
//the input surface fourcc; the caller derives MCU dimensions from them.
//NOTE(review): every case body (the *h_factor / *v_factor assignments and
//breaks) was lost in extraction — presumably 2x2 for NV12 (4:2:0), 2x1 for
//UYVY/YUY2 (4:2:2), 1x1 for Y800/RGBA/444P and default; confirm against the
//full source before relying on this.
3034 static void get_Y_sampling_factors(uint32_t surface_format, uint8_t *h_factor, uint8_t *v_factor)
3036 switch (surface_format) {
3037 case VA_FOURCC_Y800: {
3042 case VA_FOURCC_NV12: {
3047 case VA_FOURCC_UYVY: {
3052 case VA_FOURCC_YUY2: {
3057 case VA_FOURCC_RGBA:
3058 case VA_FOURCC_444P: {
3063 default : { //May be have to insert error handling here. For now just use as below
3071 //set MFC_JPEG_SCAN_OBJECT
//Programs the 3-DWord MFC_JPEG_SCAN_OBJECT command: MCU count in DW1 and
//scan controls (huffman table selectors, header-present, last-scan,
//restart interval) in DW2. JPEG baseline here uses exactly one scan.
3073 gen8_mfc_jpeg_scan_object(VADriverContextP ctx,
3074 struct encode_state *encode_state,
3075 struct intel_encoder_context *encoder_context)
3077 uint32_t mcu_count, surface_format, Mx, My;
3078 uint8_t i, horizontal_sampling_factor, vertical_sampling_factor, huff_ac_table=0, huff_dc_table=0;
3079 uint8_t is_last_scan = 1; //Jpeg has only 1 scan per frame. When last scan, HW inserts EOI code.
3080 uint8_t head_present_flag=1; //Header has tables and app data
3081 uint16_t num_components, restart_interval; //Specifies number of MCUs in an ECS.
3082 VAEncSliceParameterBufferJPEG *slice_param;
3083 VAEncPictureParameterBufferJPEG *pic_param;
3085 struct intel_batchbuffer *batch = encoder_context->base.batch;
3086 struct object_surface *obj_surface = encode_state->input_yuv_object;
3088 assert(encode_state->slice_params_ext[0] && encode_state->slice_params_ext[0]->buffer);
3089 assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
3090 assert(obj_surface);
3091 pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
3092 slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[0]->buffer;
3093 surface_format = obj_surface->fourcc;
3095 get_Y_sampling_factors(surface_format, &horizontal_sampling_factor, &vertical_sampling_factor);
3097 // Mx = #MCUs in a row, My = #MCUs in a column
//Each MCU covers (h_factor*8) x (v_factor*8) luma pixels; round up.
3098 Mx = (pic_param->picture_width + (horizontal_sampling_factor*8 -1))/(horizontal_sampling_factor*8);
3099 My = (pic_param->picture_height + (vertical_sampling_factor*8 -1))/(vertical_sampling_factor*8);
3100 mcu_count = (Mx * My);
3102 num_components = pic_param->num_components;
3103 restart_interval = slice_param->restart_interval;
3105 //Depending on number of components and values set for table selectors,
3106 //only those bits are set in 24:22 for AC table, 20:18 for DC table
//Selector of component i (0 or 1) becomes bit i of the 3-bit field.
3107 for(i=0; i<num_components; i++) {
3108 huff_ac_table |= ((slice_param->components[i].ac_table_selector)<<i);
3109 huff_dc_table |= ((slice_param->components[i].dc_table_selector)<<i);
3113 BEGIN_BCS_BATCH(batch, 3);
3115 OUT_BCS_BATCH(batch, MFC_JPEG_SCAN_OBJECT | (3 - 2));
3117 OUT_BCS_BATCH(batch, mcu_count << 0); //MCU Count
3119 OUT_BCS_BATCH(batch,
3120 (huff_ac_table << 22) | //Huffman AC Table
3121 (huff_dc_table << 18) | //Huffman DC Table
3122 (head_present_flag << 17) | //Head present flag
3123 (is_last_scan << 16) | //Is last scan
3124 (restart_interval << 0)); //Restart Interval
3125 ADVANCE_BCS_BATCH(batch);
//Emits an MFX_(PAK_)INSERT_OBJECT command carrying `length_in_dws` DWords of
//pre-packed header/bitstream data (the JPEG headers built by the app).
//data_bits_in_last_dw: valid bit count in the final DWord (0 means full 32).
//is_last_header / is_end_of_slice set the corresponding command flags.
3129 gen8_mfc_jpeg_pak_insert_object(struct intel_encoder_context *encoder_context, unsigned int *insert_data,
3130 int length_in_dws, int data_bits_in_last_dw, int is_last_header,
3131 int is_end_of_slice)
3133 struct intel_batchbuffer *batch = encoder_context->base.batch;
//A value of 0 encodes "all 32 bits valid" at the API level; normalize.
3136 if (data_bits_in_last_dw == 0)
3137 data_bits_in_last_dw = 32;
3139 BEGIN_BCS_BATCH(batch, length_in_dws + 2);
3141 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
3143 OUT_BCS_BATCH(batch,
3144 (0 << 16) | //DataByteOffset 0 for JPEG Encoder
3145 (0 << 15) | //HeaderLengthExcludeFrmSize 0 for JPEG Encoder
3146 (data_bits_in_last_dw << 8) | //DataBitsInLastDW
3147 (0 << 4) | //SkipEmulByteCount 0 for JPEG Encoder
3148 (0 << 3) | //EmulationFlag 0 for JPEG Encoder
3149 ((!!is_last_header) << 2) | //LastHeaderFlag
3150 ((!!is_end_of_slice) << 1) | //EndOfSliceFlag
3151 (1 << 0)); //BitstreamStartReset 1 for JPEG Encoder
3153 intel_batchbuffer_data(batch, insert_data, length_in_dws*4);
3155 ADVANCE_BCS_BATCH(batch);
3159 //send the jpeg headers to HW using MFX_PAK_INSERT_OBJECT
//Fetches the app-supplied packed header (params + data buffers) and forwards
//it to gen8_mfc_jpeg_pak_insert_object, converting the bit length into whole
//DWords plus a residual bit count for the last DWord.
//NOTE(review): the tail of the call (the header_data argument and the
//is_last_header/is_end_of_slice flags) was lost in extraction.
3161 gen8_mfc_jpeg_add_headers(VADriverContextP ctx,
3162 struct encode_state *encode_state,
3163 struct intel_encoder_context *encoder_context)
3165 if (encode_state->packed_header_data_ext) {
3166 VAEncPackedHeaderParameterBuffer *param = NULL;
3167 unsigned int *header_data = (unsigned int *)(*encode_state->packed_header_data_ext)->buffer;
3168 unsigned int length_in_bits;
3170 param = (VAEncPackedHeaderParameterBuffer *)(*encode_state->packed_header_params_ext)->buffer;
3171 length_in_bits = param->bit_length;
3173 gen8_mfc_jpeg_pak_insert_object(encoder_context,
//round up to DWords; low 5 bits = valid bits in the last DWord
3175 ALIGN(length_in_bits, 32) >> 5,
3176 length_in_bits & 0x1f,
3182 //Initialize the buffered_qmatrix with the default qmatrix in the driver.
3183 //If the app sends the qmatrix, this will be replaced with the one app sends.
//NOTE(review): the loop headers (presumably `for (i = 0; i < 64; i++)`) and
//the index declaration were lost in extraction; assignments kept as-is.
3185 jpeg_init_default_qmatrix(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3188 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3190 //Load the the QM in zigzag order. If app sends QM, it is always in zigzag order.
//Luma then chroma: default tables re-ordered through zigzag_direct[].
3192 mfc_context->buffered_qmatrix.lum_quantiser_matrix[i] = jpeg_luma_quant[zigzag_direct[i]];
3195 mfc_context->buffered_qmatrix.chroma_quantiser_matrix[i] = jpeg_chroma_quant[zigzag_direct[i]];
3198 /* This is at the picture level */
//Programs the full per-picture MFX/MFC JPEG command sequence: pipe mode,
//surface/buffer state, PIC_STATE, FQM, huffman tables, scan object, and the
//packed headers. max_selector tracks the highest DC/AC table selector used
//by any component, so only the needed huffman tables are uploaded.
3200 gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx,
3201 struct encode_state *encode_state,
3202 struct intel_encoder_context *encoder_context)
3204 int i, j, component, max_selector = 0;
3205 VAEncSliceParameterBufferJPEG *slice_param;
3207 gen8_mfc_pipe_mode_select(ctx, MFX_FORMAT_JPEG, encoder_context);
3208 gen8_mfc_jpeg_set_surface_state(ctx, encoder_context, encode_state);
3209 gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
3210 gen8_mfc_ind_obj_base_addr_state(ctx, encoder_context);
3211 gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
3212 gen8_mfc_jpeg_pic_state(ctx, encoder_context, encode_state);
3214 //do the slice level encoding here
3215 gen8_mfc_jpeg_fqm_state(ctx, encoder_context, encode_state);
3217 //I dont think I need this for loop. Just to be consistent with other encoding logic...
3218 for(i = 0; i < encode_state->num_slice_params_ext; i++) {
3219 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[i]->buffer);
3220 slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer;
3222 for(j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
3224 for(component = 0; component < slice_param->num_components; component++) {
3225 if(max_selector < slice_param->components[component].dc_table_selector)
3226 max_selector = slice_param->components[component].dc_table_selector;
3228 if (max_selector < slice_param->components[component].ac_table_selector)
3229 max_selector = slice_param->components[component].ac_table_selector;
//Baseline JPEG allows at most 2 DC + 2 AC tables => selector must be 0 or 1.
3236 assert(max_selector < 2);
3237 //send the huffman table using MFC_JPEG_HUFF_TABLE
3238 gen8_mfc_jpeg_huff_table_state(ctx, encode_state, encoder_context, max_selector+1);
3239 //set MFC_JPEG_SCAN_OBJECT
3240 gen8_mfc_jpeg_scan_object(ctx, encode_state, encoder_context);
3241 //add headers using MFX_PAK_INSERT_OBJECT (it is refered as MFX_INSERT_OBJECT in this driver code)
3242 gen8_mfc_jpeg_add_headers(ctx, encode_state, encoder_context);
//Top-level JPEG PAK batch construction: opens an atomic BCS batch, flushes,
//runs the per-picture programming, and closes the batch.
3247 gen8_mfc_jpeg_pipeline_programing(VADriverContextP ctx,
3248 struct encode_state *encode_state,
3249 struct intel_encoder_context *encoder_context)
3251 struct intel_batchbuffer *batch = encoder_context->base.batch;
//0x4000: reserved batch space for the whole JPEG command sequence.
3254 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
3255 intel_batchbuffer_emit_mi_flush(batch);
3257 // picture level programing
3258 gen8_mfc_jpeg_pipeline_picture_programing(ctx, encode_state, encoder_context);
3261 intel_batchbuffer_end_atomic(batch);
//Entry point for encoding one JPEG picture: init MFC state, prepare the
//JPEG-specific buffers, build the BCS batch, then submit it.
//Always reports success; failures would surface via asserts upstream.
3267 gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
3268 struct encode_state *encode_state,
3269 struct intel_encoder_context *encoder_context)
3271 gen8_mfc_init(ctx, encode_state, encoder_context);
3272 intel_mfc_jpeg_prepare(ctx, encode_state, encoder_context);
3273 /*Programing bcs pipeline*/
3274 gen8_mfc_jpeg_pipeline_programing(ctx, encode_state, encoder_context);
3275 gen8_mfc_run(ctx, encode_state, encoder_context);
3277 return VA_STATUS_SUCCESS;
//Estimates the VP8 quantizer index whose per-MB bit cost (from the
//vp8_bits_per_mb table, scaled by 512) best matches the BRC target frame
//size, clamped to the picture's [clamp_qindex_low, clamp_qindex_high].
//NOTE(review): the trailing parameter (is_key_frame, used below), the
//target_mb_size declaration, the `else`, and parts of the search-loop body
//were lost in extraction; code kept byte-identical.
3280 static int gen8_mfc_vp8_qindex_estimate(struct encode_state *encode_state,
3281 struct gen6_mfc_context *mfc_context,
3282 int target_frame_size,
3285 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3286 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3287 unsigned int max_qindex = pic_param->clamp_qindex_high;
3288 unsigned int min_qindex = pic_param->clamp_qindex_low;
3289 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3290 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3292 int last_size_gap = -1;
3293 int per_mb_size_at_qindex;
3294 int target_qindex = min_qindex, i;
3296 /* make sure would not overflow*/
//Divide first for huge targets so the <<9 (×512) scaling cannot overflow int.
3297 if (target_frame_size >= (0x7fffffff >> 9))
3298 target_mb_size = (target_frame_size / width_in_mbs / height_in_mbs) << 9;
3300 target_mb_size = (target_frame_size << 9) / width_in_mbs / height_in_mbs;
//Linear search from lowest qindex: stop at the first qindex whose cost fits.
3302 for (i = min_qindex; i <= max_qindex; i++) {
3303 per_mb_size_at_qindex = vp8_bits_per_mb[!is_key_frame][i];
3305 if (per_mb_size_at_qindex <= target_mb_size) {
3306 if (target_mb_size - per_mb_size_at_qindex < last_size_gap)
3311 last_size_gap = per_mb_size_at_qindex - target_mb_size;
3314 return target_qindex;
//Initializes the VP8 CBR bit-rate-control state: per-slice-type target frame
//sizes for one GOP (1 intra + intra_period-1 inter frames, inter weighted by
//BRC_PWEIGHT), initial qindex estimates, and the HRD buffer model.
3317 static void gen8_mfc_vp8_brc_init(struct encode_state *encode_state,
3318 struct intel_encoder_context* encoder_context)
3320 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3321 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3322 double bitrate = encoder_context->brc.bits_per_second[0];
3323 double framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
3324 int inum = 1, pnum = 0;
3325 int intra_period = seq_param->intra_period;
3326 int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3327 int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3328 int max_frame_size = (vp8_bits_per_mb[0][0] >> 9) * width_in_mbs * height_in_mbs;/* vp8_bits_per_mb table mutilpled 512 */
3330 pnum = intra_period - 1;
3332 mfc_context->brc.mode = encoder_context->rate_control_mode;
//Split the GOP bit budget between the I frame and the weighted P frames.
3334 mfc_context->brc.target_frame_size[0][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period) / framerate) /
3335 (double)(inum + BRC_PWEIGHT * pnum ));
3336 mfc_context->brc.target_frame_size[0][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[0][SLICE_TYPE_I];
3338 mfc_context->brc.gop_nums[0][SLICE_TYPE_I] = inum;
3339 mfc_context->brc.gop_nums[0][SLICE_TYPE_P] = pnum;
3341 mfc_context->brc.bits_per_frame[0] = bitrate / framerate;
//Seed qindex per slice type from the bits-per-MB lookup. NOTE(review): the
//middle arguments of these calls were lost in extraction.
3343 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] = gen8_mfc_vp8_qindex_estimate(encode_state,
3345 mfc_context->brc.target_frame_size[0][SLICE_TYPE_I],
3347 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = gen8_mfc_vp8_qindex_estimate(encode_state,
3349 mfc_context->brc.target_frame_size[0][SLICE_TYPE_P],
//HRD buffer: app-provided size, else default to one second worth of bits.
3352 if (encoder_context->brc.hrd_buffer_size)
3353 mfc_context->hrd.buffer_size[0] = (double)encoder_context->brc.hrd_buffer_size;
3355 mfc_context->hrd.buffer_size[0] = bitrate;
3356 if (encoder_context->brc.hrd_initial_buffer_fullness &&
3357 encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size[0])
3358 mfc_context->hrd.current_buffer_fullness[0] = (double)encoder_context->brc.hrd_initial_buffer_fullness;
3360 mfc_context->hrd.current_buffer_fullness[0] = mfc_context->hrd.buffer_size[0] / 2.0;
3361 mfc_context->hrd.target_buffer_fullness[0] = (double)mfc_context->hrd.buffer_size[0] / 2.0;
3362 mfc_context->hrd.buffer_capacity[0] = (double)mfc_context->hrd.buffer_size[0] / max_frame_size;
3363 mfc_context->hrd.violation_noted = 0;
//Post-encode BRC update: given the actual frame size (frame_bits, trailing
//parameter lost in extraction), predicts the quantizer for the next frame of
//the current slice type, applies rate/HRD corrections, and stores the result
//back into brc.qp_prime_y. Returns the HRD status (gen6_brc_status).
//NOTE(review): several original lines (x/y declarations, the +-1 rounding
//corrections, some else-branches/braces) are missing from this extract.
3366 static int gen8_mfc_vp8_brc_postpack(struct encode_state *encode_state,
3367 struct intel_encoder_context *encoder_context,
3370 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3371 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
3372 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3373 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3374 int slicetype = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3375 int qpi = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I];
3376 int qpp = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P];
3377 int qp; // quantizer of previously encoded slice of current type
3378 int qpn; // predicted quantizer for next frame of current type in integer format
3379 double qpf; // predicted quantizer for next frame of current type in float format
3380 double delta_qp; // QP correction
3381 int target_frame_size, frame_size_next;
3383 * x - how far we are from HRD buffer borders
3384 * y - how far we are from target HRD buffer fullness
3387 double frame_size_alpha;
3388 unsigned int max_qindex = pic_param->clamp_qindex_high;
3389 unsigned int min_qindex = pic_param->clamp_qindex_low;
3391 qp = mfc_context->brc.qp_prime_y[0][slicetype];
3393 target_frame_size = mfc_context->brc.target_frame_size[0][slicetype];
//Small HRD buffers get no smoothing; otherwise smooth over the GOP length.
3394 if (mfc_context->hrd.buffer_capacity[0] < 5)
3395 frame_size_alpha = 0;
3397 frame_size_alpha = (double)mfc_context->brc.gop_nums[0][slicetype];
3398 if (frame_size_alpha > 30) frame_size_alpha = 30;
3399 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
3400 (double)(frame_size_alpha + 1.);
3402 /* frame_size_next: avoiding negative number and too small value */
3403 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
3404 frame_size_next = (int)((double)target_frame_size * 0.25);
//Assume bits scale roughly linearly with QP: next QP = qp * target/next.
3406 qpf = (double)qp * target_frame_size / frame_size_next;
3407 qpn = (int)(qpf + 0.5);
3410 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
3411 mfc_context->brc.qpf_rounding_accumulator[0] += qpf - qpn;
3412 if (mfc_context->brc.qpf_rounding_accumulator[0] > 1.0) {
3414 mfc_context->brc.qpf_rounding_accumulator[0] = 0.;
3415 } else if (mfc_context->brc.qpf_rounding_accumulator[0] < -1.0) {
3417 mfc_context->brc.qpf_rounding_accumulator[0] = 0.;
3421 /* making sure that QP is not changing too fast */
3422 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
3423 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
3424 /* making sure that with QP predictions we did do not leave QPs range */
3425 BRC_CLIP(qpn, min_qindex, max_qindex);
3427 /* checking wthether HRD compliance is still met */
3428 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
3430 /* calculating QP delta as some function*/
3431 x = mfc_context->hrd.target_buffer_fullness[0] - mfc_context->hrd.current_buffer_fullness[0];
3433 x /= mfc_context->hrd.target_buffer_fullness[0];
3434 y = mfc_context->hrd.current_buffer_fullness[0];
3437 x /= (mfc_context->hrd.buffer_size[0] - mfc_context->hrd.target_buffer_fullness[0]);
3438 y = mfc_context->hrd.buffer_size[0] - mfc_context->hrd.current_buffer_fullness[0];
3440 if (y < 0.01) y = 0.01;
3442 else if (x < -1) x = -1;
//Smooth correction term: bounded by BRC_QP_MAX_CHANGE, damped by exp(-1/y).
3444 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
3445 qpn = (int)(qpn + delta_qp + 0.5);
3447 /* making sure that with QP predictions we did do not leave QPs range */
3448 BRC_CLIP(qpn, min_qindex, max_qindex);
3450 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
3451 /* correcting QPs of slices of other types */
3452 if (!is_key_frame) {
3453 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 4)
3454 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 2;
3456 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 4)
3457 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
3459 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qindex, max_qindex);
3460 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qindex, max_qindex);
3461 } else if (sts == BRC_UNDERFLOW) { // underflow
3462 if (qpn <= qp) qpn = qp + 2;
3463 if (qpn > max_qindex) {
3465 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
3467 } else if (sts == BRC_OVERFLOW) {
3468 if (qpn >= qp) qpn = qp - 2;
//NOTE(review): qpn (int) vs min_qindex (unsigned) comparison promotes qpn to
//unsigned; a negative qpn would compare as huge and skip this branch — worth
//confirming qpn can never go negative here.
3469 if (qpn < min_qindex) { // < 0 (?) overflow with minQP
3471 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
3475 mfc_context->brc.qp_prime_y[0][slicetype] = qpn;
//Seeds the HRD (hypothetical reference decoder) VUI parameters for VP8 CBR:
//bit rate in kbit units (>>10), initial CPB removal delay in 90 kHz ticks,
//and 24-bit field lengths. Non-CBR modes leave the rate fields untouched.
3480 static void gen8_mfc_vp8_hrd_context_init(struct encode_state *encode_state,
3481 struct intel_encoder_context *encoder_context)
3483 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3484 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3485 int target_bit_rate = encoder_context->brc.bits_per_second[0];
3487 // current we only support CBR mode.
3488 if (rate_control_mode == VA_RC_CBR) {
3489 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
//Half-full CPB at startup, expressed in 90 kHz clock ticks.
3490 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
3491 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
3492 mfc_context->vui_hrd.i_frame_number = 0;
3494 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
3495 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
3496 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
3501 static void gen8_mfc_vp8_hrd_context_update(struct encode_state *encode_state,
3502 struct gen6_mfc_context *mfc_context)
3504 mfc_context->vui_hrd.i_frame_number++;
//Per-frame BRC preparation hook for VP8 CBR: (re)initializes the bit-rate
//controller and the HRD context when needed (first frame or when the app
//requested a BRC reset). Non-CBR modes are a no-op.
//NOTE(review): the brc_updated declaration and the guard conditions around
//the two init calls were lost in extraction; code kept byte-identical.
3507 static void gen8_mfc_vp8_brc_prepare(struct encode_state *encode_state,
3508 struct intel_encoder_context *encoder_context)
3510 unsigned int rate_control_mode = encoder_context->rate_control_mode;
3512 if (rate_control_mode == VA_RC_CBR) {
3514 assert(encoder_context->codec != CODEC_MPEG2);
3516 brc_updated = encoder_context->brc.need_reset;
3518 /*Programing bit rate control */
3520 gen8_mfc_vp8_brc_init(encode_state, encoder_context);
3523 /*Programing HRD control */
3525 gen8_mfc_vp8_hrd_context_init(encode_state, encoder_context);
//Resets the per-frame VP8 entropy/probability state: frame-header update
//positions, segment/MV probabilities, mode probabilities (key-frame vs
//inter defaults), and uploads the default coefficient probabilities into the
//stream-in BO consumed by the PAK hardware.
//NOTE(review): the `if (is_key_frame) { ... } else { ... }` structure around
//the two memcpy groups was lost in extraction; code kept byte-identical.
3529 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
3530 VAEncPictureParameterBufferVP8 *pic_param,
3531 VAQMatrixBufferVP8 *q_matrix)
3534 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3535 unsigned char *coeff_probs_stream_in_buffer;
3537 mfc_context->vp8_state.frame_header_lf_update_pos = 0;
3538 mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
3539 mfc_context->vp8_state.frame_header_token_update_pos = 0;
3540 mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
3542 mfc_context->vp8_state.prob_skip_false = 255;
3543 memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
3544 memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
//Key-frame branch: key-frame mode probability defaults.
3547 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3548 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3550 mfc_context->vp8_state.prob_intra = 255;
3551 mfc_context->vp8_state.prob_last = 128;
3552 mfc_context->vp8_state.prob_gf = 128;
//Inter-frame branch: inter mode probability defaults.
3554 memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3555 memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3557 mfc_context->vp8_state.prob_intra = 63;
3558 mfc_context->vp8_state.prob_last = 128;
3559 mfc_context->vp8_state.prob_gf = 128;
//Skip probability is looked up from the base quantizer index.
3562 mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
3564 dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
3565 coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
3566 assert(coeff_probs_stream_in_buffer);
3567 memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
3568 dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
//Placeholder for inter-frame probability adaptation between frames.
//NOTE(review): only the comment line is visible in this extract; if the
//original body contained code it was lost in extraction — confirm before
//assuming this is a no-op.
3571 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
3572 VAQMatrixBufferVP8 *q_matrix)
3575 /*some other probabilities need to be updated*/
3578 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
3579 VAEncPictureParameterBufferVP8 *pic_param,
3580 VAQMatrixBufferVP8 *q_matrix,
3581 struct gen6_mfc_context *mfc_context,
3582 struct intel_encoder_context *encoder_context);
3584 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
3585 struct intel_encoder_context *encoder_context,
3586 struct gen6_mfc_context *mfc_context)
3588 VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3589 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3590 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3591 unsigned char *frame_header_buffer;
3593 binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context, encoder_context);
3595 dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
3596 frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
3597 assert(frame_header_buffer);
3598 memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
3599 free(mfc_context->vp8_state.vp8_frame_header);
3600 dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
3603 #define MAX_VP8_FRAME_HEADER_SIZE 0x2000
3604 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
//Per-frame (re)initialization of the MFC VP8 encoder context: applies the
//BRC quantizer for CBR, releases/reallocates all scratch and VP8-specific
//BOs sized from the frame dimensions, and (re)binarizes the frame header.
//NOTE(review): this extract lost a number of original lines (declarations of
//`bo`/`i`, dri_bo_alloc name/alignment arguments, closing braces); all
//surviving lines are kept byte-identical except the one-token fix below.
3606 static void gen8_mfc_vp8_init(VADriverContextP ctx,
3607 struct encode_state *encode_state,
3608 struct intel_encoder_context *encoder_context)
3610 struct i965_driver_data *i965 = i965_driver_data(ctx);
3611 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3614 int width_in_mbs = 0;
3615 int height_in_mbs = 0;
3616 int slice_batchbuffer_size;
3617 int is_key_frame, slice_type, rate_control_mode;
3619 VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3620 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3621 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
//FIX: width_in_mbs was computed from frame_height (copy-paste of the line
//below), shrinking/oversizing every buffer for non-square frames. Use
//frame_width, matching gen8_mfc_vp8_qindex_estimate/brc_init.
3623 width_in_mbs = ALIGN(pSequenceParameter->frame_width, 16) / 16;
3624 height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3626 is_key_frame = !pic_param->pic_flags.bits.frame_type;
3627 slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3628 rate_control_mode = encoder_context->rate_control_mode;
//Under CBR, override the app's quantizer indices with the BRC decision.
3630 if (rate_control_mode == VA_RC_CBR) {
3631 q_matrix->quantization_index[0] = mfc_context->brc.qp_prime_y[0][slice_type];
3632 for (i = 1; i < 4; i++)
3633 q_matrix->quantization_index[i] = q_matrix->quantization_index[0];
3634 for (i = 0; i < 5; i++)
3635 q_matrix->quantization_index_delta[i] = 0;
3638 slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
3639 (SLICE_HEADER + SLICE_TAIL);
3641 /*Encode common setup for MFC*/
3642 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3643 mfc_context->post_deblocking_output.bo = NULL;
3645 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3646 mfc_context->pre_deblocking_output.bo = NULL;
3648 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3649 mfc_context->uncompressed_picture_source.bo = NULL;
3651 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3652 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3654 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
3655 if ( mfc_context->direct_mv_buffers[i].bo != NULL)
3656 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3657 mfc_context->direct_mv_buffers[i].bo = NULL;
3660 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
3661 if (mfc_context->reference_surfaces[i].bo != NULL)
3662 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3663 mfc_context->reference_surfaces[i].bo = NULL;
//Row-store / status scratch buffers sized from the MB dimensions.
3666 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3667 bo = dri_bo_alloc(i965->intel.bufmgr,
3669 width_in_mbs * 64 * 16,
3672 mfc_context->intra_row_store_scratch_buffer.bo = bo;
3674 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3675 bo = dri_bo_alloc(i965->intel.bufmgr,
3677 width_in_mbs * height_in_mbs * 16,
3680 mfc_context->macroblock_status_buffer.bo = bo;
3682 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3683 bo = dri_bo_alloc(i965->intel.bufmgr,
3685 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3688 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3690 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3691 bo = dri_bo_alloc(i965->intel.bufmgr,
3693 16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3696 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3698 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3699 mfc_context->mfc_batchbuffer_surface.bo = NULL;
3701 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3702 mfc_context->aux_batchbuffer_surface.bo = NULL;
3704 if (mfc_context->aux_batchbuffer) {
3705 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3706 mfc_context->aux_batchbuffer = NULL;
//Fresh aux batchbuffer, exposed to the GPE as a 16-byte-block surface.
3709 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
3710 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
3711 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
3712 mfc_context->aux_batchbuffer_surface.pitch = 16;
3713 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
3714 mfc_context->aux_batchbuffer_surface.size_block = 16;
3716 gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
3718 /* alloc vp8 encoding buffers*/
3719 dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3720 bo = dri_bo_alloc(i965->intel.bufmgr,
3722 MAX_VP8_FRAME_HEADER_SIZE,
3725 mfc_context->vp8_state.frame_header_bo = bo;
//Intermediate buffer: 384 bytes/MB per partition, 9 partitions total.
3727 mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 384 * 9;
3728 for(i = 0; i < 8; i++) {
3729 mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 384 * (i + 1);
3731 dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3732 bo = dri_bo_alloc(i965->intel.bufmgr,
3734 mfc_context->vp8_state.intermediate_buffer_max_size,
3737 mfc_context->vp8_state.intermediate_bo = bo;
3739 dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3740 bo = dri_bo_alloc(i965->intel.bufmgr,
3742 width_in_mbs * height_in_mbs * 16,
3745 mfc_context->vp8_state.stream_out_bo = bo;
3747 dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3748 bo = dri_bo_alloc(i965->intel.bufmgr,
3750 sizeof(vp8_default_coef_probs),
3753 mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
3755 dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3756 bo = dri_bo_alloc(i965->intel.bufmgr,
3758 VP8_TOKEN_STATISTICS_BUFFER_SIZE,
3761 mfc_context->vp8_state.token_statistics_bo = bo;
3763 dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3764 bo = dri_bo_alloc(i965->intel.bufmgr,
3766 width_in_mbs * 16 * 64,
3769 mfc_context->vp8_state.mpc_row_store_bo = bo;
3771 vp8_enc_state_init(mfc_context, pic_param, q_matrix);
3772 vp8_enc_frame_header_binarize(encode_state, encoder_context, mfc_context);
//Binds all per-frame surfaces/buffers into the MFC context for VP8 PAK:
//reconstructed surface (pre- or post-deblocking depending on loop filter),
//reference frames, input YUV, and the coded buffer that receives the final
//bitstream. Returns VA_STATUS_SUCCESS (return line lost in extraction).
//NOTE(review): the return-type line, `i`/`bo` declarations, the dri_bo_map
//before writing coded_buffer_segment, and the matching unmap are among the
//lines missing from this extract; code kept byte-identical.
3776 intel_mfc_vp8_prepare(VADriverContextP ctx,
3777 struct encode_state *encode_state,
3778 struct intel_encoder_context *encoder_context)
3780 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3781 struct object_surface *obj_surface;
3782 struct object_buffer *obj_buffer;
3783 struct i965_coded_buffer_segment *coded_buffer_segment;
3784 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3785 VAStatus vaStatus = VA_STATUS_SUCCESS;
3789 /* reconstructed surface */
3790 obj_surface = encode_state->reconstructed_object;
3791 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
//Loop filter off => reconstruction is the pre-deblocking output, else post.
3792 if (pic_param->loop_filter_level[0] == 0) {
3793 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
3794 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
3796 mfc_context->post_deblocking_output.bo = obj_surface->bo;
3797 dri_bo_reference(mfc_context->post_deblocking_output.bo);
3800 mfc_context->surface_state.width = obj_surface->orig_width;
3801 mfc_context->surface_state.height = obj_surface->orig_height;
3802 mfc_context->surface_state.w_pitch = obj_surface->width;
3803 mfc_context->surface_state.h_pitch = obj_surface->height;
3805 /* set vp8 reference frames */
3806 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
3807 obj_surface = encode_state->reference_objects[i];
3809 if (obj_surface && obj_surface->bo) {
3810 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
3811 dri_bo_reference(mfc_context->reference_surfaces[i].bo);
3813 mfc_context->reference_surfaces[i].bo = NULL;
3817 /* input YUV surface */
3818 obj_surface = encode_state->input_yuv_object;
3819 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
3820 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
//Coded buffer: bitstream is written after the driver's codec header, and the
//end offset is clamped a page below the buffer end.
3823 obj_buffer = encode_state->coded_buf_object;
3824 bo = obj_buffer->buffer_store->bo;
3825 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
3826 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
3827 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
3828 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
3830 dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3831 mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
3832 mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
3833 dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
3835 /* set the internal flag to 0 to indicate the coded size is unknown */
3837 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
3838 coded_buffer_segment->mapped = 0;
3839 coded_buffer_segment->codec = encoder_context->codec;
/*
 * Emit the MFX_VP8_ENCODER_CFG command (30 dwords) into the BCS batch.
 * DW1 selects statistics / rate-control behavior for the PAK pass, DW3
 * caps per-macroblock bit counts, DW22 carries show_frame/version from the
 * picture parameters, DW23 packs the scaled frame dimensions from the
 * sequence parameters, and the following dwords point at the frame-header
 * bin-buffer offsets cached in mfc_context->vp8_state by the header
 * packing code (bit count, qindex / loop-filter / token / mv update
 * positions).
 */
3846 gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx,
3847                          struct encode_state *encode_state,
3848                          struct intel_encoder_context *encoder_context)
3850     struct intel_batchbuffer *batch = encoder_context->base.batch;
3851     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3852     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3853     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3855     BEGIN_BCS_BATCH(batch, 30);
3856     OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
/* DW1: feature enables for this PAK pass */
3858     OUT_BCS_BATCH(batch,
3859                   0 << 9 | /* compressed bitstream output disable */
3860                   1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
3861                   1 << 6 | /* RC initial pass */
3862                   0 << 4 | /* update segment feature data flag */
3863                   1 << 3 | /* bitstream statistics output enable */
3864                   1 << 2 | /* token statistics output enable */
3865                   0 << 1 | /* final bitstream output disable */
3868     OUT_BCS_BATCH(batch, 0); /*DW2*/
/* DW3: per-MB bit-count limits */
3870     OUT_BCS_BATCH(batch,
3871                   0xfff << 16 | /* max intra mb bit count limit */
3872                   0xfff << 0  /* max inter mb bit count limit */
3875     OUT_BCS_BATCH(batch, 0); /*DW4*/
3876     OUT_BCS_BATCH(batch, 0); /*DW5*/
3877     OUT_BCS_BATCH(batch, 0); /*DW6*/
3878     OUT_BCS_BATCH(batch, 0); /*DW7*/
3879     OUT_BCS_BATCH(batch, 0); /*DW8*/
3880     OUT_BCS_BATCH(batch, 0); /*DW9*/
3881     OUT_BCS_BATCH(batch, 0); /*DW10*/
3882     OUT_BCS_BATCH(batch, 0); /*DW11*/
3883     OUT_BCS_BATCH(batch, 0); /*DW12*/
3884     OUT_BCS_BATCH(batch, 0); /*DW13*/
3885     OUT_BCS_BATCH(batch, 0); /*DW14*/
3886     OUT_BCS_BATCH(batch, 0); /*DW15*/
3887     OUT_BCS_BATCH(batch, 0); /*DW16*/
3888     OUT_BCS_BATCH(batch, 0); /*DW17*/
3889     OUT_BCS_BATCH(batch, 0); /*DW18*/
3890     OUT_BCS_BATCH(batch, 0); /*DW19*/
3891     OUT_BCS_BATCH(batch, 0); /*DW20*/
3892     OUT_BCS_BATCH(batch, 0); /*DW21*/
/* DW22: frame-level flags from the picture parameters */
3894     OUT_BCS_BATCH(batch,
3895                   pic_param->pic_flags.bits.show_frame << 23 |
3896                   pic_param->pic_flags.bits.version << 20
/* DW23: scale (2 bits) + dimension (14 bits) per axis, height in the high half */
3899     OUT_BCS_BATCH(batch,
3900                   (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
3901                   (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
/* Offsets below were computed when the uncompressed frame header was packed */
3905     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
3908     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
3911     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer */
3914     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
3917     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /* frame header bin buffer mv update pointer */
3920     OUT_BCS_BATCH(batch, 0);
3922     ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFX_VP8_PIC_STATE command (38 dwords): frame size in
 * macroblocks, loop-filter levels, quantizer indices and per-plane delta
 * qindexes (packed sign/magnitude), the probability tables maintained in
 * mfc_context->vp8_state (refreshed first via vp8_enc_state_update), the
 * MV probability tables and the loop-filter reference/mode deltas.
 */
3926 gen8_mfc_vp8_pic_state(VADriverContextP ctx,
3927                        struct encode_state *encode_state,
3928                        struct intel_encoder_context *encoder_context)
3930     struct intel_batchbuffer *batch = encoder_context->base.batch;
3931     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3932     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3933     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3934     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3937     log2num = pic_param->pic_flags.bits.num_token_partitions;
3939     /* update mode and token probs */
3940     vp8_enc_state_update(mfc_context, q_matrix);
3942     BEGIN_BCS_BATCH(batch, 38);
3943     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
/* DW1: frame dimensions, in macroblocks minus one */
3944     OUT_BCS_BATCH(batch,
3945                   (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
3946                   (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
/* DW2: picture control flags straight from pic_param */
3948     OUT_BCS_BATCH(batch,
3950                   pic_param->sharpness_level << 16 |
3951                   pic_param->pic_flags.bits.sign_bias_alternate << 13 |
3952                   pic_param->pic_flags.bits.sign_bias_golden << 12 |
3953                   pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
3954                   pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
3955                   pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
3956                   pic_param->pic_flags.bits.segmentation_enabled << 8 |
3957                   !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
3958                   (pic_param->pic_flags.bits.version / 2) << 4 |
3959                   (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
3960                   !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
/* DW3: per-segment loop filter levels */
3962     OUT_BCS_BATCH(batch,
3963                   pic_param->loop_filter_level[3] << 24 |
3964                   pic_param->loop_filter_level[2] << 16 |
3965                   pic_param->loop_filter_level[1] << 8 |
3966                   pic_param->loop_filter_level[0] << 0);
/* DW4: per-segment quantizer indices */
3968     OUT_BCS_BATCH(batch,
3969                   q_matrix->quantization_index[3] << 24 |
3970                   q_matrix->quantization_index[2] << 16 |
3971                   q_matrix->quantization_index[1] << 8 |
3972                   q_matrix->quantization_index[0] << 0);
/* DW5/DW6: qindex deltas, each packed as sign bit (bit15 of the 16-bit
 * value) followed by the 4-bit magnitude */
3974     OUT_BCS_BATCH(batch,
3975                   ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
3976                   abs(q_matrix->quantization_index_delta[4]) << 24 |
3977                   ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
3978                   abs(q_matrix->quantization_index_delta[3]) << 16 |
3979                   ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
3980                   abs(q_matrix->quantization_index_delta[2]) << 8 |
3981                   ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
3982                   abs(q_matrix->quantization_index_delta[1]) << 0);
3984     OUT_BCS_BATCH(batch,
3985                   ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
3986                   abs(q_matrix->quantization_index_delta[0]) << 0);
/* DW7: qindex clamp range */
3988     OUT_BCS_BATCH(batch,
3989                   pic_param->clamp_qindex_high << 8 |
3990                   pic_param->clamp_qindex_low << 0);
/* DW8..DW18: written as all-ones (update masks -- TODO(review): confirm
 * meaning against the PRM) */
3992     for (i = 8; i < 19; i++) {
3993         OUT_BCS_BATCH(batch, 0xffffffff);
/* Segment-map tree and mode/reference probabilities from vp8_state */
3996     OUT_BCS_BATCH(batch,
3997                   mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
3998                   mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
3999                   mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);
4001     OUT_BCS_BATCH(batch,
4002                   mfc_context->vp8_state.prob_skip_false << 24 |
4003                   mfc_context->vp8_state.prob_intra << 16 |
4004                   mfc_context->vp8_state.prob_last << 8 |
4005                   mfc_context->vp8_state.prob_gf << 0);
4007     OUT_BCS_BATCH(batch,
4008                   mfc_context->vp8_state.y_mode_probs[3] << 24 |
4009                   mfc_context->vp8_state.y_mode_probs[2] << 16 |
4010                   mfc_context->vp8_state.y_mode_probs[1] << 8 |
4011                   mfc_context->vp8_state.y_mode_probs[0] << 0);
4013     OUT_BCS_BATCH(batch,
4014                   mfc_context->vp8_state.uv_mode_probs[2] << 16 |
4015                   mfc_context->vp8_state.uv_mode_probs[1] << 8 |
4016                   mfc_context->vp8_state.uv_mode_probs[0] << 0);
4018     /* MV update value, DW23-DW32: 19 probs per component, 4 per dword,
     * the 20th slot padded with zero */
4019     for (i = 0; i < 2; i++) {
4020         for (j = 0; j < 20; j += 4) {
4021             OUT_BCS_BATCH(batch,
4022                           (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
4023                           mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
4024                           mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
4025                           mfc_context->vp8_state.mv_probs[i][j + 0] << 0);
/* Loop-filter deltas are 7-bit signed fields, hence the 0x7f masks */
4029     OUT_BCS_BATCH(batch,
4030                   (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
4031                   (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
4032                   (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
4033                   (pic_param->ref_lf_delta[0] & 0x7f) << 0);
4035     OUT_BCS_BATCH(batch,
4036                   (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
4037                   (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
4038                   (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
4039                   (pic_param->mode_lf_delta[0] & 0x7f) << 0);
4041     OUT_BCS_BATCH(batch, 0);
4042     OUT_BCS_BATCH(batch, 0);
4043     OUT_BCS_BATCH(batch, 0);
4045     ADVANCE_BCS_BATCH(batch);
/*
 * Emit one VP8 buffer address entry: a 64-bit relocation for `bo` at
 * `offset` (instruction-domain read/write) followed by the MOCS dword.
 * NOTE(review): conditional lines appear elided in this excerpt; upstream
 * emits two zero dwords instead of the relocation when bo is NULL --
 * confirm against the full file.
 */
4048 #define OUT_VP8_BUFFER(bo, offset) \
4050         OUT_BCS_RELOC64(batch, \
4052                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
4055         OUT_BCS_BATCH(batch, 0); \
4056         OUT_BCS_BATCH(batch, 0); \
4058     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/*
 * Emit MFX_VP8_BSP_BUF_BASE_ADDR_STATE (32 dwords): the frame-header bin
 * buffer, the intermediate buffer with its eight token-partition offsets
 * and maximum size, the final frame buffer (coded data lands after the
 * i965 coded-buffer header), the stream-out buffer, the coefficient
 * probabilities stream-in buffer, the token statistics buffer and the MPC
 * row-store scratch buffer. All BOs live in mfc_context->vp8_state.
 */
4061 gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
4062                                      struct encode_state *encode_state,
4063                                      struct intel_encoder_context *encoder_context)
4065     struct i965_driver_data *i965 = i965_driver_data(ctx);
4066     struct intel_batchbuffer *batch = encoder_context->base.batch;
4067     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4069     BEGIN_BCS_BATCH(batch, 32);
4070     OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
4072     OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
4074     OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
4075     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
4076     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
4077     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
4078     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
4079     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
4080     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
4081     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
4082     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
4083     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
/* Final bitstream goes after the coded-buffer header in the output BO */
4085     OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
4086     OUT_BCS_BATCH(batch, 0);
4088     OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
4089     OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
4090     OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
4091     OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
4093     ADVANCE_BCS_BATCH(batch);
/*
 * Picture-level BCS programming for a VP8 frame. The commands must be
 * emitted in this order: pipe mode select (VP8 format), surface state,
 * indirect-object base address, pipe buffer addresses, generic BSP buffer
 * base, VP8-specific BSP buffers, VP8 picture state and finally the VP8
 * encoder configuration.
 */
4097 gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
4098                                          struct encode_state *encode_state,
4099                                          struct intel_encoder_context *encoder_context)
4101     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4103     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
4104     mfc_context->set_surface_state(ctx, encoder_context);
4105     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
4106     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
4107     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
4108     gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
4109     gen8_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
4110     gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
/* Lookup table: VME 16x16 intra prediction mode -> VP8 PAK macroblock mode. */
4113 static const unsigned char
4114 vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
/* Lookup table: VME 4x4 block intra prediction mode -> VP8 PAK block mode. */
4121 static const unsigned char
4122 vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
/*
 * Translate a VME intra prediction mode word into the PAK encoding.
 * For the 16x16 case only the low two bits select a mode via
 * vp8_intra_mb_mode_map; for the 4x4 case (is_luma_4x4 != 0) eight 4-bit
 * sub-block modes are remapped individually through
 * vp8_intra_block_mode_map and repacked into one 32-bit word.
 * NOTE(review): the if/else lines selecting between the two cases are
 * elided in this excerpt.
 */
4134 static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
4136     unsigned int i, pak_pred_mode = 0;
4137     unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */
4140         pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
4142         for (i = 0; i < 8; i++) {
4143             vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
4144             assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
4145             pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
4146             pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
4150     return pak_pred_mode;
/*
 * Emit one 7-dword MFX_VP8_PAK_OBJECT for an intra macroblock.
 * The VME output words msg[0..3] carry the chosen intra mode: msg[0] bits
 * 5:4 select 16x16 vs 4x4, msg[1]/msg[2] hold the per-sub-block luma
 * modes and msg[3] the chroma mode; all are remapped to the PAK encoding
 * via gen8_mfc_vp8_intra_mb_mode_map. DW4 is the (y, x) MB position,
 * DW5/DW6 the packed luma sub-block modes.
 */
4153 gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
4154                               struct intel_encoder_context *encoder_context,
4157                               struct intel_batchbuffer *batch)
4159     unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
4160     unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
4161     unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];
/* Fall back to the encoder's batch when the caller passed none */
4164         batch = encoder_context->base.batch;
4166     vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
4167     assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); // vp8 only supports intra_16x16 and intra_4x4
4168     pak_intra_mb_mode = (vme_intra_mb_mode >> 1);
4170     vme_luma_pred_mode[0] = msg[1];
4171     vme_luma_pred_mode[1] = msg[2];
4172     vme_chroma_pred_mode = msg[3] & 0x3;
4174     pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
4175     pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
4176     pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);
4178     BEGIN_BCS_BATCH(batch, 7);
4180     OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
4181     OUT_BCS_BATCH(batch, 0);
4182     OUT_BCS_BATCH(batch, 0);
/* DW3: macroblock control flags */
4183     OUT_BCS_BATCH(batch,
4184                   (0 << 20) |                    /* mv format: intra mb */
4185                   (0 << 18) |                    /* Segment ID */
4186                   (0 << 17) |                    /* disable coeff clamp */
4187                   (1 << 13) |                    /* intra mb flag */
4188                   (0 << 11) |                    /* refer picture select: last frame */
4189                   (pak_intra_mb_mode << 8) |     /* mb type */
4190                   (pak_chroma_pred_mode << 4) |  /* mb uv mode */
4191                   (0 << 2) |                     /* skip mb flag: disable */
4194     OUT_BCS_BATCH(batch, (y << 16) | x);
4195     OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
4196     OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);
4198     ADVANCE_BCS_BATCH(batch);
/*
 * Emit one 7-dword MFX_VP8_PAK_OBJECT for an inter macroblock.
 * Only INTER_16X16 is supported: the single motion vector from the VME
 * output is doubled (VP8 stores luma MVs at double resolution) and copied
 * to the start of the VME MB record so the PAK indirect fetch sees it at
 * a 64-byte-aligned offset.
 */
4202 gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
4203                               struct intel_encoder_context *encoder_context,
4207                               struct intel_batchbuffer *batch)
/* Fall back to the encoder's batch when the caller passed none */
4212         batch = encoder_context->base.batch;
4214     /* only support inter_16x16 now */
4215     assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
4216     /* for inter_16x16, all 16 MVs should be same,
4217      * and move mv to the vme mb start address to make sure offset is 64 bytes aligned
4218      * per the VP8 spec, all VP8 luma motion vectors are stored doubled
4220     msg[0] = (((msg[AVC_INTER_MV_OFFSET/4] & 0xffff0000) << 1) | ((msg[AVC_INTER_MV_OFFSET/4] << 1) & 0xffff));
/* Replicate the MV into the remaining 15 slots of the MB record */
4222     for (i = 1; i < 16; i++) {
4226     BEGIN_BCS_BATCH(batch, 7);
4228     OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
4229     OUT_BCS_BATCH(batch,
4230                   (0 << 29) | /* enable inline mv data: disable */
4232     OUT_BCS_BATCH(batch,
/* DW3: macroblock control flags */
4234     OUT_BCS_BATCH(batch,
4235                   (4 << 20) | /* mv format: inter */
4236                   (0 << 18) | /* Segment ID */
4237                   (0 << 17) | /* coeff clamp: disable */
4238                   (0 << 13) | /* intra mb flag: inter mb */
4239                   (0 << 11) | /* refer picture select: last frame */
4240                   (0 << 8) |  /* mb type: 16x16 */
4241                   (0 << 4) |  /* mb uv mode: dc_pred */
4242                   (0 << 2) |  /* skip mb flag: disable */
4245     OUT_BCS_BATCH(batch, (y << 16) | x);
4248     OUT_BCS_BATCH(batch, 0x8);
4249     OUT_BCS_BATCH(batch, 0x8);
4251     ADVANCE_BCS_BATCH(batch);
/*
 * Walk the VME output buffer one macroblock at a time and emit a PAK
 * object per MB into slice_batch. Key frames always emit intra objects;
 * for inter frames the intra/inter choice is made by comparing the RDO
 * costs the VME kernel wrote into each MB record. The VME output BO is
 * mapped writable because gen8_mfc_vp8_pak_object_inter rewrites the MV
 * words in place.
 */
4255 gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
4256                           struct encode_state *encode_state,
4257                           struct intel_encoder_context *encoder_context,
4258                           struct intel_batchbuffer *slice_batch)
4260     struct gen6_vme_context *vme_context = encoder_context->vme_context;
4261     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
4262     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4263     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
4264     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
4265     unsigned int *msg = NULL;
4266     unsigned char *msg_ptr = NULL;
4267     unsigned int i, offset, is_intra_frame;
4269     is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4271     dri_bo_map(vme_context->vme_output.bo , 1);
4272     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
4274     for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
4275         int h_pos = i % width_in_mbs;
4276         int v_pos = i / width_in_mbs;
/* Per-MB VME record for this macroblock */
4277         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
4279         if (is_intra_frame) {
4280             gen8_mfc_vp8_pak_object_intra(ctx,
4286             int inter_rdo, intra_rdo;
4287             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
4288             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
/* Pick whichever prediction the VME pass scored cheaper */
4290             if (intra_rdo < inter_rdo) {
4291                 gen8_mfc_vp8_pak_object_intra(ctx,
4297                 offset = i * vme_context->vme_output.size_block;
4298                 gen8_mfc_vp8_pak_object_inter(ctx,
4308     dri_bo_unmap(vme_context->vme_output.bo);
4312  * A batch buffer for vp8 pak object commands
/*
 * Build the per-frame PAK object batch inside the context's aux
 * batchbuffer: fill it via gen8_mfc_vp8_pak_pipeline, qword-align it and
 * terminate it with MI_BATCH_BUFFER_END. The underlying BO is referenced
 * and handed to the caller (who chains to it and later unreferences it);
 * the aux batchbuffer wrapper itself is freed and cleared here.
 */
4315 gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
4316                                   struct encode_state *encode_state,
4317                                   struct intel_encoder_context *encoder_context)
4319     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4320     struct intel_batchbuffer *batch;
4323     batch = mfc_context->aux_batchbuffer;
4324     batch_bo = batch->buffer;
4326     gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
/* MI_BATCH_BUFFER_END must land on a qword boundary */
4328     intel_batchbuffer_align(batch, 8);
4330     BEGIN_BCS_BATCH(batch, 2);
4331     OUT_BCS_BATCH(batch, 0);
4332     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
4333     ADVANCE_BCS_BATCH(batch);
/* Keep the BO alive for the caller, then retire the wrapper */
4335     dri_bo_reference(batch_bo);
4336     intel_batchbuffer_free(batch);
4337     mfc_context->aux_batchbuffer = NULL;
/*
 * Program the whole VP8 PAK pass: first build the software slice batch
 * containing the per-MB PAK objects, then, inside an atomic BCS section,
 * flush, emit the picture-level state and chain into the slice batch BO
 * with a second-level MI_BATCH_BUFFER_START. The slice BO reference taken
 * by gen8_mfc_vp8_software_batchbuffer is dropped at the end.
 */
4343 gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
4344                                  struct encode_state *encode_state,
4345                                  struct intel_encoder_context *encoder_context)
4347     struct intel_batchbuffer *batch = encoder_context->base.batch;
4348     dri_bo *slice_batch_bo;
4350     slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
4353     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
4354     intel_batchbuffer_emit_mi_flush(batch);
4356     // picture level programing
4357     gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the software-built PAK object batch */
4359     BEGIN_BCS_BATCH(batch, 4);
4360     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
4361     OUT_BCS_RELOC64(batch,
4363                     I915_GEM_DOMAIN_COMMAND, 0,
4365     OUT_BCS_BATCH(batch, 0);
4366     ADVANCE_BCS_BATCH(batch);
4369     intel_batchbuffer_end_atomic(batch);
4371     dri_bo_unreference(slice_batch_bo);
4374 static int gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
4375 struct encode_state *encode_state,
4376 struct intel_encoder_context *encoder_context)
4378 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4379 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4380 unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4381 unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
4383 int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;
4385 first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;
4387 dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
4389 vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
4390 first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;
4392 for (i = 1; i <= partition_num; i++)
4393 token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;
4395 /*coded_bytes includes P0~P8 partitions bytes + uncompresse date bytes + partion_size bytes in bitstream + 3 extra bytes */
4396 /*it seems the last partition size in vp8 status buffer is smaller than reality. so add 3 extra bytes */
4397 vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;
4399 dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
4401 dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
4402 struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
4403 coded_buffer_segment->base.size = vp8_coded_bytes;
4404 dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
4406 return vp8_coded_bytes;
/*
 * Top-level VP8 encode entry: allocate/refresh per-frame state, run the
 * BCS PAK pipeline, then read back the coded size. Under CBR the BRC
 * post-pack step checks for HRD violations and either updates the HRD
 * context or, when the violation cannot be repaired at min/max QP, logs
 * it once and gives up on rate control for this stream.
 */
4410 gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
4411                             struct encode_state *encode_state,
4412                             struct intel_encoder_context *encoder_context)
4414     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4415     unsigned int rate_control_mode = encoder_context->rate_control_mode;
4416     int current_frame_bits_size;
4419     gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
4420     intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
4421     /* Programing bcs pipeline */
4422     gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
4423     gen8_mfc_run(ctx, encode_state, encoder_context);
/* coded size is in bytes; BRC works in bits */
4424     current_frame_bits_size = 8 * gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
4426     if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
4427         sts = gen8_mfc_vp8_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
4428         if (sts == BRC_NO_HRD_VIOLATION) {
4429             gen8_mfc_vp8_hrd_context_update(encode_state, mfc_context);
4431         else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* Report an unrepairable violation only once per stream */
4432             if (!mfc_context->hrd.violation_noted) {
4433                 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
4434                 mfc_context->hrd.violation_noted = 1;
4436             return VA_STATUS_SUCCESS;
4440     return VA_STATUS_SUCCESS;
/*
 * Release every resource owned by the MFC context: deblocking and source
 * surfaces, the indirect PAK/BSE object, direct-MV and row-store scratch
 * buffers, reference surfaces, the GPE context, batchbuffer surfaces, the
 * aux batchbuffer and all VP8-specific BOs. Each pointer is cleared after
 * its reference is dropped.
 */
4444 gen8_mfc_context_destroy(void *context)
4446     struct gen6_mfc_context *mfc_context = context;
4449     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
4450     mfc_context->post_deblocking_output.bo = NULL;
4452     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
4453     mfc_context->pre_deblocking_output.bo = NULL;
4455     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
4456     mfc_context->uncompressed_picture_source.bo = NULL;
4458     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
4459     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
4461     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
4462         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
4463         mfc_context->direct_mv_buffers[i].bo = NULL;
4466     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
4467     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
4469     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
4470     mfc_context->macroblock_status_buffer.bo = NULL;
4472     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
4473     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
4475     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
4476     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
4479     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
4480         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
4481         mfc_context->reference_surfaces[i].bo = NULL;
4484     gen8_gpe_context_destroy(&mfc_context->gpe_context);
4486     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
4487     mfc_context->mfc_batchbuffer_surface.bo = NULL;
4489     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
4490     mfc_context->aux_batchbuffer_surface.bo = NULL;
4492     if (mfc_context->aux_batchbuffer)
4493         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
4495     mfc_context->aux_batchbuffer = NULL;
/* VP8-specific buffers */
4497     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
4498     mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
4500     dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
4501     mfc_context->vp8_state.final_frame_bo = NULL;
4503     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
4504     mfc_context->vp8_state.frame_header_bo = NULL;
4506     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
4507     mfc_context->vp8_state.intermediate_bo = NULL;
4509     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
4510     mfc_context->vp8_state.mpc_row_store_bo = NULL;
4512     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
4513     mfc_context->vp8_state.stream_out_bo = NULL;
4515     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
4516     mfc_context->vp8_state.token_statistics_bo = NULL;
/*
 * Dispatch an encode request to the codec-specific PAK entry point based
 * on the VA profile; unknown profiles yield
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 */
4521 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
4523                                   struct encode_state *encode_state,
4524                                   struct intel_encoder_context *encoder_context)
4529     case VAProfileH264ConstrainedBaseline:
4530     case VAProfileH264Main:
4531     case VAProfileH264High:
4532     case VAProfileH264MultiviewHigh:
4533     case VAProfileH264StereoHigh:
4534         vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
4537         /* FIXME: add for other profile */
4538     case VAProfileMPEG2Simple:
4539     case VAProfileMPEG2Main:
4540         vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
4543     case VAProfileJPEGBaseline:
/* JPEG needs default quant matrices installed before encoding */
4544         jpeg_init_default_qmatrix(ctx, encoder_context);
4545         vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);
4548     case VAProfileVP8Version0_3:
4549         vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
4553         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
4560 extern Bool i965_encoder_vp8_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
4562 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4564 struct i965_driver_data *i965 = i965_driver_data(ctx);
4565 struct gen6_mfc_context *mfc_context;
4567 if (IS_CHERRYVIEW(i965->intel.device_info) && encoder_context->codec == CODEC_VP8)
4568 return i965_encoder_vp8_pak_context_init(ctx, encoder_context);
4570 mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
4571 assert(mfc_context);
4572 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
4574 mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
4575 mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
4576 mfc_context->gpe_context.curbe.length = 32 * 4;
4577 mfc_context->gpe_context.sampler.entry_size = 0;
4578 mfc_context->gpe_context.sampler.max_entries = 0;
4580 if (i965->intel.eu_total > 0)
4581 mfc_context->gpe_context.vfe_state.max_num_threads = 6 * i965->intel.eu_total;
4583 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
4585 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
4586 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
4587 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
4588 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
4590 if (IS_GEN9(i965->intel.device_info)) {
4591 gen8_gpe_load_kernels(ctx,
4592 &mfc_context->gpe_context,
4596 gen8_gpe_load_kernels(ctx,
4597 &mfc_context->gpe_context,
4602 mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
4603 mfc_context->set_surface_state = gen8_mfc_surface_state;
4604 mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
4605 mfc_context->avc_img_state = gen8_mfc_avc_img_state;
4606 mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
4607 mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
4608 mfc_context->insert_object = gen8_mfc_avc_insert_object;
4609 mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
4611 encoder_context->mfc_context = mfc_context;
4612 encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
4613 encoder_context->mfc_pipeline = gen8_mfc_pipeline;
4615 if (encoder_context->codec == CODEC_VP8)
4616 encoder_context->mfc_brc_prepare = gen8_mfc_vp8_brc_prepare;
4618 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;