1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
// Entry points of the libx264 video encoder work object.
// encx264Init configures and opens the encoder for a job.
13 int encx264Init( hb_work_object_t *, hb_job_t * );
// encx264Work encodes one input buffer, or flushes the encoder at end of input.
14 int encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
// encx264Close releases the input picture and closes the encoder.
15 void encx264Close( hb_work_object_t * );
// Work object descriptor for this encoder. Only part of the initializer is
// visible here; the string is presumably the human-readable name of the
// work object - confirm against the hb_work_object_t definition.
17 hb_work_object_t hb_encx264 =
20 "H.264/AVC encoder (libx264)",
// NOTE(review): DTS_BUFFER_SIZE is not referenced in the visible part of this
// file - confirm whether it is still needed.
26 #define DTS_BUFFER_SIZE 32
29 * The frame info struct remembers information about each frame across calls
30 * to x264_encoder_encode. Since frames are uniquely identified by their
31 * timestamp, we use some bits of the timestamp as an index. The LSB is
32 * chosen so that two successive frames will have different values in the
33 * bits over any plausible range of frame rates. (Starting with bit 8 allows
34 * any frame rate slower than 352fps.) The MSB determines the size of the array.
35 * It is chosen so that two frames can't use the same slot during the
36 * encoder's max frame delay (set by the standard as 16 frames) and so
37 * that, up to some minimum frame rate, frames are guaranteed to map to
38 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
39 * no collisions down to a rate of .7 fps).
// Bit range of the 90 kHz timestamp that is used as the frame_info index.
// FRAME_INFO_MAX2 is the lowest bit used (timestamps are shifted right by it)
// and FRAME_INFO_MIN2 the highest; the names apparently refer to the maximum
// and minimum frame rates the range can handle rather than to bit order.
41 #define FRAME_INFO_MAX2 (8) // 2^8 = 256; 90000/256 = 352 frames/sec
42 #define FRAME_INFO_MIN2 (17) // 2^17 = 128K; 90000/131072 = 0.69 frames/sec
// Number of slots in the frame_info array (2^10 = 1024 for bits 8..17).
43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
// Mask that reduces a shifted timestamp to a valid frame_info index.
44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
// Per-instance state of the x264 encoder work object. Only some of the
// fields are visible in this portion of the file.
46 struct hb_work_private_s
// Picture passed to x264_encoder_encode; its plane pointers are redirected to
// each input buffer's data before encoding.
50 x264_picture_t pic_in;
// Plane 0 pointer as allocated by x264_picture_alloc, kept so it can be put
// back before x264_picture_clean is called.
51 uint8_t *x264_allocated_pic;
55 uint32_t frames_split; // number of frames we had to split
56 int chap_mark; // saved chap mark when we're propagating it
57 int64_t last_stop; // Debugging - stop time of previous input frame
// Per-frame information remembered across x264_encoder_encode calls, indexed
// by selected bits of the frame's start timestamp.
63 } frame_info[FRAME_INFO_SIZE];
68 /***********************************************************************
69 * encx264Init
70 ***********************************************************************
72 **********************************************************************/
// Initialise the libx264 encoder for one job.
//
// Allocates the private state, fills an x264 parameter set from the job
// (frame size and rate, keyframe interval, level, user-supplied x264opts,
// VUI colour description, sample aspect ratio and rate control), opens the
// encoder, stores the SPS and PPS in w->config->h264, allocates the input
// picture and computes the initial B-frame delay.
//
// w   - work object being initialised; w->config->h264 receives the SPS, PPS
//       and init_delay.
// job - job whose settings configure the encoder.
73 int encx264Init( hb_work_object_t * w, hb_job_t * job )
80 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
// Name of the temporary stats file; it is handed to x264 as psz_stat_out and
// psz_stat_in further down.
85 memset( pv->filename, 0, 1024 );
86 hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
// Start from libx264's defaults; the job settings below override them.
88 x264_param_default( &param );
90 param.i_threads = ( hb_get_cpu_count() * 3 / 2 );
91 param.i_width = job->width;
92 param.i_height = job->height;
93 param.i_fps_num = job->vrate;
94 param.i_fps_den = job->vrate_base;
96 /* Set min:max key intervals ratio to 1:10 of fps.
97 * This section is skipped if fps=25 (default).
// NOTE(review): a vrate_base of 1080000 presumably corresponds to 25 fps on
// a 27 MHz clock - confirm.
99 if (job->vrate_base != 1080000)
101 int fps = job->vrate / job->vrate_base;
103 /* adjust +1 when fps has remainder to bump { 23.976, 29.97, 59.94 } to { 24, 30, 60 } */
104 if (job->vrate % job->vrate_base)
107 param.i_keyint_min = fps;
108 param.i_keyint_max = fps * 10;
110 hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
113 param.i_log_level = X264_LOG_INFO;
114 if( job->h264_level )
117 param.i_level_idc = job->h264_level;
118 hb_log( "encx264: encoding at level %i",
123 This section passes the string x264opts to libx264 for parsing into
124 parameter names and values.
126 The string is set up like this:
127 option1=value1:option2=value 2
129 So, you have to iterate through based on the colons, and then put
130 the left side of the equals sign in "name" and the right side into
131 "value." Then you hand those strings off to x264 for interpretation.
133 This is all based on the universal x264 option handling Loren
134 Merritt implemented in the Mplayer/Mencoder project.
137 if( job->x264opts != NULL && *job->x264opts != '\0' )
139 char *x264opts, *x264opts_start;
// Work on a private copy of the option string; it is freed after parsing.
141 x264opts = x264opts_start = strdup(job->x264opts);
143 while( x264opts_start && *x264opts )
145 char *name = x264opts;
149 x264opts += strcspn( x264opts, ":" );
156 value = strchr( name, '=' );
164 When B-frames are enabled, the max frame count increments
165 by 1 (regardless of the number of B-frames). If you don't
166 change the duration of the video track when you mux, libmp4
167 barfs. So, check if the x264opts are using B-frames, and
168 when they are, set the boolean job->areBframes as true.
171 if( !( strcmp( name, "bframes" ) ) )
// value is NULL when the option was given without an =value part (the
// logging below allows for that as well), so check it before calling atoi.
173 if( value != NULL && atoi( value ) > 0 )
179 /* Note b-pyramid here, so the initial delay can be doubled */
180 if( !( strcmp( name, "b-pyramid" ) ) )
184 if( atoi( value ) > 0 )
195 /* Here's where the strings are passed to libx264 for parsing. */
196 ret = x264_param_parse( &param, name, value );
198 /* Let x264 sanity check the options for us*/
199 if( ret == X264_PARAM_BAD_NAME )
200 hb_log( "x264 options: Unknown suboption %s", name );
201 if( ret == X264_PARAM_BAD_VALUE )
202 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
204 free(x264opts_start);
207 /* set up the VUI color model & gamma to match what the COLR atom
208 * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
209 if( job->color_matrix == 1 )
211 // ITU BT.601 DVD or SD TV content
212 param.vui.i_colorprim = 6;
213 param.vui.i_transfer = 1;
214 param.vui.i_colmatrix = 6;
216 else if( job->color_matrix == 2 )
218 // ITU BT.709 HD content
219 param.vui.i_colorprim = 1;
220 param.vui.i_transfer = 1;
221 param.vui.i_colmatrix = 1;
223 else if ( job->title->width >= 1280 || job->title->height >= 720 )
225 // we guess that 720p or above is ITU BT.709 HD content
226 param.vui.i_colorprim = 1;
227 param.vui.i_transfer = 1;
228 param.vui.i_colmatrix = 1;
232 // ITU BT.601 DVD or SD TV content
233 param.vui.i_colorprim = 6;
234 param.vui.i_transfer = 1;
235 param.vui.i_colmatrix = 6;
238 if( job->anamorphic.mode )
240 param.vui.i_sar_width = job->anamorphic.par_width;
241 param.vui.i_sar_height = job->anamorphic.par_height;
243 hb_log( "encx264: encoding with stored aspect %d/%d",
244 param.vui.i_sar_width, param.vui.i_sar_height );
// A vquality strictly between 0 and 1 is treated as a fraction and mapped
// onto the 0..51 quantiser scale as 51 - vquality * 51.
248 if( job->vquality > 0.0 && job->vquality < 1.0 )
254 param.rc.i_rc_method = X264_RC_CRF;
255 param.rc.f_rf_constant = 51 - job->vquality * 51;
256 hb_log( "encx264: Encoding at constant RF %f",
257 param.rc.f_rf_constant );
262 param.rc.i_rc_method = X264_RC_CQP;
263 param.rc.i_qp_constant = 51 - job->vquality * 51;
264 hb_log( "encx264: encoding at constant QP %d",
265 param.rc.i_qp_constant );
269 else if( job->vquality == 0 || job->vquality >= 1.0 )
271 /* Use the vquality as a raw RF or QP
272 instead of treating it like a percentage. */
277 param.rc.i_rc_method = X264_RC_CRF;
278 param.rc.f_rf_constant = job->vquality;
279 hb_log( "encx264: Encoding at constant RF %f",
280 param.rc.f_rf_constant );
285 param.rc.i_rc_method = X264_RC_CQP;
286 param.rc.i_qp_constant = job->vquality;
287 hb_log( "encx264: encoding at constant QP %d",
288 param.rc.i_qp_constant );
295 param.rc.i_rc_method = X264_RC_ABR;
296 param.rc.i_bitrate = job->vbitrate;
300 param.rc.b_stat_write = 1;
301 param.rc.psz_stat_out = pv->filename;
304 param.rc.b_stat_read = 1;
305 param.rc.psz_stat_in = pv->filename;
310 hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
// Open the encoder with the assembled parameter set.
311 pv->x264 = x264_encoder_open( &param );
313 x264_encoder_headers( pv->x264, &nal, &nal_count );
315 /* Sequence Parameter Set */
316 x264_nal_encode( w->config->h264.sps, &nal_size, 0, &nal[1] );
317 w->config->h264.sps_length = nal_size;
319 /* Picture Parameter Set */
320 x264_nal_encode( w->config->h264.pps, &nal_size, 0, &nal[2] );
321 w->config->h264.pps_length = nal_size;
323 x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
324 job->width, job->height );
326 pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
// Remember the plane x264 allocated; encx264Close restores it before the
// picture is cleaned up.
327 pv->x264_allocated_pic = pv->pic_in.img.plane[0];
331 /* Basic initDelay value is the clockrate divided by the FPS
332 -- the length of one frame in clockticks. */
333 pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
335 /* 23.976-length frames are 3753.75 ticks long on average but the DVD
336 creates that average rate by repeating 59.94 fields so the max
337 frame size is actually 4504.5 (3 field times). The field durations
338 are computed based on quantized times (see below) so we need an extra
339 two ticks to account for the rounding. */
340 if (pv->init_delay == 3753)
341 pv->init_delay = 4507;
343 /* frame rates are not exact in the DVD 90KHz PTS clock (they are
344 exact in the DVD 27MHz system clock but we never see that) so the
345 rates computed above are all +-1 due to quantization. Worst case
346 is when a clock-rounded-down frame is adjacent to a rounded-up frame
347 which makes one of the frames 2 ticks longer than the nominal
351 /* For VFR, libhb sees the FPS as 29.97, but the longest frames
352 will use the duration of frames running at 23.976fps instead.
353 Since detelecine occasionally makes mistakes and since we have
354 to deal with some really horrible timing jitter from mkvs and
355 mp4s encoded with low resolution clocks, make the delay very
356 conservative if we're not doing CFR. */
362 /* The delay is 1 frame for regular b-frames, 2 for b-pyramid. */
// NOTE(review): areBframes is presumably 0 when B-frames are off, which makes
// init_delay 0 and disables the frame splitting in encx264Work - confirm.
363 pv->init_delay *= job->areBframes;
365 w->config->h264.init_delay = pv->init_delay;
// Shut down the encoder for this work object: logs split-frame statistics if
// any frames were split, puts the x264-allocated plane pointer back into the
// input picture, then releases the picture and closes the x264 encoder.
//
// w - work object whose private_data holds the encoder state.
370 void encx264Close( hb_work_object_t * w )
372 hb_work_private_t * pv = w->private_data;
374 if ( pv->frames_split )
376 hb_log( "encx264: %u frames had to be split (%u in, %u out)",
377 pv->frames_split, pv->frames_in, pv->frames_out );
380 * Patch the x264 allocated data back in so that x264 can free it
381 * we have been using our own buffers during the encode to avoid copying.
383 pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
384 x264_picture_clean( &pv->pic_in );
385 x264_encoder_close( pv->x264 );
// Drop the work object's reference to the private state.
387 w->private_data = NULL;
393 * see comments in definition of 'frame_info' in pv struct for description
394 * of what these routines are doing.
// Record the duration (stop - start) of input frame in, in the frame_info
// slot selected by bits FRAME_INFO_MAX2 and up of its start timestamp.
396 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
398 int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
399 pv->frame_info[i].duration = in->stop - in->start;
// Return the duration stored in the frame_info slot that timestamp pts maps
// to (same shift-and-mask index computation as save_frame_info).
402 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
404 int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
405 return pv->frame_info[i].duration;
// Pack the NAL units produced by one x264_encoder_encode call into a single
// hb_buffer_t.
//
// The buffer's start and stop times come from pic_out->i_pts plus the duration
// saved for that timestamp. Each NAL is encoded into the buffer; in the
// mp4/mkv path SPS and PPS NALs are skipped and the first four bytes of each
// remaining NAL are overwritten with its length. The frame type, pending
// chapter mark and reference flag are derived from pic_out->i_type and the
// NAL's i_ref_idc. A buffer that ends up empty is closed with hb_buffer_close.
//
// w       - work object; its private data supplies the job and frame info.
// pic_out - picture information returned by x264 for this output.
// i_nal   - number of entries in nal.
// nal     - NAL units returned by x264.
408 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
409 int i_nal, x264_nal_t *nal )
411 hb_buffer_t *buf = NULL;
412 hb_work_private_t *pv = w->private_data;
413 hb_job_t *job = pv->job;
415 /* Should be way too large */
416 buf = hb_video_buffer_init( job->width, job->height );
420 // use the pts to get the original frame's duration.
421 int64_t duration = get_frame_duration( pv, pic_out->i_pts );
422 buf->start = pic_out->i_pts;
423 buf->stop = pic_out->i_pts + duration;
425 /* Encode all the NALs we were given into buf.
426 NOTE: This code assumes one video frame per NAL (but there can
427 be other stuff like SPS and/or PPS). If there are multiple
428 frames we only get the duration of the first which will
429 eventually screw up the muxer & decoder. */
431 for( i = 0; i < i_nal; i++ )
// Space still free in buf, passed to x264_nal_encode as the output size.
433 int data = buf->alloc - buf->size;
434 int size = x264_nal_encode( buf->data + buf->size, &data, 1, &nal[i] );
440 if( job->mux & HB_MUX_AVI )
442 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
444 buf->frametype = HB_FRAME_KEY;
450 /* H.264 in .mp4 or .mkv */
// The low 5 bits of the byte following the 4-byte prefix are read as the NAL
// type; types 7 and 8 are the parameter sets skipped below.
451 int naltype = buf->data[buf->size+4] & 0x1f;
452 if ( naltype == 0x7 || naltype == 0x8 )
454 // Sequence Parameter Set & Picture Parameter Set go in the
455 // mp4 header so skip them here
459 /* H.264 in mp4 (stolen from mp4creator) */
// Overwrite the first four bytes with size - 4, most significant byte first.
460 buf->data[buf->size+0] = ( ( size - 4 ) >> 24 ) & 0xFF;
461 buf->data[buf->size+1] = ( ( size - 4 ) >> 16 ) & 0xFF;
462 buf->data[buf->size+2] = ( ( size - 4 ) >> 8 ) & 0xFF;
463 buf->data[buf->size+3] = ( ( size - 4 ) >> 0 ) & 0xFF;
465 /* Decide what type of frame we have. */
466 switch( pic_out->i_type )
469 buf->frametype = HB_FRAME_IDR;
470 /* if we have a chapter marker pending and this
471 frame's presentation time stamp is at or after
472 the marker's time stamp, use this as the
474 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
477 buf->new_chap = pv->chap_mark;
482 buf->frametype = HB_FRAME_I;
486 buf->frametype = HB_FRAME_P;
490 buf->frametype = HB_FRAME_B;
493 /* This is for b-pyramid, which has reference b-frames
494 However, it doesn't seem to ever be used... */
496 buf->frametype = HB_FRAME_BREF;
499 // If it isn't the above, what type of frame is it??
505 /* Since libx264 doesn't tell us when b-frames are
506 themselves reference frames, figure it out on our own. */
507 if( (buf->frametype == HB_FRAME_B) &&
508 (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
509 buf->frametype = HB_FRAME_BREF;
511 /* Expose disposable bit to muxer. */
512 if( nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE )
513 buf->flags &= ~HB_FRAME_REF;
515 buf->flags |= HB_FRAME_REF;
519 // make sure we found at least one video frame
520 if ( buf->size <= 0 )
522 // no video - discard the buf
523 hb_buffer_close( &buf );
// Feed one input frame to x264 and return the output of nal_encode for
// whatever the encoder produced.
//
// The input picture's planes are pointed at the buffer's data (no copy), a
// chapter start forces an IDR frame, the frame's duration is saved for later
// lookup by timestamp, and the frame's start time is passed to x264 as its PTS.
//
// w  - work object holding the encoder state.
// in - input frame; its data, start, stop and new_chap fields are read.
528 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
530 hb_work_private_t *pv = w->private_data;
531 hb_job_t *job = pv->job;
533 /* Point x264 at our current buffers Y(UV) data. */
534 pv->pic_in.img.plane[0] = in->data;
// Size in bytes of one chroma plane (width and height halved, rounded up).
536 int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
539 /* XXX x264 has currently no option for grayscale encoding */
540 memset( pv->pic_in.img.plane[1], 0x80, uvsize );
541 memset( pv->pic_in.img.plane[2], 0x80, uvsize );
545 /* Point x264 at our buffers (Y)UV data */
546 pv->pic_in.img.plane[1] = in->data + job->width * job->height;
547 pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
549 if( in->new_chap && job->chapter_markers )
551 /* chapters have to start with an IDR frame so request that this
552 frame be coded as IDR. Since there may be up to 16 frames
553 currently buffered in the encoder remember the timestamp so
554 when this frame finally pops out of the encoder we'll mark
555 its buffer as the start of a chapter. */
556 pv->pic_in.i_type = X264_TYPE_IDR;
557 if( pv->next_chap == 0 )
559 pv->next_chap = in->start;
560 pv->chap_mark = in->new_chap;
562 /* don't let 'work_loop' put a chapter mark on the wrong buffer */
567 pv->pic_in.i_type = X264_TYPE_AUTO;
569 pv->pic_in.i_qpplus1 = 0;
571 /* XXX this is temporary debugging code to check that the upstream
572 * modules (render & sync) have generated a continuous, self-consistent
573 * frame stream with the current frame's start time equal to the
574 * previous frame's stop time.
576 if( pv->last_stop != in->start )
// The timestamps are 64-bit; cast so the arguments match %lld on platforms
// where int64_t is not long long.
578 hb_log("encx264 input continuity err: last stop %lld start %lld",
579 (long long)pv->last_stop, (long long)in->start);
581 pv->last_stop = in->stop;
583 // Remember info about this frame that we need to pass across
584 // the x264_encoder_encode call (since it reorders frames).
585 save_frame_info( pv, in );
587 /* Feed the input PTS to x264 so it can figure out proper output PTS */
588 pv->pic_in.i_pts = in->start;
590 x264_picture_t pic_out;
594 x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
597 return nal_encode( w, &pic_out, i_nal, nal );
// Work function of the encoder: encodes *buf_in and stores the resulting
// buffer (or chain of buffers) in *buf_out.
//
// At end of input the encoder is flushed by calling x264_encoder_encode with
// a NULL picture, the flushed buffers are chained together and the eof is
// added to the end of the chain. Otherwise the frame is encoded directly,
// unless reordering is active (init_delay is non-zero) and the frame lasts
// longer than init_delay; such a frame is fed to the encoder as several
// shorter frames that all carry the same image.
//
// w       - work object holding the encoder state.
// buf_in  - input buffer to encode.
// buf_out - receives the encoded output.
602 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
603 hb_buffer_t ** buf_out )
605 hb_work_private_t *pv = w->private_data;
606 hb_buffer_t *in = *buf_in;
612 // EOF on input. Flush any frames still in the encoder then
613 // send the eof downstream to tell the muxer we're done.
614 x264_picture_t pic_out;
617 hb_buffer_t *last_buf = NULL;
// A NULL input picture asks x264 for frames it is still holding.
621 x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
625 hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
629 if ( last_buf == NULL )
632 last_buf->next = buf;
636 // Flushed everything - add the eof to the end of the chain.
637 if ( last_buf == NULL )
646 // Not EOF - encode the packet & wrap it in a NAL
649 // if we're re-ordering frames, check if this frame is too large to reorder
650 if ( pv->init_delay && in->stop - in->start > pv->init_delay )
652 // This frame's duration is larger than the time allotted for b-frame
653 // reordering. That means that if it's used as a reference the decoder
654 // won't be able to move it early enough to render it in correct
655 // sequence & the playback will have odd jumps & twitches. To make
656 // sure this doesn't happen we pretend this frame is multiple
657 // frames, each with duration <= init_delay. Since each of these
658 // new frames contains the same image the visual effect is identical
659 // to the original but the resulting stream can now be coded without
660 // error. We take advantage of the fact that x264 buffers frame
661 // data internally to feed the same image into the encoder multiple
662 // times, just changing its start & stop times each time.
664 int64_t orig_stop = in->stop;
665 int64_t new_stop = in->start;
666 hb_buffer_t *last_buf = NULL;
668 // We want to spread the new frames uniformly over the total time
669 // so that we don't end up with a very short frame at the end.
670 // In the number of pieces calculation we add in init_delay-1 to
671 // round up but not add an extra piece if the frame duration is
672 // a multiple of init_delay. The final increment of frame_dur is
673 // to restore the bits that got truncated by the divide on the
674 // previous line. If we don't do this we end up with an extra tiny
675 // frame at the end whose duration is npieces-1.
676 int64_t frame_dur = orig_stop - new_stop;
677 int64_t npieces = ( frame_dur + pv->init_delay - 1 ) / pv->init_delay;
678 frame_dur /= npieces;
// Encode the same image once per piece, advancing in->start each time until
// the original stop time is reached.
681 while ( in->start < orig_stop )
683 new_stop += frame_dur;
684 if ( new_stop > orig_stop )
685 new_stop = orig_stop;
687 hb_buffer_t *buf = x264_encode( w, in );
691 if ( last_buf == NULL )
694 last_buf->next = buf;
697 in->start = new_stop;
703 *buf_out = x264_encode( w, in );