1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
13 int encx264Init( hb_work_object_t *, hb_job_t * );
14 int encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
15 void encx264Close( hb_work_object_t * );
17 hb_work_object_t hb_encx264 =
20 "H.264/AVC encoder (libx264)",
26 #define DTS_BUFFER_SIZE 32
29 * The frame info struct remembers information about each frame across calls
30 * to x264_encoder_encode. Since frames are uniquely identified by their
31 * timestamp, we use some bits of the timestamp as an index. The LSB is
32 * chosen so that two successive frames will have different values in the
33 * bits over any plausible range of frame rates. (Starting with bit 8 allows
34 * any frame rate slower than 352fps.) The MSB determines the size of the array.
35 * It is chosen so that two frames can't use the same slot during the
36 * encoder's max frame delay (set by the standard as 16 frames) and so
37 * that, up to some minimum frame rate, frames are guaranteed to map to
38 * different slots. (An MSB of 17 which is 2^(17-8+1) = 1024 slots guarantees
39 * no collisions down to a rate of .7 fps).
41 #define FRAME_INFO_MAX2 (8) // 2^8 = 256; 90000/256 = 352 frames/sec
42 #define FRAME_INFO_MIN2 (17) // 2^17 = 128K; 90000/131072 = 0.7 frames/sec
43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
46 struct hb_work_private_s
50 x264_picture_t pic_in;
51 uint8_t *x264_allocated_pic;
55 uint32_t frames_split; // number of frames we had to split
56 int chap_mark; // saved chap mark when we're propagating it
57 int64_t last_stop; // Debugging - stop time of previous input frame
63 } frame_info[FRAME_INFO_SIZE];
68 /***********************************************************************
69 * encx264Init
70 ***********************************************************************
72 **********************************************************************/
73 int encx264Init( hb_work_object_t * w, hb_job_t * job )
79 hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
84 memset( pv->filename, 0, 1024 );
85 hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
87 x264_param_default( &param );
89 /* Default weightp to off for baseline,
90 overridable through x264 option strings. */
91 if( job->x264opts != NULL && *job->x264opts != '\0' )
93 char *x264opts, *x264opts_start;
95 x264opts = x264opts_start = strdup(job->x264opts);
97 while( x264opts_start && *x264opts )
99 char *name = x264opts;
102 x264opts += strcspn( x264opts, ":" );
109 value = strchr( name, '=' );
117 When B-frames are enabled, the max frame count increments
118 by 1 (regardless of the number of B-frames). If you don't
119 change the duration of the video track when you mux, libmp4
120 barfs. So, check if the x264opts aren't using B-frames, and
121 when they aren't, set the boolean job->areBframes as false.
123 if( !( strcmp( name, "bframes" ) ) )
125 if( atoi( value ) == 0 )
127 param.analyse.i_weighted_pred = X264_WEIGHTP_NONE;
134 param.analyse.b_psnr = 1;
135 param.analyse.b_ssim = 1;
137 param.i_threads = ( hb_get_cpu_count() * 3 / 2 );
138 param.i_width = job->width;
139 param.i_height = job->height;
140 param.i_fps_num = job->vrate;
141 param.i_fps_den = job->vrate_base;
143 /* Disable annexb. Inserts size into nal header instead of start code */
146 /* Set min:max key intervals ratio to 1:10 of fps.
147 * This section is skipped if fps=25 (default).
149 if (job->vrate_base != 1080000)
151 if (job->pass == 2 && !job->cfr )
153 /* Even though the framerate might be different due to VFR,
154 we still want the same keyframe intervals as the 1st pass,
155 so the 1st pass stats won't conflict on frame decisions. */
156 hb_interjob_t * interjob = hb_interjob_get( job->h );
157 param.i_keyint_min = ( interjob->vrate / interjob->vrate_base ) + 1;
158 param.i_keyint_max = ( 10 * interjob->vrate / interjob->vrate_base ) + 1;
162 int fps = job->vrate / job->vrate_base;
164 /* adjust +1 when fps has remainder to bump
165 { 23.976, 29.97, 59.94 } to { 24, 30, 60 } */
166 if (job->vrate % job->vrate_base)
169 param.i_keyint_min = fps;
170 param.i_keyint_max = fps * 10;
173 hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
176 param.i_log_level = X264_LOG_INFO;
177 if( job->h264_level )
180 param.i_level_idc = job->h264_level;
181 hb_log( "encx264: encoding at level %i",
185 /* B-frames are on by default.*/
189 This section passes the string x264opts to libx264 for parsing into
190 parameter names and values.
192 The string is set up like this:
193 option1=value1:option2=value 2
195 So, you have to iterate through based on the colons, and then put
196 the left side of the equals sign in "name" and the right side into
197 "value." Then you hand those strings off to x264 for interpretation.
199 This is all based on the universal x264 option handling Loren
200 Merritt implemented in the Mplayer/Mencoder project.
203 if( job->x264opts != NULL && *job->x264opts != '\0' )
205 char *x264opts, *x264opts_start;
207 x264opts = x264opts_start = strdup(job->x264opts);
209 while( x264opts_start && *x264opts )
211 char *name = x264opts;
215 x264opts += strcspn( x264opts, ":" );
222 value = strchr( name, '=' );
230 When B-frames are enabled, the max frame count increments
231 by 1 (regardless of the number of B-frames). If you don't
232 change the duration of the video track when you mux, libmp4
233 barfs. So, check if the x264opts aren't using B-frames, and
234 when they aren't, set the boolean job->areBframes as false.
236 if( !( strcmp( name, "bframes" ) ) )
238 if( atoi( value ) == 0 )
244 /* Note b-pyramid here, so the initial delay can be doubled */
245 if( !( strcmp( name, "b-pyramid" ) ) )
249 if( atoi( value ) > 0 )
258 if( value == NULL || !strcmp( value, "1" ) )
262 else if( !strcmp( value, "0" ) )
268 /* Here's where the strings are passed to libx264 for parsing. */
269 ret = x264_param_parse( &param, name, value );
271 /* Let x264 sanity check the options for us*/
272 if( ret == X264_PARAM_BAD_NAME )
273 hb_log( "x264 options: Unknown suboption %s", name );
274 if( ret == X264_PARAM_BAD_VALUE )
275 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
277 free(x264opts_start);
280 /* set up the VUI color model & gamma to match what the COLR atom
281 * set in muxmp4.c says. See libhb/muxmp4.c for notes. */
282 if( job->color_matrix == 1 )
284 // ITU BT.601 DVD or SD TV content
285 param.vui.i_colorprim = 6;
286 param.vui.i_transfer = 1;
287 param.vui.i_colmatrix = 6;
289 else if( job->color_matrix == 2 )
291 // ITU BT.709 HD content
292 param.vui.i_colorprim = 1;
293 param.vui.i_transfer = 1;
294 param.vui.i_colmatrix = 1;
296 else if ( job->title->width >= 1280 || job->title->height >= 720 )
298 // we guess that 720p or above is ITU BT.709 HD content
299 param.vui.i_colorprim = 1;
300 param.vui.i_transfer = 1;
301 param.vui.i_colmatrix = 1;
305 // ITU BT.601 DVD or SD TV content
306 param.vui.i_colorprim = 6;
307 param.vui.i_transfer = 1;
308 param.vui.i_colmatrix = 6;
311 if( job->anamorphic.mode )
313 param.vui.i_sar_width = job->anamorphic.par_width;
314 param.vui.i_sar_height = job->anamorphic.par_height;
316 hb_log( "encx264: encoding with stored aspect %d/%d",
317 param.vui.i_sar_width, param.vui.i_sar_height );
321 if( job->vquality > 0.0 && job->vquality < 1.0 )
324 param.rc.i_rc_method = X264_RC_CRF;
325 param.rc.f_rf_constant = 51 - job->vquality * 51;
326 hb_log( "encx264: Encoding at constant RF %f", param.rc.f_rf_constant );
328 else if( job->vquality == 0 || job->vquality >= 1.0 )
330 /* Use the vquality as a raw RF or QP
331 instead of treating it like a percentage. */
333 param.rc.i_rc_method = X264_RC_CRF;
334 param.rc.f_rf_constant = job->vquality;
335 hb_log( "encx264: Encoding at constant RF %f", param.rc.f_rf_constant );
340 param.rc.i_rc_method = X264_RC_ABR;
341 param.rc.i_bitrate = job->vbitrate;
345 param.rc.b_stat_write = 1;
346 param.rc.psz_stat_out = pv->filename;
349 param.rc.b_stat_read = 1;
350 param.rc.psz_stat_in = pv->filename;
355 hb_deep_log( 2, "encx264: opening libx264 (pass %d)", job->pass );
356 pv->x264 = x264_encoder_open( &param );
358 x264_encoder_headers( pv->x264, &nal, &nal_count );
360 /* Sequence Parameter Set */
361 memcpy(w->config->h264.sps, nal[1].p_payload + 4, nal[1].i_payload - 4);
362 w->config->h264.sps_length = nal[1].i_payload - 4;
364 /* Picture Parameter Set */
365 memcpy(w->config->h264.pps, nal[2].p_payload + 4, nal[2].i_payload - 4);
366 w->config->h264.pps_length = nal[2].i_payload - 4;
368 x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
369 job->width, job->height );
371 pv->pic_in.img.i_stride[2] = pv->pic_in.img.i_stride[1] = ( ( job->width + 1 ) >> 1 );
372 pv->x264_allocated_pic = pv->pic_in.img.plane[0];
376 /* Basic initDelay value is the clockrate divided by the FPS
377 -- the length of one frame in clockticks. */
378 pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
380 /* 23.976-length frames are 3753.75 ticks long on average but the DVD
381 creates that average rate by repeating 59.94 fields so the max
382 frame size is actually 4504.5 (3 field times). The field durations
383 are computed based on quantized times (see below) so we need an extra
384 two ticks to account for the rounding. */
385 if (pv->init_delay == 3753)
386 pv->init_delay = 4507;
388 /* frame rates are not exact in the DVD 90KHz PTS clock (they are
389 exact in the DVD 27MHz system clock but we never see that) so the
390 rates computed above are all +-1 due to quantization. Worst case
391 is when a clock-rounded-down frame is adjacent to a rounded-up frame
392 which makes one of the frames 2 ticks longer than the nominal
396 /* For VFR, libhb sees the FPS as 29.97, but the longest frames
397 will use the duration of frames running at 23.976fps instead.
398 Since detelecine occasionally makes mistakes and since we have
399 to deal with some really horrible timing jitter from mkvs and
400 mp4s encoded with low resolution clocks, make the delay very
401 conservative if we're not doing CFR. */
407 /* The delay is 1 frame for regular b-frames, 2 for b-pyramid. */
408 pv->init_delay *= job->areBframes;
410 w->config->h264.init_delay = pv->init_delay;
415 void encx264Close( hb_work_object_t * w )
417 hb_work_private_t * pv = w->private_data;
419 if ( pv->frames_split )
421 hb_log( "encx264: %u frames had to be split (%u in, %u out)",
422 pv->frames_split, pv->frames_in, pv->frames_out );
425 * Patch the x264 allocated data back in so that x264 can free it
426 * we have been using our own buffers during the encode to avoid copying.
428 pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
429 x264_picture_clean( &pv->pic_in );
430 x264_encoder_close( pv->x264 );
432 w->private_data = NULL;
438 * see comments in definition of 'frame_info' in pv struct for description
439 * of what these routines are doing.
441 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
443 int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
444 pv->frame_info[i].duration = in->stop - in->start;
447 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
449 int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
450 return pv->frame_info[i].duration;
453 static hb_buffer_t *nal_encode( hb_work_object_t *w, x264_picture_t *pic_out,
454 int i_nal, x264_nal_t *nal )
456 hb_buffer_t *buf = NULL;
457 hb_work_private_t *pv = w->private_data;
458 hb_job_t *job = pv->job;
460 /* Should be way too large */
461 buf = hb_video_buffer_init( job->width, job->height );
465 // use the pts to get the original frame's duration.
466 int64_t duration = get_frame_duration( pv, pic_out->i_pts );
467 buf->start = pic_out->i_pts;
468 buf->stop = pic_out->i_pts + duration;
470 /* Encode all the NALs we were given into buf.
471 NOTE: This code assumes one video frame per NAL (but there can
472 be other stuff like SPS and/or PPS). If there are multiple
473 frames we only get the duration of the first which will
474 eventually screw up the muxer & decoder. */
476 for( i = 0; i < i_nal; i++ )
478 int size = nal[i].i_payload;
479 memcpy(buf->data + buf->size, nal[i].p_payload, size);
485 if( job->mux & HB_MUX_AVI )
487 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
489 buf->frametype = HB_FRAME_KEY;
495 /* H.264 in .mp4 or .mkv */
496 switch( nal[i].i_type )
498 /* Sequence Parameter Set & Picture Parameter Set go in the
499 * mp4 header so skip them here
512 /* Decide what type of frame we have. */
513 switch( pic_out->i_type )
516 buf->frametype = HB_FRAME_IDR;
517 /* if we have a chapter marker pending and this
518 frame's presentation time stamp is at or after
519 the marker's time stamp, use this as the
521 if( pv->next_chap != 0 && pv->next_chap <= pic_out->i_pts )
524 buf->new_chap = pv->chap_mark;
529 buf->frametype = HB_FRAME_I;
533 buf->frametype = HB_FRAME_P;
537 buf->frametype = HB_FRAME_B;
540 /* This is for b-pyramid, which has reference b-frames
541 However, it doesn't seem to ever be used... */
543 buf->frametype = HB_FRAME_BREF;
546 // If it isn't the above, what type of frame is it??
552 /* Since libx264 doesn't tell us when b-frames are
553 themselves reference frames, figure it out on our own. */
554 if( (buf->frametype == HB_FRAME_B) &&
555 (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
556 buf->frametype = HB_FRAME_BREF;
558 /* Expose disposable bit to muxer. */
559 if( nal[i].i_ref_idc == NAL_PRIORITY_DISPOSABLE )
560 buf->flags &= ~HB_FRAME_REF;
562 buf->flags |= HB_FRAME_REF;
566 // make sure we found at least one video frame
567 if ( buf->size <= 0 )
569 // no video - discard the buf
570 hb_buffer_close( &buf );
575 static hb_buffer_t *x264_encode( hb_work_object_t *w, hb_buffer_t *in )
577 hb_work_private_t *pv = w->private_data;
578 hb_job_t *job = pv->job;
580 /* Point x264 at our current buffers Y(UV) data. */
581 pv->pic_in.img.plane[0] = in->data;
583 int uvsize = ( (job->width + 1) >> 1 ) * ( (job->height + 1) >> 1 );
586 /* XXX x264 has currently no option for grayscale encoding */
587 memset( pv->pic_in.img.plane[1], 0x80, uvsize );
588 memset( pv->pic_in.img.plane[2], 0x80, uvsize );
592 /* Point x264 at our buffers (Y)UV data */
593 pv->pic_in.img.plane[1] = in->data + job->width * job->height;
594 pv->pic_in.img.plane[2] = pv->pic_in.img.plane[1] + uvsize;
596 if( in->new_chap && job->chapter_markers )
598 /* chapters have to start with an IDR frame so request that this
599 frame be coded as IDR. Since there may be up to 16 frames
600 currently buffered in the encoder remember the timestamp so
601 when this frame finally pops out of the encoder we'll mark
602 its buffer as the start of a chapter. */
603 pv->pic_in.i_type = X264_TYPE_IDR;
604 if( pv->next_chap == 0 )
606 pv->next_chap = in->start;
607 pv->chap_mark = in->new_chap;
609 /* don't let 'work_loop' put a chapter mark on the wrong buffer */
614 pv->pic_in.i_type = X264_TYPE_AUTO;
616 pv->pic_in.i_qpplus1 = 0;
618 /* XXX this is temporary debugging code to check that the upstream
619 * modules (render & sync) have generated a continuous, self-consistent
620 * frame stream with the current frame's start time equal to the
621 * previous frame's stop time.
623 if( pv->last_stop != in->start )
625 hb_log("encx264 input continuity err: last stop %"PRId64" start %"PRId64,
626 pv->last_stop, in->start);
628 pv->last_stop = in->stop;
630 // Remember info about this frame that we need to pass across
631 // the x264_encoder_encode call (since it reorders frames).
632 save_frame_info( pv, in );
634 /* Feed the input PTS to x264 so it can figure out proper output PTS */
635 pv->pic_in.i_pts = in->start;
637 x264_picture_t pic_out;
641 x264_encoder_encode( pv->x264, &nal, &i_nal, &pv->pic_in, &pic_out );
644 return nal_encode( w, &pic_out, i_nal, nal );
649 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
650 hb_buffer_t ** buf_out )
652 hb_work_private_t *pv = w->private_data;
653 hb_buffer_t *in = *buf_in;
659 // EOF on input. Flush any frames still in the encoder then
660 // send the eof downstream to tell the muxer we're done.
661 x264_picture_t pic_out;
664 hb_buffer_t *last_buf = NULL;
668 x264_encoder_encode( pv->x264, &nal, &i_nal, NULL, &pic_out );
672 hb_buffer_t *buf = nal_encode( w, &pic_out, i_nal, nal );
676 if ( last_buf == NULL )
679 last_buf->next = buf;
683 // Flushed everything - add the eof to the end of the chain.
684 if ( last_buf == NULL )
693 // Not EOF - encode the packet & wrap it in a NAL
696 // if we're re-ordering frames, check if this frame is too large to reorder
697 if ( pv->init_delay && in->stop - in->start > pv->init_delay )
699 // This frame's duration is larger than the time allotted for b-frame
700 // reordering. That means that if it's used as a reference the decoder
701 // won't be able to move it early enough to render it in correct
702 // sequence & the playback will have odd jumps & twitches. To make
703 // sure this doesn't happen we pretend this frame is multiple
704 // frames, each with duration <= init_delay. Since each of these
705 // new frames contains the same image the visual effect is identical
706 // to the original but the resulting stream can now be coded without
707 // error. We take advantage of the fact that x264 buffers frame
708 // data internally to feed the same image into the encoder multiple
709 // times, just changing its start & stop times each time.
711 int64_t orig_stop = in->stop;
712 int64_t new_stop = in->start;
713 hb_buffer_t *last_buf = NULL;
715 // We want to spread the new frames uniformly over the total time
716 // so that we don't end up with a very short frame at the end.
717 // In the number of pieces calculation we add in init_delay-1 to
718 // round up but not add an extra piece if the frame duration is
719 // a multiple of init_delay. The final increment of frame_dur is
720 // to restore the bits that got truncated by the divide on the
721 // previous line. If we don't do this we end up with an extra tiny
722 // frame at the end whose duration is npieces-1.
723 int64_t frame_dur = orig_stop - new_stop;
724 int64_t npieces = ( frame_dur + pv->init_delay - 1 ) / pv->init_delay;
725 frame_dur /= npieces;
728 while ( in->start < orig_stop )
730 new_stop += frame_dur;
731 if ( new_stop > orig_stop )
732 new_stop = orig_stop;
734 hb_buffer_t *buf = x264_encode( w, in );
738 if ( last_buf == NULL )
741 last_buf->next = buf;
744 in->start = new_stop;
750 *buf_out = x264_encode( w, in );