1 /* $Id: sync.c,v 1.38 2005/04/14 21:57:58 titer Exp $
3 This file is part of the HandBrake source code.
4 Homepage: <http://handbrake.fr/>.
5 It may be used under the terms of the GNU General Public License. */
10 #include "samplerate.h"
11 #include "libavcodec/avcodec.h"
14 #undef INT64_MIN /* Because it isn't defined correctly in Zeta */
16 #define INT64_MIN (-9223372036854775807LL-1)
18 #define AC3_SAMPLES_PER_FRAME 1536
/* Per-audio-track timing and drop bookkeeping.
 * NOTE(review): the enclosing struct declaration is not visible in this
 * excerpt; these look like members of hb_sync_audio_t, which is accessed
 * with the same field names later in the file -- confirm. */
24 int64_t next_start; /* start time of next output frame */
25 int64_t next_pts; /* start time of next input frame */
26 int64_t first_drop; /* PTS of first 'went backwards' frame dropped */
27 int drop_count; /* count of 'time went backwards' drops */
/* Private state of the sync work object. The functions in this file reach
 * it through w->private_data; syncInit() allocates it zero-filled.
 * NOTE(review): members used elsewhere in this file (for example job,
 * pts_offset, count_frames, count_frames_max, video_sequence, st_first and
 * st_dates) are not visible in this excerpt. */
39 struct hb_work_private_s
/* Subtitle track taken from index 0 of title->list_subtitle in syncInit() */
45 hb_subtitle_t * subtitle;
47 int64_t next_start; /* start time of next output frame */
48 int64_t next_pts; /* start time of next input frame */
49 int64_t first_drop; /* PTS of first 'went backwards' frame dropped */
50 int drop_count; /* count of 'time went backwards' drops */
51 int drops; /* frames dropped to make a cbr video stream */
52 int dups; /* frames duplicated to make a cbr video stream */
56 int chap_mark; /* to propagate chapter mark across a drop */
57 hb_buffer_t * cur; /* The next picture to process */
/* Per-track audio state, indexed by the track's position in
 * title->list_audio (see InitAudio); the array holds at most 8 entries */
60 hb_sync_audio_t sync_audio[8];
/* Sliding window of frame-count samples maintained by UpdateState() and
 * used there to compute the current processing rate */
63 uint64_t st_counts[4];
68 /***********************************************************************
70 **********************************************************************/
/* Forward declarations of the file-local helpers defined further down:
 * w is the sync work object, i is an index into the title's audio list,
 * and d is a duration handed to InsertSilence(). */
71 static void InitAudio( hb_work_object_t * w, int i );
72 static int SyncVideo( hb_work_object_t * w );
73 static void SyncAudio( hb_work_object_t * w, int i );
74 static void InsertSilence( hb_work_object_t * w, int i, int64_t d );
75 static void UpdateState( hb_work_object_t * w );
77 /***********************************************************************
79 ***********************************************************************
80 * Initialize the work object
81 **********************************************************************/
/*
 * syncInit - prepare the private state of the sync work object.
 *
 *   w   : the work object being initialised
 *   job : job whose title, chapter range (chapter_start..chapter_end) and
 *         frame rate (vrate / vrate_base) drive the set-up
 *
 * Steps visible in this excerpt:
 *   - allocate a zero-filled hb_work_private_t;
 *   - set pts_offset to INT64_MIN, the value the rest of this file tests
 *     to detect "no video frame processed yet";
 *   - sum the durations of the selected chapters and convert the total to
 *     an expected frame count (duration * vrate / vrate_base / 90000),
 *     which is logged;
 *   - loop over the title's audio tracks (loop body not visible here);
 *   - remember the first entry of the subtitle list and reset
 *     video_sequence to 0.
 *
 * NOTE(review): the initialisation of `duration`, the statement that adds
 * the "1 second safety" mentioned in the comment below, the store of pv
 * into the work object and the return statement are not visible in this
 * excerpt -- confirm against the full file.
 * NOTE(review): the calloc() result is not checked in the visible lines.
 */
82 int syncInit( hb_work_object_t * w, hb_job_t * job )
84 hb_title_t * title = job->title;
85 hb_chapter_t * chapter;
88 hb_work_private_t * pv;
90 pv = calloc( 1, sizeof( hb_work_private_t ) );
94 pv->pts_offset = INT64_MIN;
97 /* Calculate how many video frames we are expecting */
99 for( i = job->chapter_start; i <= job->chapter_end; i++ )
101 chapter = hb_list_item( title->list_chapter, i - 1 );
102 duration += chapter->duration;
105 /* 1 second safety so we're sure we won't miss anything */
106 pv->count_frames_max = duration * job->vrate / job->vrate_base / 90000;
108 hb_log( "sync: expecting %d video frames", pv->count_frames_max );
110 /* Initialize libsamplerate for every audio track we have */
111 for( i = 0; i < hb_list_count( title->list_audio ); i++ )
116 /* Get subtitle info, if any */
117 pv->subtitle = hb_list_item( title->list_subtitle, 0 );
119 pv->video_sequence = 0;
124 /***********************************************************************
126 ***********************************************************************
128 **********************************************************************/
/*
 * syncClose - release the resources held by the sync work object.
 *
 *   w : the work object whose private data is being torn down
 *
 * Closes the pending video buffer (pv->cur), logs the processed versus
 * expected frame counts, logs drop/dup totals when either is non-zero,
 * releases the per-track audio resources and finally clears
 * w->private_data.
 *
 * NOTE(review): the braces/else separating the AC-3 branch from the
 * src_delete() call, and any free of pv itself, are not visible in this
 * excerpt -- confirm against the full file.
 */
129 void syncClose( hb_work_object_t * w )
131 hb_work_private_t * pv = w->private_data;
132 hb_job_t * job = pv->job;
133 hb_title_t * title = job->title;
134 hb_audio_t * audio = NULL;
139 hb_buffer_close( &pv->cur );
142 hb_log( "sync: got %d frames, %d expected",
143 pv->count_frames, pv->count_frames_max );
/* Only report drop/dup statistics when something was actually changed */
145 if (pv->drops || pv->dups )
147 hb_log( "sync: %d frames dropped, %d duplicated", pv->drops, pv->dups );
/* Per-track clean-up: AC-3 output tracks own a pre-encoded silent frame
 * (ac3_buf); the sample-rate converter state is released with src_delete() */
150 for( i = 0; i < hb_list_count( title->list_audio ); i++ )
152 audio = hb_list_item( title->list_audio, i );
153 if( audio->config.out.codec == HB_ACODEC_AC3 )
155 free( pv->sync_audio[i].ac3_buf );
159 src_delete( pv->sync_audio[i].state );
164 w->private_data = NULL;
167 /***********************************************************************
169 ***********************************************************************
170 * The root routine of this work object
172 * The way this works is that we are syncing the audio to the PTS of
173 * the last video that we processed. That's why we skip the audio sync
174 * if we haven't got a valid PTS from the video yet.
176 **********************************************************************/
/*
 * syncWork - per-iteration entry point of the sync work object.
 *
 *   w       : the sync work object
 *   unused1 : not referenced in the visible lines
 *   unused2 : not referenced in the visible lines
 *
 * Audio tracks are only visited once pts_offset is no longer INT64_MIN,
 * i.e. after a first video frame has been handled. The function then
 * returns whatever SyncVideo() returns.
 *
 * NOTE(review): the body of the audio loop is not visible in this
 * excerpt; presumably it calls SyncAudio( w, i ) -- confirm.
 */
177 int syncWork( hb_work_object_t * w, hb_buffer_t ** unused1,
178 hb_buffer_t ** unused2 )
180 hb_work_private_t * pv = w->private_data;
183 /* If we ever got a video frame, handle audio now */
184 if( pv->pts_offset != INT64_MIN )
186 for( i = 0; i < hb_list_count( pv->job->title->list_audio ); i++ )
193 return SyncVideo( w );
/* Work object definition for the sync stage.
 * NOTE(review): the initializer is not visible in this excerpt;
 * presumably it references syncInit, syncWork and syncClose -- confirm. */
196 hb_work_object_t hb_sync =
/*
 * InitAudio - set up the sync state for one audio track.
 *
 *   w : the sync work object
 *   i : index of the track in title->list_audio; also selects the slot
 *       pv->sync_audio[i]
 *
 * Binds the slot to its hb_audio_t. For tracks whose output codec is
 * HB_ACODEC_AC3 a silent AC-3 frame is prepared: an AC-3 encoder context
 * is configured from the input bitrate, sample rate and channel layout,
 * a zero-filled 16-bit sample buffer of AC3_SAMPLES_PER_FRAME samples per
 * channel is encoded into sync->ac3_buf, and failures are logged.
 * ac3_size is computed as bitrate * AC3_SAMPLES_PER_FRAME / samplerate / 8.
 * The visible lines also create a libsamplerate converter
 * (SRC_SINC_MEDIUM_QUALITY) with the channel count of the output mixdown
 * and clear data.end_of_input.
 *
 * NOTE(review): the braces/else separating the AC-3 branch from the
 * libsamplerate branch, the declarations of codec, c, zeros and error,
 * and any release of zeros or the codec context are not visible in this
 * excerpt -- confirm against the full file.
 * NOTE(review): the comment opened on the "Have a silent AC-3 frame"
 * line is not closed in this excerpt (its closing line is missing).
 * NOTE(review): the results of calloc(), malloc(), avcodec_find_encoder()
 * and src_new() are not checked in the visible lines.
 */
205 static void InitAudio( hb_work_object_t * w, int i )
207 hb_work_private_t * pv = w->private_data;
208 hb_job_t * job = pv->job;
209 hb_title_t * title = job->title;
210 hb_sync_audio_t * sync;
212 sync = &pv->sync_audio[i];
213 sync->audio = hb_list_item( title->list_audio, i );
215 if( sync->audio->config.out.codec == HB_ACODEC_AC3 )
217 /* Have a silent AC-3 frame ready in case we have to fill a
223 codec = avcodec_find_encoder( CODEC_ID_AC3 );
224 c = avcodec_alloc_context();
226 c->bit_rate = sync->audio->config.in.bitrate;
227 c->sample_rate = sync->audio->config.in.samplerate;
228 c->channels = HB_INPUT_CH_LAYOUT_GET_DISCRETE_COUNT( sync->audio->config.in.channel_layout );
230 if( avcodec_open( c, codec ) < 0 )
232 hb_log( "sync: avcodec_open failed" );
236 zeros = calloc( AC3_SAMPLES_PER_FRAME *
237 sizeof( short ) * c->channels, 1 );
238 sync->ac3_size = sync->audio->config.in.bitrate * AC3_SAMPLES_PER_FRAME /
239 sync->audio->config.in.samplerate / 8;
240 sync->ac3_buf = malloc( sync->ac3_size );
242 if( avcodec_encode_audio( c, sync->ac3_buf, sync->ac3_size,
243 zeros ) != sync->ac3_size )
245 hb_log( "sync: avcodec_encode_audio failed" );
254 /* Initialize libsamplerate */
256 sync->state = src_new( SRC_SINC_MEDIUM_QUALITY, HB_AMIXDOWN_GET_DISCRETE_CHANNEL_COUNT(sync->audio->config.out.mixdown), &error );
257 sync->data.end_of_input = 0;
261 /***********************************************************************
263 ***********************************************************************
265 **********************************************************************/
/*
 * SyncVideo - move video frames from job->fifo_raw to job->fifo_sync with
 * corrected, contiguous timestamps.
 *
 *   w : the sync work object
 *
 * Outline of the visible logic:
 *   1. If no frame is cached in pv->cur, fetch one from job->fifo_raw. An
 *      end-of-stream is forwarded as a zero-size buffer on job->fifo_sync.
 *   2. While job->fifo_sync has room and a following frame (`next`) can be
 *      peeked in job->fifo_raw:
 *        - a zero-size `next` is treated as end-of-stream and forwarded;
 *        - the first frame is detected via pts_offset == INT64_MIN and a
 *          non-zero first PTS is logged;
 *        - if next->start does not lie after cur->start, `next` is removed
 *          from the fifo and discarded, remembering its chapter mark in
 *          pv->chap_mark; a summary is logged once time advances again;
 *        - pv->video_sequence is updated from the current frame;
 *        - the subtitle fifo is scanned: overlapping subtitles are
 *          trimmed, stale ones are discarded, short ones are extended to
 *          3 seconds (3 * 90000 ticks) or to the start of the following
 *          subtitle;
 *        - when muxing to AVI or when job->cfr is set, frames are dropped
 *          or duplicated to reach a constant frame rate; otherwise the
 *          frame duration is taken from the start time of the next frame;
 *        - the outgoing buffer gets start/stop from pv->next_start, any
 *          pending chapter mark, and a copy of the selected subtitle, and
 *          is pushed to job->fifo_sync;
 *        - processing stops early, pushing a zero-size buffer, once
 *          count_frames reaches twice count_frames_max.
 *
 * NOTE(review): the return statements, braces, several declarations
 * (for example sub2 and duration) and a number of statements are not
 * visible in this excerpt, so the exact return values and branch nesting
 * must be confirmed against the full file.
 */
266 static int SyncVideo( hb_work_object_t * w )
268 hb_work_private_t * pv = w->private_data;
269 hb_buffer_t * cur, * next, * sub = NULL;
270 hb_job_t * job = pv->job;
277 if( !pv->cur && !( pv->cur = hb_fifo_get( job->fifo_raw ) ) )
279 /* We haven't even got a frame yet */
285 /* we got an end-of-stream. Feed it downstream & signal that we're done. */
286 hb_fifo_push( job->fifo_sync, hb_buffer_init( 0 ) );
291 /* At this point we have a frame to process. Let's check
292 1) if we will be able to push into the fifo ahead
293 2) if the next frame is there already, since we need it to
294 compute the duration of the current frame*/
295 while( !hb_fifo_is_full( job->fifo_sync ) &&
296 ( next = hb_fifo_see( job->fifo_raw ) ) )
298 hb_buffer_t * buf_tmp;
300 if( next->size == 0 )
302 /* we got an end-of-stream. Feed it downstream & signal that
303 * we're done. Note that this means we drop the final frame of
304 * video (we don't know its duration). On DVDs the final frame
305 * is often strange and dropping it seems to be a good idea. */
306 hb_fifo_push( job->fifo_sync, hb_buffer_init( 0 ) );
310 if( pv->pts_offset == INT64_MIN )
312 /* This is our first frame */
314 if ( cur->start != 0 )
317 * The first pts from a dvd should always be zero but
318 * can be non-zero with a transport or program stream since
319 * we're not guaranteed to start on an IDR frame. If we get
320 * a non-zero initial PTS extend its duration so it behaves
321 * as if it started at zero so that our audio timing will
324 hb_log( "sync: first pts is %lld", cur->start );
330 * since the first frame is always 0 and the upstream reader code
331 * is taking care of adjusting for pts discontinuities, we just have
332 * to deal with the next frame's start being in the past. This can
333 * happen when the PTS is adjusted after data loss but video frame
334 * reordering causes some frames with the old clock to appear after
335 * the clock change. This creates frames that overlap in time which
336 * looks to us like time going backward. The downstream muxing code
337 * can deal with overlaps of up to a frame time but anything larger
338 * we handle by dropping frames here.
340 if ( (int64_t)( next->start - cur->start ) <= 0 )
342 if ( pv->first_drop == 0 )
344 pv->first_drop = next->start;
347 buf_tmp = hb_fifo_get( job->fifo_raw );
348 if ( buf_tmp->new_chap )
350 // don't drop a chapter mark when we drop the buffer
351 pv->chap_mark = buf_tmp->new_chap;
353 hb_buffer_close( &buf_tmp );
356 if ( pv->first_drop )
358 hb_log( "sync: video time didn't advance - dropped %d frames "
359 "(delta %d ms, current %lld, next %lld, dur %d)",
360 pv->drop_count, (int)( cur->start - pv->first_drop ) / 90,
361 cur->start, next->start, (int)( next->start - cur->start ) );
367 * Track the video sequence number localy so that we can sync the audio
368 * to it using the sequence number as well as the PTS.
370 pv->video_sequence = cur->sequence;
372 /* Look for a subtitle for this frame */
376 while( ( sub = hb_fifo_see( pv->subtitle->fifo_raw ) ) )
378 /* If two subtitles overlap, make the first one stop
379 when the second one starts */
380 sub2 = hb_fifo_see2( pv->subtitle->fifo_raw );
381 if( sub2 && sub->stop > sub2->start )
382 sub->stop = sub2->start;
384 // hb_log("0x%x: video seq: %lld subtitle sequence: %lld",
385 // sub, cur->sequence, sub->sequence);
387 if( sub->sequence > cur->sequence )
390 * The video is behind where we are, so wait until
391 * it catches up to the same reader point on the
392 * DVD. Then our PTS should be in the same region
399 if( sub->stop > cur->start ) {
401 * The stop time is in the future, so fall through
402 * and we'll deal with it in the next block of
409 * The subtitle is older than this picture, trash it
411 sub = hb_fifo_get( pv->subtitle->fifo_raw );
412 hb_buffer_close( &sub );
416 * There is a valid subtitle, is it time to display it?
420 if( sub->stop > sub->start)
423 * Normal subtitle which ends after it starts, check to
424 * see that the current video is between the start and end.
426 if( cur->start > sub->start &&
427 cur->start < sub->stop )
430 * We should be playing this, so leave the
433 * fall through to display
435 if( ( sub->stop - sub->start ) < ( 3 * 90000 ) )
438 * Subtitle is on for less than three seconds, extend
439 * the time that it is displayed to make it easier
440 * to read. Make it 3 seconds or until the next
441 * subtitle is displayed.
443 * This is in response to Indochine which only
444 * displays subs for 1 second - too fast to read.
446 sub->stop = sub->start + ( 3 * 90000 );
448 sub2 = hb_fifo_see2( pv->subtitle->fifo_raw );
450 if( sub2 && sub->stop > sub2->start )
452 sub->stop = sub2->start;
459 * Defer until the play point is within the subtitle
467 * The end of the subtitle is less than the start, this is a
468 * sign of a PTS discontinuity.
470 if( sub->start > cur->start )
473 * we haven't reached the start time yet, or
474 * we have jumped backwards after having
475 * already started this subtitle.
477 if( cur->start < sub->stop )
480 * We have jumped backwards and so should
481 * continue displaying this subtitle.
483 * fall through to display.
489 * Defer until the play point is within the subtitle
495 * Play this subtitle as the start is greater than our
498 * fall through to display/
506 if ( job->mux & HB_MUX_AVI || job->cfr )
509 * The concept of variable frame rate video was a bit too advanced
510 * for Microsoft so AVI doesn't support it. Since almost all dvd
511 * video is VFR we have to convert it to constant frame rate to
512 * put it in an AVI container. So here we duplicate, drop and
513 * otherwise trash video frames to appease the gods of Redmond.
516 /* mpeg durations are exact when expressed in ticks of the
517 * 27MHz System clock but not in HB's 90KHz PTS clock. To avoid
518 * a truncation bias that will eventually cause the audio to desync
519 * we compute the duration of the next frame using 27MHz ticks
520 * then truncate it to 90KHz. */
521 duration = ( (int64_t)(pv->count_frames + 1 ) * job->vrate_base ) / 300 -
524 /* We don't want the input & output clocks to be exactly in phase
525 * otherwise small variations in the time will cause us to think
526 * we're a full frame off & there will be lots of drops and dups.
527 * We offset the input clock by half the duration so it's maximally
528 * out of phase with the output clock. */
529 if( cur->start < pv->next_start - ( duration >> 1 ) )
531 /* current frame too old - drop it */
534 pv->chap_mark = cur->new_chap;
536 hb_buffer_close( &cur );
537 pv->cur = cur = hb_fifo_get( job->fifo_raw );
538 pv->next_pts = next->start;
543 if( next->start > pv->next_start + duration + ( duration >> 1 ) )
545 /* next frame too far ahead - dup current frame */
546 buf_tmp = hb_buffer_init( cur->size );
547 hb_buffer_copy_settings( buf_tmp, cur );
548 memcpy( buf_tmp->data, cur->data, cur->size );
549 buf_tmp->sequence = cur->sequence;
554 /* this frame in our time window & doesn't need to be duped */
556 pv->cur = cur = hb_fifo_get( job->fifo_raw );
557 pv->next_pts = next->start;
563 * Adjust the pts of the current frame so that it's contiguous
564 * with the previous frame. The start time of the current frame
565 * has to be the end time of the previous frame and the stop
566 * time has to be the start of the next frame. We don't
567 * make any adjustments to the source timestamps other than removing
568 * the clock offsets (which also removes pts discontinuities).
569 * This means we automatically encode at the source's frame rate.
570 * MP2 uses an implicit duration (frames end when the next frame
571 * starts) but more advanced containers like MP4 use an explicit
572 * duration. Since we're looking ahead one frame we set the
573 * explicit stop time from the start time of the next frame.
576 pv->cur = cur = hb_fifo_get( job->fifo_raw );
577 pv->next_pts = cur->start;
578 duration = cur->start - buf_tmp->start;
581 hb_log( "sync: invalid video duration %lld, start %lld, next %lld",
582 duration, buf_tmp->start, next->start );
586 buf_tmp->start = pv->next_start;
587 pv->next_start += duration;
588 buf_tmp->stop = pv->next_start;
592 // we have a pending chapter mark from a recent drop - put it on this
593 // buffer (this may make it one frame late but we can't do any better).
594 buf_tmp->new_chap = pv->chap_mark;
598 /* If we have a subtitle for this picture, copy it */
599 /* FIXME: we should avoid this memcpy */
602 buf_tmp->sub = hb_buffer_init( sub->size );
603 buf_tmp->sub->x = sub->x;
604 buf_tmp->sub->y = sub->y;
605 buf_tmp->sub->width = sub->width;
606 buf_tmp->sub->height = sub->height;
607 memcpy( buf_tmp->sub->data, sub->data, sub->size );
610 /* Push the frame to the renderer */
611 hb_fifo_push( job->fifo_sync, buf_tmp );
616 /* Make sure we won't get more frames then expected */
617 if( pv->count_frames >= pv->count_frames_max * 2)
619 hb_log( "sync: got too many frames (%d), exiting early",
623 // Drop an empty buffer into our output to ensure that things
624 // get flushed all the way out.
625 hb_fifo_push( job->fifo_sync, hb_buffer_init( 0 ) );
/*
 * OutputAudioFrame - timestamp one audio buffer on the output clock and
 * push it to the given fifo, resampling first when required.
 *
 *   job   : current job (not referenced in the visible lines)
 *   audio : track configuration (input/output sample rate, codec, mixdown)
 *   buf   : input buffer; its stop - start gives the input duration
 *   sync  : per-track sync state (next_start, next_pts, converter state)
 *   fifo  : destination fifo for the resulting buffer
 *   i     : track index, used only in the log message
 *
 * The input clock (sync->next_pts) advances by the input duration. When
 * the input and output sample rates match, or the output codec is AC-3 or
 * DCA, the input buffer itself is sent on. Otherwise the samples are run
 * through src_process() into a new buffer with room for one extra output
 * sample, the input buffer is closed, and the duration is recomputed from
 * the number of frames libsamplerate actually generated. Finally the
 * buffer's stop time and sync->next_start are both set to start + duration
 * and the buffer is pushed to fifo.
 *
 * NOTE(review): braces, the else keyword, the second operand of the
 * channel_count product and the assignment of buf->start are not visible
 * in this excerpt -- confirm against the full file.
 */
632 static void OutputAudioFrame( hb_job_t *job, hb_audio_t *audio, hb_buffer_t *buf,
633 hb_sync_audio_t *sync, hb_fifo_t *fifo, int i )
635 int64_t start = sync->next_start;
636 int64_t duration = buf->stop - buf->start;
638 sync->next_pts += duration;
640 if( audio->config.in.samplerate == audio->config.out.samplerate ||
641 audio->config.out.codec == HB_ACODEC_AC3 ||
642 audio->config.out.codec == HB_ACODEC_DCA )
645 * If we don't have to do sample rate conversion or this audio is
646 * pass-thru just send the input buffer downstream after adjusting
647 * its timestamps to make the output stream continuous.
652 /* Not pass-thru - do sample rate conversion */
653 int count_in, count_out;
654 hb_buffer_t * buf_raw = buf;
655 int channel_count = HB_AMIXDOWN_GET_DISCRETE_CHANNEL_COUNT(audio->config.out.mixdown) *
658 count_in = buf_raw->size / channel_count;
660 * When using stupid rates like 44.1 there will always be some
661 * truncation error. E.g., a 1536 sample AC3 frame will turn into a
662 * 1536*44.1/48.0 = 1411.2 sample frame. If we just truncate the .2
663 * the error will build up over time and eventually the audio will
664 * substantially lag the video. libsamplerate will keep track of the
665 * fractional sample & give it to us when appropriate if we give it
666 * an extra sample of space in the output buffer.
668 count_out = ( duration * audio->config.out.samplerate ) / 90000 + 1;
670 sync->data.input_frames = count_in;
671 sync->data.output_frames = count_out;
672 sync->data.src_ratio = (double)audio->config.out.samplerate /
673 (double)audio->config.in.samplerate;
675 buf = hb_buffer_init( count_out * channel_count );
676 sync->data.data_in = (float *) buf_raw->data;
677 sync->data.data_out = (float *) buf->data;
678 if( src_process( sync->state, &sync->data ) )
680 /* XXX If this happens, we're screwed */
681 hb_log( "sync: audio %d src_process failed", i );
683 hb_buffer_close( &buf_raw );
685 buf->size = sync->data.output_frames_gen * channel_count;
686 duration = ( sync->data.output_frames_gen * 90000 ) /
687 audio->config.out.samplerate;
689 buf->frametype = HB_FRAME_AUDIO;
691 buf->stop = start + duration;
692 sync->next_start = start + duration;
693 hb_fifo_push( fifo, buf );
696 /***********************************************************************
698 ***********************************************************************
700 **********************************************************************/
/*
 * SyncAudio - drain one audio track's raw fifo, repairing its timeline.
 *
 *   w : the sync work object
 *   i : index of the audio track (slot pv->sync_audio[i])
 *
 * The destination is audio->priv.fifo_out for AC-3 output and
 * audio->priv.fifo_sync in the other visible assignment. While the
 * destination has room and a buffer can be peeked in
 * audio->priv.fifo_raw:
 *   - a buffer with size <= 0 is treated as end-of-file and forwarded;
 *   - a buffer starting before sync->next_pts means time went backwards:
 *     it is discarded when a drop run is already in progress or when the
 *     output clock is more than 90*15 ticks (15 ms at 90 ticks per ms)
 *     ahead of it; otherwise next_pts is moved to the buffer's start;
 *   - when a drop run ends, a summary is logged and the drop counters are
 *     reset;
 *   - a gap of at least 90*70 ticks (70 ms) before the buffer is filled
 *     through InsertSilence();
 *   - the buffer is then removed from the raw fifo and handed to
 *     OutputAudioFrame().
 *
 * NOTE(review): braces, else keywords, continue/break/return statements,
 * the drop_count increment and the declarations of buf and fifo are not
 * visible in this excerpt -- confirm the exact nesting against the full
 * file.
 */
701 static void SyncAudio( hb_work_object_t * w, int i )
703 hb_work_private_t * pv = w->private_data;
704 hb_job_t * job = pv->job;
705 hb_sync_audio_t * sync = &pv->sync_audio[i];
706 hb_audio_t * audio = sync->audio;
710 if( audio->config.out.codec == HB_ACODEC_AC3 )
712 fifo = audio->priv.fifo_out;
716 fifo = audio->priv.fifo_sync;
719 while( !hb_fifo_is_full( fifo ) && ( buf = hb_fifo_see( audio->priv.fifo_raw ) ) )
721 /* if the next buffer is an eof send it downstream */
722 if ( buf->size <= 0 )
724 buf = hb_fifo_get( audio->priv.fifo_raw );
725 hb_fifo_push( fifo, buf );
728 if ( (int64_t)( buf->start - sync->next_pts ) < 0 )
730 // audio time went backwards.
731 // If our output clock is more than a half frame ahead of the
732 // input clock drop this frame to move closer to sync.
733 // Otherwise drop frames until the input clock matches the output clock.
734 if ( sync->first_drop || sync->next_start - buf->start > 90*15 )
736 // Discard data that's in the past.
737 if ( sync->first_drop == 0 )
739 sync->first_drop = sync->next_pts;
742 buf = hb_fifo_get( audio->priv.fifo_raw );
743 hb_buffer_close( &buf );
746 sync->next_pts = buf->start;
748 if ( sync->first_drop )
750 // we were dropping old data but input buf time is now current
751 hb_log( "sync: audio %d time went backwards %d ms, dropped %d frames "
752 "(next %lld, current %lld)", i,
753 (int)( sync->next_pts - sync->first_drop ) / 90,
754 sync->drop_count, sync->first_drop, sync->next_pts );
755 sync->first_drop = 0;
756 sync->drop_count = 0;
757 sync->next_pts = buf->start;
759 if ( buf->start - sync->next_pts >= (90 * 70) )
762 * there's a gap of at least 70ms between the last
763 * frame we processed & the next. Fill it with silence.
765 hb_log( "sync: adding %d ms of silence to audio %d"
766 " start %lld, next %lld",
767 (int)((buf->start - sync->next_pts) / 90),
768 i, buf->start, sync->next_pts );
769 InsertSilence( w, i, buf->start - sync->next_pts );
774 * When we get here we've taken care of all the dups and gaps in the
775 * audio stream and are ready to inject the next input frame into
778 buf = hb_fifo_get( audio->priv.fifo_raw );
779 OutputAudioFrame( job, audio, buf, sync, fifo, i );
/*
 * InsertSilence - emit silent audio frames to cover a gap in one track.
 *
 *   w        : the sync work object
 *   i        : index of the audio track (slot pv->sync_audio[i])
 *   duration : length of the gap, in the same 90000-per-second units used
 *              to compute frame_dur below
 *
 * Silence is produced in units of one AC-3 frame (AC3_SAMPLES_PER_FRAME
 * samples at the input sample rate); the number of frames is the gap
 * length rounded to the nearest whole frame. Each frame is stamped
 * starting at sync->next_pts and passed to OutputAudioFrame(). For AC-3
 * output the frame is a copy of the pre-encoded silent frame
 * (sync->ac3_buf) and goes to priv.fifo_out; in the other visible branch
 * it is a zero-filled float buffer sized for the output mixdown's channel
 * count and goes to priv.fifo_sync.
 *
 * NOTE(review): braces, the else keyword and the declarations of buf and
 * fifo are not visible in this excerpt.
 */
783 static void InsertSilence( hb_work_object_t * w, int i, int64_t duration )
785 hb_work_private_t * pv = w->private_data;
786 hb_job_t *job = pv->job;
787 hb_sync_audio_t *sync = &pv->sync_audio[i];
791 // to keep pass-thru and regular audio in sync we generate silence in
792 // AC3 frame-sized units. If the silence duration isn't an integer multiple
793 // of the AC3 frame duration we will truncate or round up depending on
794 // which minimizes the timing error.
795 const int frame_dur = ( 90000 * AC3_SAMPLES_PER_FRAME ) /
796 sync->audio->config.in.samplerate;
// adding half a frame before dividing rounds to the nearest frame count
797 int frame_count = ( duration + (frame_dur >> 1) ) / frame_dur;
799 while ( --frame_count >= 0 )
801 if( sync->audio->config.out.codec == HB_ACODEC_AC3 )
803 buf = hb_buffer_init( sync->ac3_size );
804 buf->start = sync->next_pts;
805 buf->stop = buf->start + frame_dur;
806 memcpy( buf->data, sync->ac3_buf, buf->size );
807 fifo = sync->audio->priv.fifo_out;
811 buf = hb_buffer_init( AC3_SAMPLES_PER_FRAME * sizeof( float ) *
812 HB_AMIXDOWN_GET_DISCRETE_CHANNEL_COUNT(
813 sync->audio->config.out.mixdown) );
814 buf->start = sync->next_pts;
815 buf->stop = buf->start + frame_dur;
816 memset( buf->data, 0, buf->size );
817 fifo = sync->audio->priv.fifo_sync;
819 OutputAudioFrame( job, sync->audio, buf, sync, fifo, i );
/*
 * UpdateState - publish encoding progress through hb_set_state().
 *
 *   w : the sync work object
 *
 * Records a start date when no frame has been counted yet, keeps two
 * 4-entry sliding windows (dates and frame counts), and fills the
 * "working" state with: progress (count_frames / count_frames_max), the
 * current rate computed across the window, and -- once more than 4000
 * date units have passed since the first frame -- an average rate and an
 * ETA split into hours, minutes and seconds.
 *
 * NOTE(review): hb_get_date() presumably returns milliseconds (the rates
 * are scaled by 1000.0) -- confirm.
 * NOTE(review): braces, the declarations of state and eta, the clamp
 * applied when progress exceeds 1.0, the divisor of the eta expression
 * and the else branch are not visible in this excerpt.
 */
823 static void UpdateState( hb_work_object_t * w )
825 hb_work_private_t * pv = w->private_data;
828 if( !pv->count_frames )
830 pv->st_first = hb_get_date();
/* Take a new sample once more than 1000 date units have passed since the
 * newest one: shift both windows down by one and store the sample last */
834 if( hb_get_date() > pv->st_dates[3] + 1000 )
836 memmove( &pv->st_dates[0], &pv->st_dates[1],
837 3 * sizeof( uint64_t ) );
838 memmove( &pv->st_counts[0], &pv->st_counts[1],
839 3 * sizeof( uint64_t ) );
840 pv->st_dates[3] = hb_get_date();
841 pv->st_counts[3] = pv->count_frames;
/* Shorthand for the "working" parameters of the state being filled in */
844 #define p state.param.working
845 state.state = HB_STATE_WORKING;
846 p.progress = (float) pv->count_frames / (float) pv->count_frames_max;
847 if( p.progress > 1.0 )
/* Current rate: frames counted across the window per 1000 date units */
851 p.rate_cur = 1000.0 *
852 (float) ( pv->st_counts[3] - pv->st_counts[0] ) /
853 (float) ( pv->st_dates[3] - pv->st_dates[0] );
854 if( hb_get_date() > pv->st_first + 4000 )
857 p.rate_avg = 1000.0 * (float) pv->st_counts[3] /
858 (float) ( pv->st_dates[3] - pv->st_first );
859 eta = (float) ( pv->count_frames_max - pv->st_counts[3] ) /
861 p.hours = eta / 3600;
862 p.minutes = ( eta % 3600 ) / 60;
863 p.seconds = eta % 60;
874 hb_set_state( pv->job->h, &state );