libhb/decavcodec.c

   1 /* $Id: decavcodec.c,v 1.6 2005/03/06 04:08:54 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 /* This module is Handbrake's interface to the ffmpeg decoder library
   8    (libavcodec & small parts of libavformat). It contains four Handbrake
   9    "work objects":
  10
  11     decavcodec  connects HB to an ffmpeg audio decoder
  12     decavcodecv connects HB to an ffmpeg video decoder
  13
  14         (Two different routines are needed because the ffmpeg library
  15         has different decoder calling conventions for audio & video.
  16         The audio decoder should have had its name changed to "decavcodeca"
  17         but I got lazy.) These work objects are self-contained & follow all
  18         of HB's conventions for a decoder module. They can be used like
  19         any other HB decoder (deca52, decmpeg2, etc.).
  20
  21     decavcodecai "internal" (incestuous?) version of decavcodec
  22     decavcodecvi "internal" (incestuous?) version of decavcodecv
  23
  24         These routines are functionally equivalent to the routines above but
  25         can only be used by the ffmpeg-based stream reader in libhb/stream.c.
  26         The reason they exist is because the ffmpeg library leaves some of
  27         the information needed by the decoder in the AVStream (the data
  28         structure used by the stream reader) and we need to retrieve it
  29         to successfully decode frames. But in HB the reader and decoder
  30         modules are in completely separate threads and nothing goes between
  31         them but hb_buffers containing frames to be decoded. I.e., there's
  32         no easy way for the ffmpeg stream reader to pass a pointer to its
  33         AVStream over to the ffmpeg video or audio decoder. So the *i work
  34         objects use a private back door to the stream reader to get access
  35         to the AVStream (routines hb_ffmpeg_avstream and hb_ffmpeg_context)
  36         and the codec_param passed to these work objects is the key to this
  37         back door (it's basically an index that allows the correct AVStream
  38         to be retrieved).
  39
  40     The normal & *i objects share a lot of code (the basic frame decoding
  41     and bitstream info code is factored out into subroutines that can be
  42     called by either) but the top level routines of the *i objects
  43     (decavcodecviWork, decavcodecviInfo, etc.) are different because:
  44      1) they *have* to use the AVCodecContext that's contained in the
  45         reader's AVStream rather than just allocating & using their own,
  46      2) the Info routines have access to stuff kept in the AVStream in addition
  47         to stuff kept in the AVCodecContext. This shouldn't be necessary but
  48         crucial information like video frame rate that should be in the
  49         AVCodecContext is either missing or wrong in the version of ffmpeg
  50         we're currently using.
  51
  52     A consequence of the above is that the non-i work objects *can't* use
  53     information from the AVStream because there isn't one - they get their
  54     data from either the dvd reader or the mpeg reader, not the ffmpeg stream
  55     reader. That means that they have to make up for deficiencies in the
  56     AVCodecContext info by using stuff kept in the HB "title" struct. It
  57     also means that ffmpeg codecs that randomly scatter state needed by
  58     the decoder across both the AVCodecContext & the AVStream (e.g., the
  59     VC1 decoder) can't easily be used by the HB mpeg stream reader.
  60  */
  61
  62 #include "hb.h"
  63 #include "hbffmpeg.h"
  64 #include "libavcodec/audioconvert.h"
  65
  66 static int  decavcodecInit( hb_work_object_t *, hb_job_t * );
  67 static int  decavcodecWork( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  68 static void decavcodecClose( hb_work_object_t * );
  69 static int decavcodecInfo( hb_work_object_t *, hb_work_info_t * );
  70 static int decavcodecBSInfo( hb_work_object_t *, const hb_buffer_t *, hb_work_info_t * );
  71 // Audio decoder work object. The initializer is positional, so the entries must stay in hb_work_object_t's member order.
  72 hb_work_object_t hb_decavcodec =
  73 {
  74     WORK_DECAVCODEC,               // presumably the work object id - confirm against hb_work_object_t
  75     "MPGA decoder (libavcodec)",   // presumably the human readable name
  76     decavcodecInit,                // allocates the private state and opens the decoder
  77     decavcodecWork,                // decodes one input buffer of compressed audio
  78     decavcodecClose,               // logs statistics and releases the private state
  79     decavcodecInfo,                // reports stream parameters from the open decoder context
  80     decavcodecBSInfo               // reports stream parameters by decoding a sample buffer
  81 };
  82
  83 #define HEAP_SIZE 8 /* capacity of pts_heap_t and of the delayq ring; slots are computed with "& (HEAP_SIZE-1)" so this must be a power of two */
  84 typedef struct {
  85     // Min-heap of timestamps (heap_pop returns the smallest value pushed).
  86     // There are nheap items on the heap indexed 1..nheap (i.e., top of heap is 1). The 0th slot is unused.
  87     // NOTE(review): this comment used to say a marker is kept in slot 0 to catch overwrites, but no code visible here writes one.
  88     int64_t h[HEAP_SIZE+1];
  89     int     nheap;      // number of values currently on the heap (0 means empty)
  90 } pts_heap_t;
  91
  92 struct hb_work_private_s   // per-instance decoder state, shared by the audio and video work objects in this file
  93 {
  94     hb_job_t        *job;       // job being run; the video code treats NULL as "we're doing a scan"
  95     AVCodecContext  *context;   // libavcodec decoder context
  96     AVCodecParserContext *parser; // libavcodec parser used to split the input into frames
  97     hb_list_t       *list;      // (video) decoded buffers waiting to be chained & sent downstream
  98     double          duration;   // frame duration (for video), computed as 90000 * time_base
  99     double          pts_next;   // next pts we expect to generate
  100     int64_t         pts;        // (video) pts passing from parser to decoder
  101     int64_t         chap_time;  // time of next chap mark (if new_chap != 0)
  102     int             new_chap;   // output chapter mark pending
  103     uint32_t        nframes;    // (video) pictures decoded so far; also selects the delayq slot
  104     uint32_t        ndrops;     // only reported in the close log by the code visible here
  105     uint32_t        decode_errors; // incremented when the video decode call returns an error
  106     int             brokenByMicrosoft; // video stream may contain packed b-frames
  107     hb_buffer_t*    delayq[HEAP_SIZE]; // ring of decoded frames waiting for a corrected timestamp
  108     pts_heap_t      pts_heap;   // timestamps of the frames held in delayq, smallest first
  109     void*           buffer;     // released with av_free() on close; not otherwise used by the code visible here
  110     struct SwsContext *sws_context; // if we have to rescale or convert color space
  111 };
 112
 113 static int64_t heap_pop( pts_heap_t *heap )
 114 {
 115     int64_t result;
 116
 117     if ( heap->nheap <= 0 )
 118     {
 119         return -1;
 120     }
 121
 122     // return the top of the heap then put the bottom element on top,
 123     // decrease the heap size by one & rebalence the heap.
 124     result = heap->h[1];
 125
 126     int64_t v = heap->h[heap->nheap--];
 127     int parent = 1;
 128     int child = parent << 1;
 129     while ( child <= heap->nheap )
 130     {
 131         // find the smallest of the two children of parent
 132         if (child < heap->nheap && heap->h[child] > heap->h[child+1] )
 133             ++child;
 134
 135         if (v <= heap->h[child])
 136             // new item is smaller than either child so it's the new parent.
 137             break;
 138
 139         // smallest child is smaller than new item so move it up then
 140         // check its children.
 141         int64_t hp = heap->h[child];
 142         heap->h[parent] = hp;
 143         parent = child;
 144         child = parent << 1;
 145     }
 146     heap->h[parent] = v;
 147     return result;
 148 }
 149
 150 static void heap_push( pts_heap_t *heap, int64_t v )
 151 {
 152     if ( heap->nheap < HEAP_SIZE )
 153     {
 154         ++heap->nheap;
 155     }
 156
 157     // stick the new value on the bottom of the heap then bubble it
 158     // up to its correct spot.
 159         int child = heap->nheap;
 160         while (child > 1) {
 161                 int parent = child >> 1;
 162                 if (heap->h[parent] <= v)
 163                         break;
 164                 // move parent down
 165                 int64_t hp = heap->h[parent];
 166                 heap->h[child] = hp;
 167                 child = parent;
 168         }
 169         heap->h[child] = v;
 170 }
 171
 172
 173 /***********************************************************************
 174  * hb_work_decavcodec_init
 175  ***********************************************************************
 176  *
 177  **********************************************************************/
 178 static int decavcodecInit( hb_work_object_t * w, hb_job_t * job )
 179 {
 180     AVCodec * codec;
 181
 182     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
 183     w->private_data = pv;
 184
 185     pv->job   = job;
 186
 187     int codec_id = w->codec_param;
 188     /*XXX*/
 189     if ( codec_id == 0 )
 190         codec_id = CODEC_ID_MP2;
 191
 192     codec = avcodec_find_decoder( codec_id );
 193     pv->parser = av_parser_init( codec_id );
 194
 195     pv->context = avcodec_alloc_context();
 196     hb_avcodec_open( pv->context, codec );
 197
 198     return 0;
 199 }
 200
 201 /***********************************************************************
 202  * Close
 203  ***********************************************************************
 204  *
 205  **********************************************************************/
 206 static void decavcodecClose( hb_work_object_t * w )
 207 {
 208     hb_work_private_t * pv = w->private_data;
 209
 210     if ( pv )
 211     {
 212         if ( pv->job && pv->context && pv->context->codec )
 213         {
 214             hb_log( "%s-decoder done: %u frames, %u decoder errors, %u drops",
 215                     pv->context->codec->name, pv->nframes, pv->decode_errors,
 216                     pv->ndrops );
 217         }
 218         if ( pv->sws_context )
 219         {
 220             sws_freeContext( pv->sws_context );
 221         }
 222         if ( pv->parser )
 223         {
 224             av_parser_close(pv->parser);
 225         }
 226         if ( pv->context && pv->context->codec )
 227         {
 228             hb_avcodec_close( pv->context );
 229         }
 230         if ( pv->list )
 231         {
 232             hb_list_close( &pv->list );
 233         }
 234         if ( pv->buffer )
 235         {
 236             av_free( pv->buffer );
 237             pv->buffer = NULL;
 238         }
 239         free( pv );
 240         w->private_data = NULL;
 241     }
 242 }
 243
 244 /***********************************************************************
 245  * Work
 246  ***********************************************************************
 247  *
 248  **********************************************************************/
 249 static int decavcodecWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 250                     hb_buffer_t ** buf_out )
 251 {
 252     hb_work_private_t * pv = w->private_data;
 253     hb_buffer_t * in = *buf_in, * buf, * last = NULL;
 254     int   pos, len, out_size, i, uncompressed_len;
 255     short* bufaligned;
 256     uint64_t cur;
 257     unsigned char *parser_output_buffer;
 258     int parser_output_buffer_len;
 259
 260     if ( (*buf_in)->size <= 0 )
 261     {
 262         /* EOF on input stream - send it downstream & say that we're done */
 263         *buf_out = *buf_in;
 264         *buf_in = NULL;
 265         return HB_WORK_DONE;
 266     }
 267
 268     *buf_out = NULL;
 269
 270     if ( in->start < -1 && pv->pts_next <= 0 )
 271     {
 272         // discard buffers that start before video time 0
 273         return HB_WORK_OK;
 274     }
 275
 276     cur = ( in->start < 0 )? pv->pts_next : in->start;
 277
 278     bufaligned = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 279     pos = 0;
 280     while( pos < in->size )
 281     {
 282         len = av_parser_parse2( pv->parser, pv->context,
 283                                 &parser_output_buffer, &parser_output_buffer_len,
 284                                 in->data + pos, in->size - pos, cur, cur, AV_NOPTS_VALUE );
 285         out_size = 0;
 286         uncompressed_len = 0;
 287         if (parser_output_buffer_len)
 288         {
 289             AVPacket avp;
 290             av_init_packet( &avp );
 291             avp.data = parser_output_buffer;
 292             avp.size = parser_output_buffer_len;
 293
 294             out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 295             uncompressed_len = avcodec_decode_audio3( pv->context, bufaligned, &out_size, &avp );
 296         }
 297         if( out_size )
 298         {
 299             short * s16;
 300             float * fl32;
 301
 302             buf = hb_buffer_init( 2 * out_size );
 303
 304             int sample_size_in_bytes = 2;   // Default to 2 bytes
 305             switch (pv->context->sample_fmt)
 306             {
 307               case SAMPLE_FMT_S16:
 308                 sample_size_in_bytes = 2;
 309                 break;
 310               /* We should handle other formats here - but that needs additional format conversion work below */
 311               /* For now we'll just report the error and try to carry on */
 312               default:
 313                 hb_log("decavcodecWork - Unknown Sample Format from avcodec_decode_audio (%d) !", pv->context->sample_fmt);
 314                 break;
 315             }
 316
 317             buf->start = cur;
 318             buf->stop  = cur + 90000 * ( out_size / (sample_size_in_bytes * pv->context->channels) ) /
 319                          pv->context->sample_rate;
 320             cur = buf->stop;
 321
 322             s16  = bufaligned;
 323             fl32 = (float *) buf->data;
 324             for( i = 0; i < out_size / 2; i++ )
 325             {
 326                 fl32[i] = s16[i];
 327             }
 328
 329             if( last )
 330             {
 331                 last = last->next = buf;
 332             }
 333             else
 334             {
 335                 *buf_out = last = buf;
 336             }
 337         }
 338
 339         pos += len;
 340     }
 341
 342     pv->pts_next = cur;
 343
 344     av_free( bufaligned );
 345     return HB_WORK_OK;
 346 }
 347
 348 static int decavcodecInfo( hb_work_object_t *w, hb_work_info_t *info )
 349 {
 350     hb_work_private_t *pv = w->private_data;
 351
 352     memset( info, 0, sizeof(*info) );
 353
 354     if ( pv && pv->context )
 355     {
 356         AVCodecContext *context = pv->context;
 357         info->bitrate = context->bit_rate;
 358         info->rate = context->time_base.num;
 359         info->rate_base = context->time_base.den;
 360         info->profile = context->profile;
 361         info->level = context->level;
 362         return 1;
 363     }
 364     return 0;
 365 }
 366
  367 static const int chan2layout[] = {   // input channel layout for a given channel count (the array index)
  368     HB_INPUT_CH_LAYOUT_MONO,  // We should allow no audio really.
  369     HB_INPUT_CH_LAYOUT_MONO,  // 1 channel
  370     HB_INPUT_CH_LAYOUT_STEREO,  // 2 channels
  371     HB_INPUT_CH_LAYOUT_2F1R,  // 3 channels
  372     HB_INPUT_CH_LAYOUT_2F2R,  // 4 channels
  373     HB_INPUT_CH_LAYOUT_3F2R,  // 5 channels
  374     HB_INPUT_CH_LAYOUT_4F2R,  // 6 channels
  375     HB_INPUT_CH_LAYOUT_STEREO,  // 7 channels
  376     HB_INPUT_CH_LAYOUT_STEREO,  // 8 channels. NOTE(review): decavcodecBSInfo indexes with "channels & 7", so this entry is never selected there
  377 };
 378
 379 static int decavcodecBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 380                              hb_work_info_t *info )
 381 {
 382     hb_work_private_t *pv = w->private_data;
 383     int ret = 0;
 384
 385     memset( info, 0, sizeof(*info) );
 386
 387     if ( pv && pv->context )
 388     {
 389         return decavcodecInfo( w, info );
 390     }
 391     // XXX
 392     // We should parse the bitstream to find its parameters but for right
 393     // now we just return dummy values if there's a codec that will handle it.
 394     AVCodec *codec = avcodec_find_decoder( w->codec_param? w->codec_param :
 395                                                            CODEC_ID_MP2 );
 396     if ( ! codec )
 397     {
 398         // there's no ffmpeg codec for this audio type - give up
 399         return -1;
 400     }
 401
 402     static char codec_name[64];
 403     info->name =  strncpy( codec_name, codec->name, sizeof(codec_name)-1 );
 404
 405     AVCodecParserContext *parser = av_parser_init( codec->id );
 406     AVCodecContext *context = avcodec_alloc_context();
 407     hb_avcodec_open( context, codec );
 408     uint8_t *buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
 409     int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
 410     unsigned char *pbuffer;
 411     int pos = 0, pbuffer_size;
 412
 413     while ( pos < buf->size )
 414     {
 415         int len = av_parser_parse2( parser, context, &pbuffer, &pbuffer_size,
 416                                     buf->data + pos, buf->size - pos,
 417                                     buf->start, buf->start, AV_NOPTS_VALUE );
 418         pos += len;
 419         if ( pbuffer_size > 0 )
 420         {
 421             AVPacket avp;
 422             av_init_packet( &avp );
 423             avp.data = pbuffer;
 424             avp.size = pbuffer_size;
 425
 426             len = avcodec_decode_audio3( context, (int16_t*)buffer, &out_size, &avp );
 427             if ( len > 0 && context->sample_rate > 0 )
 428             {
 429                 info->bitrate = context->bit_rate;
 430                 info->rate = context->sample_rate;
 431                 info->rate_base = 1;
 432                 info->channel_layout = chan2layout[context->channels & 7];
 433                 ret = 1;
 434                 break;
 435             }
 436         }
 437     }
 438     av_free( buffer );
 439     av_parser_close( parser );
 440     hb_avcodec_close( context );
 441     return ret;
 442 }
 443
 444 /* -------------------------------------------------------------
 445  * General purpose video decoder using libavcodec
 446  */
 447
 448 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
 449                             int h )
 450 {
 451     if ( dstride == sstride )
 452     {
 453         memcpy( dst, src, dstride * h );
 454         return dst + dstride * h;
 455     }
 456     int lbytes = dstride <= sstride? dstride : sstride;
 457     while ( --h >= 0 )
 458     {
 459         memcpy( dst, src, lbytes );
 460         src += sstride;
 461         dst += dstride;
 462     }
 463     return dst;
 464 }
 465
 466 // copy one video frame into an HB buf. If the frame isn't in our color space
 467 // or at least one of its dimensions is odd, use sws_scale to convert/rescale it.
 468 // Otherwise just copy the bits.
 469 static hb_buffer_t *copy_frame( hb_work_private_t *pv, AVFrame *frame )
 470 {
 471     AVCodecContext *context = pv->context;
 472     int w, h;
 473     if ( ! pv->job )
 474     {
 475         // if the dimensions are odd, drop the lsb since h264 requires that
 476         // both width and height be even.
 477         w = ( context->width >> 1 ) << 1;
 478         h = ( context->height >> 1 ) << 1;
 479     }
 480     else
 481     {
 482         w =  pv->job->title->width;
 483         h =  pv->job->title->height;
 484     }
 485     hb_buffer_t *buf = hb_video_buffer_init( w, h );
 486     uint8_t *dst = buf->data;
 487
 488     if ( context->pix_fmt != PIX_FMT_YUV420P || w != context->width ||
 489          h != context->height )
 490     {
 491         // have to convert to our internal color space and/or rescale
 492         AVPicture dstpic;
 493         avpicture_fill( &dstpic, dst, PIX_FMT_YUV420P, w, h );
 494
 495         if ( ! pv->sws_context )
 496         {
 497             pv->sws_context = sws_getContext( context->width, context->height, context->pix_fmt,
 498                                               w, h, PIX_FMT_YUV420P,
 499                                               SWS_LANCZOS|SWS_ACCURATE_RND,
 500                                               NULL, NULL, NULL );
 501         }
 502         sws_scale( pv->sws_context, frame->data, frame->linesize, 0, h,
 503                    dstpic.data, dstpic.linesize );
 504     }
 505     else
 506     {
 507         dst = copy_plane( dst, frame->data[0], w, frame->linesize[0], h );
 508         w = (w + 1) >> 1; h = (h + 1) >> 1;
 509         dst = copy_plane( dst, frame->data[1], w, frame->linesize[1], h );
 510         dst = copy_plane( dst, frame->data[2], w, frame->linesize[2], h );
 511     }
 512     return buf;
 513 }
 514
 515 static int get_frame_buf( AVCodecContext *context, AVFrame *frame )
 516 {
 517     hb_work_private_t *pv = context->opaque;
 518     frame->pts = pv->pts;
 519     pv->pts = -1;
 520     return avcodec_default_get_buffer( context, frame );
 521 }
 522
 523 static void log_chapter( hb_work_private_t *pv, int chap_num, int64_t pts )
 524 {
 525     hb_chapter_t *c = hb_list_item( pv->job->title->list_chapter, chap_num - 1 );
 526     if ( c && c->title )
 527     {
 528         hb_log( "%s: \"%s\" (%d) at frame %u time %"PRId64,
 529                 pv->context->codec->name, c->title, chap_num, pv->nframes, pts );
 530     }
 531     else
 532     {
 533         hb_log( "%s: Chapter %d at frame %u time %"PRId64,
 534                 pv->context->codec->name, chap_num, pv->nframes, pts );
 535     }
 536 }
 537
 538 static void flushDelayQueue( hb_work_private_t *pv )
 539 {
 540     hb_buffer_t *buf;
 541     int slot = pv->nframes & (HEAP_SIZE-1);
 542
 543     // flush all the video packets left on our timestamp-reordering delay q
 544     while ( ( buf = pv->delayq[slot] ) != NULL )
 545     {
 546         buf->start = heap_pop( &pv->pts_heap );
 547         hb_list_add( pv->list, buf );
 548         pv->delayq[slot] = NULL;
 549         slot = ( slot + 1 ) & (HEAP_SIZE-1);
 550     }
 551 }
 552
 553 static int decodeFrame( hb_work_private_t *pv, uint8_t *data, int size )
 554 {
 555     int got_picture, oldlevel = 0;
 556     AVFrame frame;
 557     AVPacket avp;
 558
 559     if ( global_verbosity_level <= 1 )
 560     {
 561         oldlevel = av_log_get_level();
 562         av_log_set_level( AV_LOG_QUIET );
 563     }
 564
 565     av_init_packet( &avp );
 566     avp.data = data;
 567     avp.size = size;
 568     if ( avcodec_decode_video2( pv->context, &frame, &got_picture, &avp ) < 0 )
 569     {
 570         ++pv->decode_errors;
 571     }
 572     if ( global_verbosity_level <= 1 )
 573     {
 574         av_log_set_level( oldlevel );
 575     }
 576     if( got_picture )
 577     {
 578         // ffmpeg makes it hard to attach a pts to a frame. if the MPEG ES
 579         // packet had a pts we handed it to av_parser_parse (if the packet had
 580         // no pts we set it to -1 but before the parse we can't distinguish between
 581         // the start of a video frame with no pts & an intermediate packet of
 582         // some frame which never has a pts). we hope that when parse returns
 583         // the frame to us the pts we originally handed it will be in parser->pts.
 584         // we put this pts into pv->pts so that when a avcodec_decode_video
 585         // finally gets around to allocating an AVFrame to hold the decoded
 586         // frame we can stuff that pts into the frame. if all of these relays
 587         // worked at this point frame.pts should hold the frame's pts from the
 588         // original data stream or -1 if it didn't have one. in the latter case
 589         // we generate the next pts in sequence for it.
 590         double frame_dur = pv->duration;
 591         if ( frame_dur <= 0 )
 592         {
 593             frame_dur = 90000. * (double)pv->context->time_base.num /
 594                         (double)pv->context->time_base.den;
 595             pv->duration = frame_dur;
 596         }
 597         if ( frame.repeat_pict )
 598         {
 599             frame_dur += frame.repeat_pict * frame_dur * 0.5;
 600         }
 601         // XXX Unlike every other video decoder, the Raw decoder doesn't
 602         //     use the standard buffer allocation routines so we never
 603         //     get to put a PTS in the frame. Do it now.
 604         if ( pv->context->codec_id == CODEC_ID_RAWVIDEO )
 605         {
 606             frame.pts = pv->pts;
 607             pv->pts = -1;
 608         }
 609         // If there was no pts for this frame, assume constant frame rate
 610         // video & estimate the next frame time from the last & duration.
 611         double pts = frame.pts;
 612         if ( pts < 0 )
 613         {
 614             pts = pv->pts_next;
 615         }
 616         pv->pts_next = pts + frame_dur;
 617
 618         hb_buffer_t *buf;
 619
 620         // if we're doing a scan or this content couldn't have been broken
 621         // by Microsoft we don't worry about timestamp reordering
 622         if ( ! pv->job || ! pv->brokenByMicrosoft )
 623         {
 624             buf = copy_frame( pv, &frame );
 625             buf->start = pts;
 626             hb_list_add( pv->list, buf );
 627             ++pv->nframes;
 628             return got_picture;
 629         }
 630
 631         // XXX This following probably addresses a libavcodec bug but I don't
 632         //     see an easy fix so we workaround it here.
 633         //
 634         // The M$ 'packed B-frames' atrocity results in decoded frames with
 635         // the wrong timestamp. E.g., if there are 2 b-frames the timestamps
 636         // we see here will be "2 3 1 5 6 4 ..." instead of "1 2 3 4 5 6".
 637         // The frames are actually delivered in the right order but with
 638         // the wrong timestamp. To get the correct timestamp attached to
 639         // each frame we have a delay queue (longer than the max number of
 640         // b-frames) & a sorting heap for the timestamps. As each frame
 641         // comes out of the decoder the oldest frame in the queue is removed
 642         // and associated with the smallest timestamp. Then the new frame is
 643         // added to the queue & its timestamp is pushed on the heap.
 644         // This does nothing if the timestamps are correct (i.e., the video
 645         // uses a codec that Micro$oft hasn't broken yet) but the frames
 646         // get timestamped correctly even when M$ has munged them.
 647
 648         // remove the oldest picture from the frame queue (if any) &
 649         // give it the smallest timestamp from our heap. The queue size
 650         // is a power of two so we get the slot of the oldest by masking
 651         // the frame count & this will become the slot of the newest
 652         // once we've removed & processed the oldest.
 653         int slot = pv->nframes & (HEAP_SIZE-1);
 654         if ( ( buf = pv->delayq[slot] ) != NULL )
 655         {
 656             buf->start = heap_pop( &pv->pts_heap );
 657
 658             if ( pv->new_chap && buf->start >= pv->chap_time )
 659             {
 660                 buf->new_chap = pv->new_chap;
 661                 pv->new_chap = 0;
 662                 pv->chap_time = 0;
 663                 log_chapter( pv, buf->new_chap, buf->start );
 664             }
 665             else if ( pv->nframes == 0 )
 666             {
 667                 log_chapter( pv, pv->job->chapter_start, buf->start );
 668             }
 669             hb_list_add( pv->list, buf );
 670         }
 671
 672         // add the new frame to the delayq & push its timestamp on the heap
 673         pv->delayq[slot] = copy_frame( pv, &frame );
 674         heap_push( &pv->pts_heap, pts );
 675
 676         ++pv->nframes;
 677     }
 678
 679     return got_picture;
 680 }
 681
 682 static void decodeVideo( hb_work_private_t *pv, uint8_t *data, int size,
 683                          int64_t pts, int64_t dts )
 684 {
 685     /*
 686      * The following loop is a do..while because we need to handle both
 687      * data & the flush at the end (signaled by size=0). At the end there's
 688      * generally a frame in the parser & one or more frames in the decoder
 689      * (depending on the bframes setting).
 690      */
 691     int pos = 0;
 692     do {
 693         uint8_t *pout;
 694         int pout_len;
 695         int len = av_parser_parse2( pv->parser, pv->context, &pout, &pout_len,
 696                                     data + pos, size - pos, pts, dts, AV_NOPTS_VALUE );
 697         pos += len;
 698
 699         if ( pout_len > 0 )
 700         {
 701             pv->pts = pv->parser->pts;
 702             decodeFrame( pv, pout, pout_len );
 703         }
 704     } while ( pos < size );
 705
 706     /* the stuff above flushed the parser, now flush the decoder */
 707     if ( size <= 0 )
 708     {
 709         while ( decodeFrame( pv, NULL, 0 ) )
 710         {
 711         }
 712         flushDelayQueue( pv );
 713     }
 714 }
 715
 716 static hb_buffer_t *link_buf_list( hb_work_private_t *pv )
 717 {
 718     hb_buffer_t *head = hb_list_item( pv->list, 0 );
 719
 720     if ( head )
 721     {
 722         hb_list_rem( pv->list, head );
 723
 724         hb_buffer_t *last = head, *buf;
 725
 726         while ( ( buf = hb_list_item( pv->list, 0 ) ) != NULL )
 727         {
 728             hb_list_rem( pv->list, buf );
 729             last->next = buf;
 730             last = buf;
 731         }
 732     }
 733     return head;
 734 }
 735
 736
 737 static int decavcodecvInit( hb_work_object_t * w, hb_job_t * job )
 738 {
 739
 740     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
 741     w->private_data = pv;
 742     pv->job   = job;
 743     pv->list = hb_list_init();
 744
 745     int codec_id = w->codec_param;
 746     pv->parser = av_parser_init( codec_id );
 747     pv->context = avcodec_alloc_context2( CODEC_TYPE_VIDEO );
 748
 749     /* we have to wrap ffmpeg's get_buffer to be able to set the pts (?!) */
 750     pv->context->opaque = pv;
 751     pv->context->get_buffer = get_frame_buf;
 752
 753     return 0;
 754 }
 755
 756 static int next_hdr( hb_buffer_t *in, int offset )
 757 {
 758     uint8_t *dat = in->data;
 759     uint16_t last2 = 0xffff;
 760     for ( ; in->size - offset > 1; ++offset )
 761     {
 762         if ( last2 == 0 && dat[offset] == 0x01 )
 763             // found an mpeg start code
 764             return offset - 2;
 765
 766         last2 = ( last2 << 8 ) | dat[offset];
 767     }
 768
 769     return -1;
 770 }
 771
 772 static int find_hdr( hb_buffer_t *in, int offset, uint8_t hdr_type )
 773 {
 774     if ( in->size - offset < 4 )
 775         // not enough room for an mpeg start code
 776         return -1;
 777
 778     for ( ; ( offset = next_hdr( in, offset ) ) >= 0; ++offset )
 779     {
 780         if ( in->data[offset+3] == hdr_type )
 781             // found it
 782             break;
 783     }
 784     return offset;
 785 }
 786
 787 static int setup_extradata( hb_work_object_t *w, hb_buffer_t *in )
 788 {
 789     hb_work_private_t *pv = w->private_data;
 790
 791     // we can't call the avstream funcs but the read_header func in the
 792     // AVInputFormat may set up some state in the AVContext. In particular
 793     // vc1t_read_header allocates 'extradata' to deal with header issues
 794     // related to Microsoft's bizarre engineering notions. We alloc a chunk
 795     // of space to make vc1 work then associate the codec with the context.
 796     if ( w->codec_param != CODEC_ID_VC1 )
 797     {
 798         // we haven't been inflicted with M$ - allocate a little space as
 799         // a marker and return success.
 800         pv->context->extradata_size = 16;
 801         pv->context->extradata = av_malloc(pv->context->extradata_size);
 802         return 0;
 803     }
 804
 805     // find the start and and of the sequence header
 806     int shdr, shdr_end;
 807     if ( ( shdr = find_hdr( in, 0, 0x0f ) ) < 0 )
 808     {
 809         // didn't find start of seq hdr
 810         return 1;
 811     }
 812     if ( ( shdr_end = next_hdr( in, shdr + 4 ) ) < 0 )
 813     {
 814         shdr_end = in->size;
 815     }
 816     shdr_end -= shdr;
 817
 818     // find the start and and of the entry point header
 819     int ehdr, ehdr_end;
 820     if ( ( ehdr = find_hdr( in, 0, 0x0e ) ) < 0 )
 821     {
 822         // didn't find start of entry point hdr
 823         return 1;
 824     }
 825     if ( ( ehdr_end = next_hdr( in, ehdr + 4 ) ) < 0 )
 826     {
 827         ehdr_end = in->size;
 828     }
 829     ehdr_end -= ehdr;
 830
 831     // found both headers - allocate an extradata big enough to hold both
 832     // then copy them into it.
 833     pv->context->extradata_size = shdr_end + ehdr_end;
 834     pv->context->extradata = av_malloc(pv->context->extradata_size + 8);
 835     memcpy( pv->context->extradata, in->data + shdr, shdr_end );
 836     memcpy( pv->context->extradata + shdr_end, in->data + ehdr, ehdr_end );
 837     memset( pv->context->extradata + shdr_end + ehdr_end, 0, 8);
 838     return 0;
 839 }
 840
 841 static int decavcodecvWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
 842                             hb_buffer_t ** buf_out )
 843 {
 844     hb_work_private_t *pv = w->private_data;
 845     hb_buffer_t *in = *buf_in;
 846     int64_t pts = AV_NOPTS_VALUE;
 847     int64_t dts = pts;
 848
 849     *buf_in = NULL;
 850
 851     /* if we got an empty buffer signaling end-of-stream send it downstream */
 852     if ( in->size == 0 )
 853     {
 854         decodeVideo( pv, in->data, in->size, pts, dts );
 855         hb_list_add( pv->list, in );
 856         *buf_out = link_buf_list( pv );
 857         return HB_WORK_DONE;
 858     }
 859
 860     // if this is the first frame open the codec (we have to wait for the
 861     // first frame because of M$ VC1 braindamage).
 862     if ( pv->context->extradata_size == 0 )
 863     {
 864         if ( setup_extradata( w, in ) )
 865         {
 866             // we didn't find the headers needed to set up extradata.
 867             // the codec will abort if we open it so just free the buf
 868             // and hope we eventually get the info we need.
 869             hb_buffer_close( &in );
 870             return HB_WORK_OK;
 871         }
 872         AVCodec *codec = avcodec_find_decoder( w->codec_param );
 873         // There's a mis-feature in ffmpeg that causes the context to be
 874         // incorrectly initialized the 1st time avcodec_open is called.
 875         // If you close it and open a 2nd time, it finishes the job.
 876         hb_avcodec_open( pv->context, codec );
 877         hb_avcodec_close( pv->context );
 878         hb_avcodec_open( pv->context, codec );
 879     }
 880
 881     if( in->start >= 0 )
 882     {
 883         pts = in->start;
 884         dts = in->renderOffset;
 885     }
 886     if ( in->new_chap )
 887     {
 888         pv->new_chap = in->new_chap;
 889         pv->chap_time = pts >= 0? pts : pv->pts_next;
 890     }
 891     decodeVideo( pv, in->data, in->size, pts, dts );
 892     hb_buffer_close( &in );
 893     *buf_out = link_buf_list( pv );
 894     return HB_WORK_OK;
 895 }
 896
 897 static int decavcodecvInfo( hb_work_object_t *w, hb_work_info_t *info )
 898 {
 899     hb_work_private_t *pv = w->private_data;
 900
 901     memset( info, 0, sizeof(*info) );
 902
 903     if ( pv && pv->context )
 904     {
 905         AVCodecContext *context = pv->context;
 906         info->bitrate = context->bit_rate;
 907         info->width = context->width;
 908         info->height = context->height;
 909
 910         /* ffmpeg gives the frame rate in frames per second while HB wants
 911          * it in units of the 27MHz MPEG clock. */
 912         info->rate = 27000000;
 913         info->rate_base = (int64_t)context->time_base.num * 27000000LL /
 914                           context->time_base.den;
 915         if ( context->ticks_per_frame > 1 )
 916         {
 917             // for ffmpeg 0.5 & later, the H.264 & MPEG-2 time base is
 918             // field rate rather than frame rate so convert back to frames.
 919             info->rate_base *= context->ticks_per_frame;
 920         }
 921
 922         info->pixel_aspect_width = context->sample_aspect_ratio.num;
 923         info->pixel_aspect_height = context->sample_aspect_ratio.den;
 924
 925         /* Sometimes there's no pixel aspect set in the source ffmpeg context
 926          * which appears to come from the video stream. In that case,
 927          * try the pixel aspect in AVStream (which appears to come from
 928          * the container). Else assume a 1:1 PAR. */
 929         if ( info->pixel_aspect_width == 0 ||
 930              info->pixel_aspect_height == 0 )
 931         {
 932             AVStream *st = hb_ffmpeg_avstream( w->codec_param );
 933             info->pixel_aspect_width = st->sample_aspect_ratio.num ?
 934                                         st->sample_aspect_ratio.num : 1;
 935             info->pixel_aspect_height = st->sample_aspect_ratio.den ?
 936                                         st->sample_aspect_ratio.den : 1;
 937         }
 938         /* ffmpeg returns the Pixel Aspect Ratio (PAR). Handbrake wants the
 939          * Display Aspect Ratio so we convert by scaling by the Storage
 940          * Aspect Ratio (w/h). We do the calc in floating point to get the
 941          * rounding right. */
 942         info->aspect = (double)info->pixel_aspect_width *
 943                        (double)context->width /
 944                        (double)info->pixel_aspect_height /
 945                        (double)context->height;
 946
 947         info->profile = context->profile;
 948         info->level = context->level;
 949         info->name = context->codec->name;
 950         return 1;
 951     }
 952     return 0;
 953 }
 954
 955 static int decavcodecvBSInfo( hb_work_object_t *w, const hb_buffer_t *buf,
 956                              hb_work_info_t *info )
 957 {
 958     return 0;
 959 }
 960
 961 hb_work_object_t hb_decavcodecv =
 962 {
 963     WORK_DECAVCODECV,
 964     "Video decoder (libavcodec)",
 965     decavcodecvInit,
 966     decavcodecvWork,
 967     decavcodecClose,
 968     decavcodecvInfo,
 969     decavcodecvBSInfo
 970 };
 971
 972
 973 // This is a special decoder for ffmpeg streams. The ffmpeg stream reader
 974 // includes a parser and passes information from the parser to the decoder
 975 // via a codec context kept in the AVStream of the reader's AVFormatContext.
 976 // We *have* to use that codec context to decode the stream or we'll get
 977 // garbage. ffmpeg_title_scan put a cookie that can be used to get to that
 978 // codec context in our codec_param.
 979
 980 // this routine gets the appropriate context pointer from the ffmpeg
 981 // stream reader. it can't be called until we get the first buffer because
 982 // we can't guarantee that reader will be called before the our init
 983 // routine and if our init is called first we'll get a pointer to the
 984 // old scan stream (which has already been closed).
 985 static void init_ffmpeg_context( hb_work_object_t *w )
 986 {
 987     hb_work_private_t *pv = w->private_data;
 988     pv->context = hb_ffmpeg_context( w->codec_param );
 989
 990     // during scan the decoder gets closed & reopened which will
 991     // close the codec so reopen it if it's not there
 992     if ( ! pv->context->codec )
 993     {
 994         AVCodec *codec = avcodec_find_decoder( pv->context->codec_id );
 995         hb_avcodec_open( pv->context, codec );
 996     }
 997     // set up our best guess at the frame duration.
 998     // the frame rate in the codec is usually bogus but it's sometimes
 999     // ok in the stream.
1000     AVStream *st = hb_ffmpeg_avstream( w->codec_param );
1001
1002     if ( st->nb_frames && st->duration )
1003     {
1004         // compute the average frame duration from the total number
1005         // of frames & the total duration.
1006         pv->duration = ( (double)st->duration * (double)st->time_base.num ) /
1007                        ( (double)st->nb_frames * (double)st->time_base.den );
1008     }
1009     else
1010     {
1011         // XXX We don't have a frame count or duration so try to use the
1012         // far less reliable time base info in the stream.
1013         // Because the time bases are so screwed up, we only take values
1014         // in the range 8fps - 64fps.
1015         AVRational tb;
1016         if ( st->time_base.num * 64 > st->time_base.den &&
1017              st->time_base.den > st->time_base.num * 8 )
1018         {
1019             tb = st->time_base;
1020         }
1021         else if ( st->r_frame_rate.den * 64 > st->r_frame_rate.num &&
1022                   st->r_frame_rate.num > st->r_frame_rate.den * 8 )
1023         {
1024             tb.num = st->r_frame_rate.den;
1025             tb.den = st->r_frame_rate.num;
1026         }
1027         else
1028         {
1029             tb.num = 1001;  /*XXX*/
1030             tb.den = 24000; /*XXX*/
1031         }
1032         pv->duration =  (double)tb.num / (double)tb.den;
1033     }
1034     pv->duration *= 90000.;
1035
1036     // we have to wrap ffmpeg's get_buffer to be able to set the pts (?!)
1037     pv->context->opaque = pv;
1038     pv->context->get_buffer = get_frame_buf;
1039
1040     // avi, mkv and possibly mp4 containers can contain the M$ VFW packed
1041     // b-frames abortion that messes up frame ordering and timestamps.
1042     // XXX ffmpeg knows which streams are broken but doesn't expose the
1043     //     info externally. We should patch ffmpeg to add a flag to the
1044     //     codec context for this but until then we mark all ffmpeg streams
1045     //     as suspicious.
1046     pv->brokenByMicrosoft = 1;
1047 }
1048
1049 static void prepare_ffmpeg_buffer( hb_buffer_t * in )
1050 {
1051     // ffmpeg requires an extra 8 bytes of zero at the end of the buffer and
1052     // will seg fault in odd, data dependent ways if it's not there. (my guess
1053     // is this is a case of a local performance optimization creating a global
1054     // performance degradation since all the time wasted by extraneous data
1055     // copies & memory zeroing has to be huge compared to the minor reduction
1056     // in inner-loop instructions this affords - modern cpus bottleneck on
1057     // memory bandwidth not instruction bandwidth).
1058     if ( in->size + FF_INPUT_BUFFER_PADDING_SIZE > in->alloc )
1059     {
1060         // have to realloc to add the padding
1061         hb_buffer_realloc( in, in->size + FF_INPUT_BUFFER_PADDING_SIZE );
1062     }
1063     memset( in->data + in->size, 0, FF_INPUT_BUFFER_PADDING_SIZE );
1064 }
1065
1066 static int decavcodecviInit( hb_work_object_t * w, hb_job_t * job )
1067 {
1068
1069     hb_work_private_t *pv = calloc( 1, sizeof( hb_work_private_t ) );
1070     w->private_data = pv;
1071     pv->job   = job;
1072     pv->list = hb_list_init();
1073     pv->pts_next = -1;
1074     pv->pts = -1;
1075     return 0;
1076 }
1077
1078 static int decavcodecviWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
1079                              hb_buffer_t ** buf_out )
1080 {
1081     hb_work_private_t *pv = w->private_data;
1082     if ( ! pv->context )
1083     {
1084         init_ffmpeg_context( w );
1085     }
1086     hb_buffer_t *in = *buf_in;
1087     *buf_in = NULL;
1088
1089     /* if we got an empty buffer signaling end-of-stream send it downstream */
1090     if ( in->size == 0 )
1091     {
1092         /* flush any frames left in the decoder */
1093         while ( decodeFrame( pv, NULL, 0 ) )
1094         {
1095         }
1096         flushDelayQueue( pv );
1097         hb_list_add( pv->list, in );
1098         *buf_out = link_buf_list( pv );
1099         return HB_WORK_DONE;
1100     }
1101
1102     int64_t pts = in->start;
1103     if( pts >= 0 )
1104     {
1105         // use the first timestamp as our 'next expected' pts
1106         if ( pv->pts_next < 0 )
1107         {
1108             pv->pts_next = pts;
1109         }
1110         pv->pts = pts;
1111     }
1112
1113     if ( in->new_chap )
1114     {
1115         pv->new_chap = in->new_chap;
1116         pv->chap_time = pts >= 0? pts : pv->pts_next;
1117     }
1118     prepare_ffmpeg_buffer( in );
1119     decodeFrame( pv, in->data, in->size );
1120     hb_buffer_close( &in );
1121     *buf_out = link_buf_list( pv );
1122     return HB_WORK_OK;
1123 }
1124
1125 static int decavcodecviInfo( hb_work_object_t *w, hb_work_info_t *info )
1126 {
1127     if ( decavcodecvInfo( w, info ) )
1128     {
1129         hb_work_private_t *pv = w->private_data;
1130         if ( ! pv->context )
1131         {
1132             init_ffmpeg_context( w );
1133         }
1134         // we have the frame duration in units of the 90KHz pts clock but
1135         // need it in units of the 27MHz MPEG clock. */
1136         info->rate = 27000000;
1137         info->rate_base = pv->duration * 300.;
1138         return 1;
1139     }
1140     return 0;
1141 }
1142
1143 static void decodeAudio( hb_work_private_t *pv, uint8_t *data, int size )
1144 {
1145     AVCodecContext *context = pv->context;
1146     int pos = 0;
1147
1148     while ( pos < size )
1149     {
1150         int16_t *buffer = pv->buffer;
1151         if ( buffer == NULL )
1152         {
1153             pv->buffer = av_malloc( AVCODEC_MAX_AUDIO_FRAME_SIZE );
1154             buffer = pv->buffer;
1155         }
1156
1157         AVPacket avp;
1158         av_init_packet( &avp );
1159         avp.data = data + pos;
1160         avp.size = size - pos;
1161
1162         int out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
1163         int len = avcodec_decode_audio3( context, buffer, &out_size, &avp );
1164         if ( len <= 0 )
1165         {
1166             return;
1167         }
1168         pos += len;
1169         if( out_size > 0 )
1170         {
1171             // We require signed 16-bit ints for the output format. If
1172             // we got something different convert it.
1173             if ( context->sample_fmt != SAMPLE_FMT_S16 )
1174             {
1175                 // Note: av_audio_convert seems to be a work-in-progress but
1176                 //       looks like it will eventually handle general audio
1177                 //       mixdowns which would allow us much more flexibility
1178                 //       in handling multichannel audio in HB. If we were doing
1179                 //       anything more complicated than a one-for-one format
1180                 //       conversion we'd probably want to cache the converter
1181                 //       context in the pv.
1182                 int isamp = av_get_bits_per_sample_format( context->sample_fmt ) / 8;
1183                 AVAudioConvert *ctx = av_audio_convert_alloc( SAMPLE_FMT_S16, 1,
1184                                                               context->sample_fmt, 1,
1185                                                               NULL, 0 );
1186                 // get output buffer size (in 2-byte samples) then malloc a buffer
1187                 out_size = ( out_size * 2 ) / isamp;
1188                 buffer = av_malloc( out_size );
1189
1190                 // we're doing straight sample format conversion which behaves as if
1191                 // there were only one channel.
1192                 const void * const ibuf[6] = { pv->buffer };
1193                 void * const obuf[6] = { buffer };
1194                 const int istride[6] = { isamp };
1195                 const int ostride[6] = { 2 };
1196
1197                 av_audio_convert( ctx, obuf, ostride, ibuf, istride, out_size >> 1 );
1198                 av_audio_convert_free( ctx );
1199             }
1200             hb_buffer_t *buf = hb_buffer_init( 2 * out_size );
1201
1202             // convert from bytes to total samples
1203             out_size >>= 1;
1204
1205             double pts = pv->pts_next;
1206             buf->start = pts;
1207             pts += out_size * pv->duration;
1208             buf->stop  = pts;
1209             pv->pts_next = pts;
1210
1211             float *fl32 = (float *)buf->data;
1212             int i;
1213             for( i = 0; i < out_size; ++i )
1214             {
1215                 fl32[i] = buffer[i];
1216             }
1217             hb_list_add( pv->list, buf );
1218
1219             // if we allocated a buffer for sample format conversion, free it
1220             if ( buffer != pv->buffer )
1221             {
1222                 av_free( buffer );
1223             }
1224         }
1225     }
1226 }
1227
1228 static int decavcodecaiWork( hb_work_object_t *w, hb_buffer_t **buf_in,
1229                     hb_buffer_t **buf_out )
1230 {
1231     if ( (*buf_in)->size <= 0 )
1232     {
1233         /* EOF on input stream - send it downstream & say that we're done */
1234         *buf_out = *buf_in;
1235         *buf_in = NULL;
1236         return HB_WORK_DONE;
1237     }
1238
1239     hb_work_private_t *pv = w->private_data;
1240
1241     if ( (*buf_in)->start < -1 && pv->pts_next <= 0 )
1242     {
1243         // discard buffers that start before video time 0
1244         *buf_out = NULL;
1245         return HB_WORK_OK;
1246     }
1247
1248     if ( ! pv->context )
1249     {
1250         init_ffmpeg_context( w );
1251         // duration is a scaling factor to go from #bytes in the decoded
1252         // frame to frame time (in 90KHz mpeg ticks). 'channels' converts
1253         // total samples to per-channel samples. 'sample_rate' converts
1254         // per-channel samples to seconds per sample and the 90000
1255         // is mpeg ticks per second.
1256         pv->duration = 90000. /
1257                     (double)( pv->context->sample_rate * pv->context->channels );
1258     }
1259     hb_buffer_t *in = *buf_in;
1260
1261     // if the packet has a timestamp use it if we don't have a timestamp yet
1262     // or if there's been a timing discontinuity of more than 100ms.
1263     if ( in->start >= 0 &&
1264          ( pv->pts_next < 0 || ( in->start - pv->pts_next ) > 90*100 ) )
1265     {
1266         pv->pts_next = in->start;
1267     }
1268     prepare_ffmpeg_buffer( in );
1269     decodeAudio( pv, in->data, in->size );
1270     *buf_out = link_buf_list( pv );
1271
1272     return HB_WORK_OK;
1273 }
1274
1275 hb_work_object_t hb_decavcodecvi =
1276 {
1277     WORK_DECAVCODECVI,
1278     "Video decoder (ffmpeg streams)",
1279     decavcodecviInit,
1280     decavcodecviWork,
1281     decavcodecClose,
1282     decavcodecviInfo,
1283     decavcodecvBSInfo
1284 };
1285
1286 hb_work_object_t hb_decavcodecai =
1287 {
1288     WORK_DECAVCODECAI,
1289     "Audio decoder (ffmpeg streams)",
1290     decavcodecviInit,
1291     decavcodecaiWork,
1292     decavcodecClose,
1293     decavcodecInfo,
1294     decavcodecBSInfo
1295 };