libhb/encx264.c

   1 /* $Id: encx264.c,v 1.21 2005/11/04 13:09:41 titer Exp $
   2
   3    This file is part of the HandBrake source code.
   4    Homepage: <http://handbrake.fr/>.
   5    It may be used under the terms of the GNU General Public License. */
   6
   7 #include <stdarg.h>
   8
   9 #include "hb.h"
  10
  11 #include "x264.h"
  12
  13 int  encx264Init( hb_work_object_t *, hb_job_t * );
  14 int  encx264Work( hb_work_object_t *, hb_buffer_t **, hb_buffer_t ** );
  15 void encx264Close( hb_work_object_t * );
  16
  17 hb_work_object_t hb_encx264 =
  18 {
  19     WORK_ENCX264,
  20     "H.264/AVC encoder (libx264)",
  21     encx264Init,
  22     encx264Work,
  23     encx264Close
  24 };
  25
  26 #define DTS_BUFFER_SIZE 32
  27
  28 /*
  29  * The frame info struct remembers information about each frame across calls
  30  * to x264_encoder_encode. Since frames are uniquely identified by their
  31  * timestamp, we use some bits of the timestamp as an index. The LSB is
  32  * chosen so that two successive frames will have different values in the
  33  * bits over any plausible range of frame rates. (Starting with bit 9 allows
  34  * any frame rate slower than 175fps.) The MSB determines the size of the array.
  35  * It is chosen so that two frames can't use the same slot during the
  36  * encoder's max frame delay (set by the standard as 16 frames) and so
  37  * that, up to some minimum frame rate, frames are guaranteed to map to
  38  * different slots. (An MSB of 16 which is 2^(16-9+1) = 256 slots guarantees
  39  * no collisions down to a rate of 1.4 fps).
  40  */
  41 #define FRAME_INFO_MAX2 (9)     // 2^9 = 512; 90000/512 = 175 frames/sec
  42 #define FRAME_INFO_MIN2 (16)    // 2^16 = 65536; 90000/65536 = 1.4 frames/sec
  43 #define FRAME_INFO_SIZE (1 << (FRAME_INFO_MIN2 - FRAME_INFO_MAX2 + 1))
  44 #define FRAME_INFO_MASK (FRAME_INFO_SIZE - 1)
  45
  46 struct hb_work_private_s
  47 {
  48     hb_job_t       * job;
  49     x264_t         * x264;
  50     x264_picture_t   pic_in;
  51     uint8_t         *x264_allocated_pic;
  52
  53     int            chap_mark;   // saved chap mark when we're propagating it
  54     int64_t        dts_next;    // DTS start time value for next output frame
  55     int64_t        last_stop;   // Debugging - stop time of previous input frame
  56     int64_t        init_delay;
  57     int64_t        max_delay;   // if init_delay too small, delay really needed
  58     int64_t        next_chap;
  59
  60     struct {
  61         int64_t duration;
  62     } frame_info[FRAME_INFO_SIZE];
  63
  64     char             filename[1024];
  65 };
  66
  67 /***********************************************************************
  68  * hb_work_encx264_init
  69  ***********************************************************************
  70  *
  71  **********************************************************************/
  72 int encx264Init( hb_work_object_t * w, hb_job_t * job )
  73 {
  74     x264_param_t       param;
  75     x264_nal_t       * nal;
  76     int                nal_count;
  77
  78     hb_work_private_t * pv = calloc( 1, sizeof( hb_work_private_t ) );
  79     w->private_data = pv;
  80
  81     pv->job = job;
  82
  83     memset( pv->filename, 0, 1024 );
  84     hb_get_tempory_filename( job->h, pv->filename, "x264.log" );
  85
  86     x264_param_default( &param );
  87
  88     param.i_threads    = ( hb_get_cpu_count() * 3 / 2 );
  89     param.i_width      = job->width;
  90     param.i_height     = job->height;
  91     param.i_fps_num    = job->vrate;
  92     param.i_fps_den    = job->vrate_base;
  93
  94     if (job->vrate_base != 1080000)
  95     {
  96         /* If the fps isn't 25, adjust the key intervals. Add 1 because
  97            we want 24, not 23 with a truncated remainder.               */
  98         param.i_keyint_min     = (job->vrate / job->vrate_base) + 1;
  99         param.i_keyint_max = (10 * job->vrate / job->vrate_base) + 1;
 100         hb_log("encx264: keyint-min: %i, keyint-max: %i", param.i_keyint_min, param.i_keyint_max);
 101     }
 102
 103     param.i_log_level  = X264_LOG_INFO;
 104     if( job->h264_level )
 105     {
 106         param.b_cabac     = 0;
 107         param.i_level_idc = job->h264_level;
 108         hb_log( "encx264: encoding at level %i",
 109                 param.i_level_idc );
 110     }
 111
 112     /* Slightly faster with minimal quality lost */
 113     param.analyse.i_subpel_refine = 4;
 114
 115     /*
 116         This section passes the string x264opts to libx264 for parsing into
 117         parameter names and values.
 118
 119         The string is set up like this:
 120         option1=value1:option2=value 2
 121
 122         So, you have to iterate through based on the colons, and then put
 123         the left side of the equals sign in "name" and the right side into
 124         "value." Then you hand those strings off to x264 for interpretation.
 125
 126         This is all based on the universal x264 option handling Loren
 127         Merritt implemented in the Mplayer/Mencoder project.
 128      */
 129
 130     if( job->x264opts != NULL && *job->x264opts != '\0' )
 131     {
 132         char *x264opts, *x264opts_start;
 133
 134         x264opts = x264opts_start = strdup(job->x264opts);
 135
 136         while( x264opts_start && *x264opts )
 137         {
 138             char *name = x264opts;
 139             char *value;
 140             int ret;
 141
 142             x264opts += strcspn( x264opts, ":" );
 143             if( *x264opts )
 144             {
 145                 *x264opts = 0;
 146                 x264opts++;
 147             }
 148
 149             value = strchr( name, '=' );
 150             if( value )
 151             {
 152                 *value = 0;
 153                 value++;
 154             }
 155
 156             /*
 157                When B-frames are enabled, the max frame count increments
 158                by 1 (regardless of the number of B-frames). If you don't
 159                change the duration of the video track when you mux, libmp4
 160                barfs.  So, check if the x264opts are using B-frames, and
 161                when they are, set the boolean job->areBframes as true.
 162              */
 163
 164             if( !( strcmp( name, "bframes" ) ) )
 165             {
 166                 if( atoi( value ) > 0 )
 167                 {
 168                     job->areBframes = 1;
 169                 }
 170             }
 171
 172             /* Note b-pyramid here, so the initial delay can be doubled */
 173             if( !( strcmp( name, "b-pyramid" ) ) )
 174             {
 175                 if( value != NULL )
 176                 {
 177                     if( atoi( value ) > 0 )
 178                     {
 179                         job->areBframes = 2;
 180                     }
 181                 }
 182                 else
 183                 {
 184                     job->areBframes = 2;
 185                 }
 186             }
 187
 188             /* Here's where the strings are passed to libx264 for parsing. */
 189             ret = x264_param_parse( &param, name, value );
 190
 191             /*  Let x264 sanity check the options for us*/
 192             if( ret == X264_PARAM_BAD_NAME )
 193                 hb_log( "x264 options: Unknown suboption %s", name );
 194             if( ret == X264_PARAM_BAD_VALUE )
 195                 hb_log( "x264 options: Bad argument %s=%s", name, value ? value : "(null)" );
 196         }
 197         free(x264opts_start);
 198     }
 199
 200
 201     if( job->pixel_ratio )
 202     {
 203         param.vui.i_sar_width = job->pixel_aspect_width;
 204         param.vui.i_sar_height = job->pixel_aspect_height;
 205
 206         hb_log( "encx264: encoding with stored aspect %d/%d",
 207                 param.vui.i_sar_width, param.vui.i_sar_height );
 208     }
 209
 210
 211     if( job->vquality > 0.0 && job->vquality < 1.0 )
 212     {
 213         switch( job->crf )
 214         {
 215             case 1:
 216                 /*Constant RF*/
 217                 param.rc.i_rc_method = X264_RC_CRF;
 218                 param.rc.f_rf_constant = 51 - job->vquality * 51;
 219                 hb_log( "encx264: Encoding at constant RF %f",
 220                         param.rc.f_rf_constant );
 221                 break;
 222
 223             case 0:
 224                 /*Constant QP*/
 225                 param.rc.i_rc_method = X264_RC_CQP;
 226                 param.rc.i_qp_constant = 51 - job->vquality * 51;
 227                 hb_log( "encx264: encoding at constant QP %d",
 228                         param.rc.i_qp_constant );
 229                 break;
 230         }
 231     }
 232     else if( job->vquality == 0 || job->vquality >= 1.0 )
 233     {
 234         /* Use the vquality as a raw RF or QP
 235           instead of treating it like a percentage. */
 236         switch( job->crf )
 237         {
 238             case 1:
 239                 /*Constant RF*/
 240                 param.rc.i_rc_method = X264_RC_CRF;
 241                 param.rc.f_rf_constant = job->vquality;
 242                 hb_log( "encx264: Encoding at constant RF %f",
 243                         param.rc.f_rf_constant );
 244                 break;
 245
 246             case 0:
 247                 /*Constant QP*/
 248                 param.rc.i_rc_method = X264_RC_CQP;
 249                 param.rc.i_qp_constant = job->vquality;
 250                 hb_log( "encx264: encoding at constant QP %d",
 251                         param.rc.i_qp_constant );
 252                 break;
 253         }
 254     }
 255     else
 256     {
 257         /* Rate control */
 258         param.rc.i_rc_method = X264_RC_ABR;
 259         param.rc.i_bitrate = job->vbitrate;
 260         switch( job->pass )
 261         {
 262             case 1:
 263                 param.rc.b_stat_write  = 1;
 264                 param.rc.psz_stat_out = pv->filename;
 265                 break;
 266             case 2:
 267                 param.rc.b_stat_read = 1;
 268                 param.rc.psz_stat_in = pv->filename;
 269                 break;
 270         }
 271     }
 272
 273     hb_log( "encx264: opening libx264 (pass %d)", job->pass );
 274     pv->x264 = x264_encoder_open( &param );
 275
 276     x264_encoder_headers( pv->x264, &nal, &nal_count );
 277
 278     /* Sequence Parameter Set */
 279     w->config->h264.sps_length = 1 + nal[1].i_payload;
 280     w->config->h264.sps[0] = 0x67;
 281     memcpy( &w->config->h264.sps[1], nal[1].p_payload, nal[1].i_payload );
 282
 283     /* Picture Parameter Set */
 284     w->config->h264.pps_length = 1 + nal[2].i_payload;
 285     w->config->h264.pps[0] = 0x68;
 286     memcpy( &w->config->h264.pps[1], nal[2].p_payload, nal[2].i_payload );
 287
 288     x264_picture_alloc( &pv->pic_in, X264_CSP_I420,
 289                         job->width, job->height );
 290
 291     pv->x264_allocated_pic = pv->pic_in.img.plane[0];
 292
 293     pv->dts_next = -1;
 294     pv->next_chap = 0;
 295
 296     if (job->areBframes)
 297     {
 298         /* Basic initDelay value is the clockrate divided by the FPS
 299            -- the length of one frame in clockticks.                  */
 300         pv->init_delay = 90000. / ((double)job->vrate / (double)job->vrate_base);
 301
 302         /* 23.976-length frames are 3753.75 ticks long on average but the DVD
 303            creates that average rate by repeating 59.95 fields so the max
 304            frame size is actually 4504.5 (3 field times) */
 305         if (pv->init_delay == 3753)
 306             pv->init_delay = 4505;
 307
 308         /* frame rates are not exact in the DVD 90KHz PTS clock (they are
 309            exact in the DVD 27MHz system clock but we never see that) so the
 310            rates computed above are all +-1 due to quantization. Worst case
 311            is when a clock-rounded-down frame is adjacent to a rounded-up frame
 312            which makes one of the frames 2 ticks longer than the nominal
 313            frame time. */
 314         pv->init_delay += 2;
 315
 316         /* For VFR, libhb sees the FPS as 29.97, but the longest frames
 317            will use the duration of frames running at 23.976fps instead.. */
 318         if (job->vfr)
 319         {
 320             pv->init_delay = 7506;
 321         }
 322
 323         /* The delay is 1 frames for regular b-frames, 2 for b-pyramid. */
 324         pv->init_delay *= job->areBframes;
 325     }
 326
 327     return 0;
 328 }
 329
 330 void encx264Close( hb_work_object_t * w )
 331 {
 332     hb_work_private_t * pv = w->private_data;
 333     /*
 334      * Patch the x264 allocated data back in so that x264 can free it
 335      * we have been using our own buffers during the encode to avoid copying.
 336      */
 337     pv->pic_in.img.plane[0] = pv->x264_allocated_pic;
 338     x264_picture_clean( &pv->pic_in );
 339     x264_encoder_close( pv->x264 );
 340     free( pv );
 341     w->private_data = NULL;
 342
 343     /* TODO */
 344 }
 345
 346 /*
 347  * see comments in definition of 'frame_info' in pv struct for description
 348  * of what these routines are doing.
 349  */
 350 static void save_frame_info( hb_work_private_t * pv, hb_buffer_t * in )
 351 {
 352     int i = (in->start >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
 353     pv->frame_info[i].duration = in->stop - in->start;
 354 }
 355
 356 static int64_t get_frame_duration( hb_work_private_t * pv, int64_t pts )
 357 {
 358     int i = (pts >> FRAME_INFO_MAX2) & FRAME_INFO_MASK;
 359     return pv->frame_info[i].duration;
 360 }
 361
 362 int encx264Work( hb_work_object_t * w, hb_buffer_t ** buf_in,
 363                   hb_buffer_t ** buf_out )
 364 {
 365     hb_work_private_t * pv = w->private_data;
 366     hb_job_t    * job = pv->job;
 367     hb_buffer_t * in = *buf_in, * buf;
 368     x264_picture_t   pic_out;
 369     int           i_nal;
 370     x264_nal_t  * nal;
 371     int i;
 372
 373     if( in->data )
 374     {
 375         /*
 376          * Point x264 at our current buffers Y(UV) data.
 377          */
 378         pv->pic_in.img.plane[0] = in->data;
 379
 380         if( job->grayscale )
 381         {
 382             /* XXX x264 has currently no option for grayscale encoding */
 383             memset( pv->pic_in.img.plane[1], 0x80, job->width * job->height / 4 );
 384             memset( pv->pic_in.img.plane[2], 0x80, job->width * job->height / 4 );
 385         }
 386         else
 387         {
 388             /*
 389              * Point x264 at our buffers (Y)UV data
 390              */
 391             pv->pic_in.img.plane[1] = in->data + job->width * job->height;
 392             pv->pic_in.img.plane[2] = in->data + 5 * job->width *
 393                 job->height / 4;
 394         }
 395
 396         if( pv->dts_next == -1 )
 397         {
 398             /* we don't have a start time yet so use the first frame's
 399              * start. All other frame times will be determined by the
 400              * sum of the prior output frame durations in *DTS* order
 401              * (not by the order they arrive here). This timing change is
 402              * essential for VFR with b-frames but a complete nop otherwise.
 403              */
 404             pv->dts_next = in->start;
 405         }
 406         if( in->new_chap && job->chapter_markers )
 407         {
 408             /* chapters have to start with an IDR frame so request that this
 409                frame be coded as IDR. Since there may be up to 16 frames
 410                currently buffered in the encoder remember the timestamp so
 411                when this frame finally pops out of the encoder we'll mark
 412                its buffer as the start of a chapter. */
 413             pv->pic_in.i_type = X264_TYPE_IDR;
 414             if( pv->next_chap == 0 )
 415             {
 416                 pv->next_chap = in->start;
 417                 pv->chap_mark = in->new_chap;
 418             }
 419             /* don't let 'work_loop' put a chapter mark on the wrong buffer */
 420             in->new_chap = 0;
 421         }
 422         else
 423         {
 424             pv->pic_in.i_type = X264_TYPE_AUTO;
 425         }
 426         pv->pic_in.i_qpplus1 = 0;
 427
 428         /* XXX this is temporary debugging code to check that the upstream
 429          * modules (render & sync) have generated a continuous, self-consistent
 430          * frame stream with the current frame's start time equal to the
 431          * previous frame's stop time.
 432          */
 433         if( pv->last_stop != in->start )
 434         {
 435             hb_log("encx264 input continuity err: last stop %lld  start %lld",
 436                     pv->last_stop, in->start);
 437         }
 438         pv->last_stop = in->stop;
 439
 440         // Remember info about this frame that we need to pass across
 441         // the x264_encoder_encode call (since it reorders frames).
 442         save_frame_info( pv, in );
 443
 444         /* Feed the input DTS to x264 so it can figure out proper output PTS */
 445         pv->pic_in.i_pts = in->start;
 446
 447         x264_encoder_encode( pv->x264, &nal, &i_nal,
 448                              &pv->pic_in, &pic_out );
 449     }
 450     else
 451     {
 452         x264_encoder_encode( pv->x264, &nal, &i_nal,
 453                              NULL, &pic_out );
 454         /* No more delayed B frames */
 455         if( i_nal == 0 )
 456         {
 457             *buf_out = NULL;
 458             return HB_WORK_DONE;
 459         }
 460         else
 461         {
 462         /*  Since we output at least one more frame, drop another empty
 463             one onto our input fifo.  We'll keep doing this automatically
 464             until we stop getting frames out of the encoder. */
 465             hb_fifo_push(w->fifo_in, hb_buffer_init(0));
 466         }
 467     }
 468
 469     if( i_nal )
 470     {
 471         /* Should be way too large */
 472         buf        = hb_buffer_init( 3 * job->width * job->height / 2 );
 473         buf->size  = 0;
 474         buf->frametype   = 0;
 475
 476         /* Get next DTS value to use */
 477         int64_t dts_start = pv->dts_next;
 478
 479         /* compute the stop time based on the original frame's duration */
 480         int64_t dts_stop  = dts_start + get_frame_duration( pv, pic_out.i_pts );
 481         pv->dts_next = dts_stop;
 482
 483         for( i = 0; i < i_nal; i++ )
 484         {
 485             int size, data;
 486
 487             data = buf->alloc - buf->size;
 488             if( ( size = x264_nal_encode( buf->data + buf->size, &data,
 489                                           1, &nal[i] ) ) < 1 )
 490             {
 491                 continue;
 492             }
 493
 494             if( job->mux & HB_MUX_AVI )
 495             {
 496                 if( nal[i].i_ref_idc == NAL_PRIORITY_HIGHEST )
 497                 {
 498                     buf->frametype = HB_FRAME_KEY;
 499                 }
 500                 buf->size += size;
 501                 continue;
 502             }
 503
 504             /* H.264 in .mp4 */
 505             switch( buf->data[buf->size+4] & 0x1f )
 506             {
 507                 case 0x7:
 508                 case 0x8:
 509                     /* SPS, PPS */
 510                     break;
 511
 512                 default:
 513                     /* H.264 in mp4 (stolen from mp4creator) */
 514                     buf->data[buf->size+0] = ( ( size - 4 ) >> 24 ) & 0xFF;
 515                     buf->data[buf->size+1] = ( ( size - 4 ) >> 16 ) & 0xFF;
 516                     buf->data[buf->size+2] = ( ( size - 4 ) >>  8 ) & 0xFF;
 517                     buf->data[buf->size+3] = ( ( size - 4 ) >>  0 ) & 0xFF;
 518                     switch( pic_out.i_type )
 519                     {
 520                     /*  Decide what type of frame we have. */
 521                         case X264_TYPE_IDR:
 522                             buf->frametype = HB_FRAME_IDR;
 523                             /* if we have a chapter marker pending and this
 524                                frame's presentation time stamp is at or after
 525                                the marker's time stamp, use this as the
 526                                chapter start. */
 527                             if( pv->next_chap != 0 && pv->next_chap <= pic_out.i_pts )
 528                             {
 529                                 pv->next_chap = 0;
 530                                 buf->new_chap = pv->chap_mark;
 531                             }
 532                             break;
 533                         case X264_TYPE_I:
 534                             buf->frametype = HB_FRAME_I;
 535                             break;
 536                         case X264_TYPE_P:
 537                             buf->frametype = HB_FRAME_P;
 538                             break;
 539                         case X264_TYPE_B:
 540                             buf->frametype = HB_FRAME_B;
 541                             break;
 542                     /*  This is for b-pyramid, which has reference b-frames
 543                         However, it doesn't seem to ever be used... */
 544                         case X264_TYPE_BREF:
 545                             buf->frametype = HB_FRAME_BREF;
 546                             break;
 547                     /*  If it isn't the above, what type of frame is it?? */
 548                         default:
 549                             buf->frametype = 0;
 550                     }
 551
 552                     /* Since libx264 doesn't tell us when b-frames are
 553                        themselves reference frames, figure it out on our own. */
 554                     if( (buf->frametype == HB_FRAME_B) && (nal[i].i_ref_idc != NAL_PRIORITY_DISPOSABLE) )
 555                         buf->frametype = HB_FRAME_BREF;
 556
 557                     /* Store the output presentation time stamp
 558                        from x264 for use by muxmp4 in off-setting
 559                        b-frames with the CTTS atom. */
 560                     buf->renderOffset = pic_out.i_pts - dts_start + pv->init_delay;
 561                     if ( buf->renderOffset < 0 )
 562                     {
 563                         if ( dts_start - pic_out.i_pts > pv->max_delay )
 564                         {
 565                             pv->max_delay = dts_start - pic_out.i_pts;
 566                             hb_log( "encx264: init_delay too small: "
 567                                     "is %lld need %lld", pv->init_delay,
 568                                     pv->max_delay );
 569                         }
 570                         buf->renderOffset = 0;
 571                     }
 572                     buf->size += size;
 573             }
 574         }
 575         /* Send out the next dts values */
 576         buf->start = dts_start;
 577         buf->stop  = dts_stop;
 578     }
 579
 580     else
 581         buf = NULL;
 582
 583     *buf_out = buf;
 584
 585     return HB_WORK_OK;
 586 }