From 60fa9149ab1937229e10120292f0fd3599bf6617 Mon Sep 17 00:00:00 2001 From: dynaflash Date: Sun, 8 Jun 2008 21:49:14 +0000 Subject: [PATCH] Update x264 to x264-r877-c74a8e2 - Eliminates the need for our last vbv 2 pass patch - Updated reporting on processor optimizations - 5 - 10 % faster trellis - assorted improved cpu optimizations git-svn-id: svn://localhost/HandBrake/trunk@1504 b64f7644-9d1e-0410-96f1-a4d463321fa5 --- contrib/Jamfile | 3 +- contrib/patch-x264-vbv-2pass.patch | 497 ------------------------------------- contrib/version_x264.txt | 2 +- 3 files changed, 2 insertions(+), 500 deletions(-) delete mode 100644 contrib/patch-x264-vbv-2pass.patch diff --git a/contrib/Jamfile b/contrib/Jamfile index aaeb38f0..289d7c50 100644 --- a/contrib/Jamfile +++ b/contrib/Jamfile @@ -395,8 +395,7 @@ rule LibX264 LIBX264_PATCH += " $(PATCH) -p1 < ../patch-x264-solaris.patch && " ; } LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-idr.patch && " ; - LIBX264_PATCH += "$(PATCH) -p0 < ../patch-x264-vbv-2pass.patch && " ; - Depends $(<) : $(>) ; + Depends $(<) : $(>) ; Depends lib : $(<) ; } actions LibX264 diff --git a/contrib/patch-x264-vbv-2pass.patch b/contrib/patch-x264-vbv-2pass.patch deleted file mode 100644 index 7f469238..00000000 --- a/contrib/patch-x264-vbv-2pass.patch +++ /dev/null @@ -1,497 +0,0 @@ -Index: common/frame.c -=================================================================== ---- common/frame.c -+++ common/frame.c -@@ -844,11 +844,39 @@ void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ) - x264_pthread_mutex_unlock( &frame->mutex ); - } - -+void x264_frame_size_estimated_set( x264_t *h, int bits ) -+{ -+ x264_pthread_mutex_lock( &h->fenc->mutex ); -+ x264_ratecontrol_set_estimated_size(h, bits); -+ x264_pthread_mutex_unlock( &h->fenc->mutex ); -+} -+ -+int x264_frame_size_estimated_get( x264_t const *h) -+{ -+ int size; -+ x264_pthread_mutex_lock( &h->fenc->mutex ); -+ size = x264_ratecontrol_get_estimated_size(h); -+ x264_pthread_mutex_unlock( &h->fenc->mutex ); -+ return size; -+} -+ - #else - void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed ) - {} - void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ) - {} -+ -+void x264_frame_size_estimated_set( x264_t *h, int bits ) -+{ -+ x264_ratecontrol_set_estimated_size(h, bits); -+} -+ -+int x264_frame_size_estimated_get( x264_t const *h) -+{ -+ int size; -+ size = x264_ratecontrol_set_estimated_size(h); -+ return size; -+} - #endif - - -Index: common/frame.h -=================================================================== ---- common/frame.h -+++ common/frame.h -@@ -121,6 +121,9 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf ); - void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed ); - void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); - -+void x264_frame_size_estimated_set( x264_t *h, int bits ); -+int x264_frame_size_estimated_get( x264_t const *h); -+ - void x264_frame_push( x264_frame_t **list, x264_frame_t *frame ); - x264_frame_t *x264_frame_pop( x264_frame_t **list ); - void x264_frame_unshift( x264_frame_t **list, x264_frame_t *frame ); -Index: encoder/encoder.c -=================================================================== ---- encoder/encoder.c -+++ encoder/encoder.c -@@ -631,6 +631,7 @@ x264_t *x264_encoder_open ( x264_param_t *param ) - || h->param.rc.i_rc_method == X264_RC_CRF - || h->param.b_bframe_adaptive - || h->param.b_pre_scenecut ); -+ h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0); - - h->frames.i_last_idr = - h->param.i_keyint_max; - h->frames.i_input = 0; -Index: encoder/ratecontrol.c -=================================================================== -old mode 100644 -new mode 100755 ---- encoder/ratecontrol.c -+++ encoder/ratecontrol.c -@@ -43,6 +43,7 @@ typedef struct - int p_tex_bits; - int misc_bits; - uint64_t expected_bits; -+ double expected_vbv; - float new_qscale; - int new_qp; - int i_count; -@@ -121,6 +122,7 @@ struct x264_ratecontrol_t - int frame_count[5]; /* number of frames of each type */ - - /* MBRC stuff */ -+ double frame_size_estimated; - double frame_size_planned; - predictor_t *row_pred; - predictor_t row_preds[5]; -@@ -331,7 +333,7 @@ int x264_ratecontrol_new( x264_t *h ) - rc->rate_tolerance = 0.01; - } - -- h->mb.b_variable_qp = (rc->b_vbv && !rc->b_2pass) || h->param.rc.i_aq_mode; -+ h->mb.b_variable_qp = rc->b_vbv || h->param.rc.i_aq_mode; - - if( rc->b_abr ) - { -@@ -718,6 +720,16 @@ void x264_ratecontrol_delete( x264_t *h ) - x264_free( rc ); - } - -+void x264_ratecontrol_set_estimated_size( x264_t *h, int bits ) -+{ -+ h->rc->frame_size_estimated = bits; -+} -+ -+int x264_ratecontrol_get_estimated_size( x264_t const *h) -+{ -+ return h->rc->frame_size_estimated; -+} -+ - static void accum_p_qp_update( x264_t *h, float qp ) - { - x264_ratecontrol_t *rc = h->rc; -@@ -851,17 +863,25 @@ double predict_row_size( x264_t *h, int y, int qp ) - return (pred_s + pred_t) / 2; - } - --double predict_row_size_sum( x264_t *h, int y, int qp ) -+double row_bits_so_far( x264_t *h, int y ) - { - int i; - double bits = 0; - for( i = 0; i <= y; i++ ) - bits += h->fdec->i_row_bits[i]; -+ return bits; -+} -+ -+double predict_row_size_sum( x264_t *h, int y, int qp ) -+{ -+ int i; -+ double bits = row_bits_so_far(h, y); - for( i = y+1; i < h->sps->i_mb_height; i++ ) - bits += predict_row_size( h, i, qp ); - return bits; - } - -+ - void x264_ratecontrol_mb( x264_t *h, int bits ) - { - x264_ratecontrol_t *rc = h->rc; -@@ -873,7 +893,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) - rc->qpa_rc += rc->qpm; - rc->qpa_aq += h->mb.i_qp; - -- if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv || rc->b_2pass ) -+ if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv) - return; - - h->fdec->i_row_qp[y] = rc->qpm; -@@ -883,9 +903,13 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) - /* B-frames shouldn't use lower QP than their reference frames */ - if( y < h->sps->i_mb_height-1 ) - { -- rc->qpm = X264_MAX( rc->qp, -- X264_MIN( h->fref0[0]->i_row_qp[y+1], -- h->fref1[0]->i_row_qp[y+1] )); -+ int i_estimated; -+ int avg_qp = X264_MAX(h->fref0[0]->i_row_qp[y+1], h->fref1[0]->i_row_qp[y+1]) -+ + rc->pb_offset * ((h->fenc->i_type == X264_TYPE_BREF) ? 0.5 : 1); -+ rc->qpm = X264_MIN(X264_MAX( rc->qp, avg_qp), 51); //avg_qp could go higher than 51 due to pb_offset -+ i_estimated = row_bits_so_far(h, y); //FIXME: compute full estimated size -+ if (i_estimated > h->rc->frame_size_planned) -+ x264_frame_size_estimated_set(h, i_estimated); - } - } - else -@@ -901,26 +925,49 @@ void x264_ratecontrol_mb( x264_t *h, int bits ) - int i_qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, h->param.rc.i_qp_max ); - int i_qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min ); - float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned; -+ float rc_tol = 1; -+ float headroom = 0; -+ -+ /* Don't modify the row QPs until a sufficent amount of the bits of the frame have been processed, in case a flat */ -+ /* area at the top of the frame was measured inaccurately. */ -+ if(row_bits_so_far(h,y) < 0.05 * rc->frame_size_planned) -+ { -+ return; -+ } -+ -+ headroom = buffer_left_planned/rc->buffer_size; -+ if(h->sh.i_type != SLICE_TYPE_I) -+ headroom /= 2; -+ rc_tol += headroom; - - if( !rc->b_vbv_min_rate ) - i_qp_min = X264_MAX( i_qp_min, h->sh.i_qp ); - - while( rc->qpm < i_qp_max -- && (b1 > rc->frame_size_planned * 1.15 -+ && (b1 > rc->frame_size_planned * rc_tol - || (rc->buffer_fill - b1 < buffer_left_planned * 0.5))) - { - rc->qpm ++; - b1 = predict_row_size_sum( h, y, rc->qpm ); - } - -+ /* avoid VBV underflow */ -+ while( (rc->qpm < h->param.rc.i_qp_max) -+ && (rc->buffer_fill - b1 < rc->buffer_size * 0.005)) -+ { -+ rc->qpm ++; -+ b1 = predict_row_size_sum( h, y, rc->qpm ); -+ } -+ - while( rc->qpm > i_qp_min -- && buffer_left_planned > rc->buffer_size * 0.4 -+ && ((buffer_left_planned > rc->buffer_size * 0.4) || rc->qpm > h->fdec->i_row_qp[0]) - && ((b1 < rc->frame_size_planned * 0.8 && rc->qpm <= prev_row_qp) - || b1 < (rc->buffer_fill - rc->buffer_size + rc->buffer_rate) * 1.1) ) - { - rc->qpm --; - b1 = predict_row_size_sum( h, y, rc->qpm ); - } -+ x264_frame_size_estimated_set(h, b1); - } - } - } -@@ -1249,7 +1296,7 @@ static void update_vbv( x264_t *h, int bits ) - return; - - rct->buffer_fill_final += rct->buffer_rate - bits; -- if( rct->buffer_fill_final < 0 && !rct->b_2pass ) -+ if( rct->buffer_fill_final < 0 ) - x264_log( h, X264_LOG_WARNING, "VBV underflow (%.0f bits)\n", rct->buffer_fill_final ); - rct->buffer_fill_final = x264_clip3f( rct->buffer_fill_final, 0, rct->buffer_size ); - } -@@ -1269,6 +1316,7 @@ static void update_vbv_plan( x264_t *h ) - double bits = t->rc->frame_size_planned; - if( !t->b_thread_active ) - continue; -+ bits = X264_MAX(bits, x264_frame_size_estimated_get(t)); - rcc->buffer_fill += rcc->buffer_rate - bits; - rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size ); - } -@@ -1405,6 +1453,7 @@ static float rate_estimate_qscale( x264_t *h ) - q += rcc->pb_offset; - - rcc->frame_size_planned = predict_size( rcc->pred_b_from_p, q, h->fref1[h->i_ref1-1]->i_satd ); -+ x264_frame_size_estimated_set(h, rcc->frame_size_planned); - rcc->last_satd = 0; - return qp2qscale(q); - } -@@ -1425,6 +1474,24 @@ static float rate_estimate_qscale( x264_t *h ) - double w = x264_clip3f( time*100, 0.0, 1.0 ); - q *= pow( (double)total_bits / rcc->expected_bits_sum, w ); - } -+ if( rcc->b_vbv ) -+ { -+ double expected_size = qscale2bits(&rce, q); -+ double expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size; -+ double expected_fullness = rce.expected_vbv / rcc->buffer_size; -+ double qmax = q*(2 - expected_fullness); -+ double size_constraint = 1 + expected_fullness; -+ if (expected_fullness < .05) -+ qmax = lmax; -+ qmax = X264_MIN(qmax, lmax); -+ while( (expected_vbv < rce.expected_vbv/size_constraint) && (q < qmax) ) -+ { -+ q *= 1.05; -+ expected_size = qscale2bits(&rce, q); -+ expected_vbv = rcc->buffer_fill + rcc->buffer_rate - expected_size; -+ } -+ rcc->last_satd = x264_rc_analyse_slice( h ); -+ } - q = x264_clip3f( q, lmin, lmax ); - } - else /* 1pass ABR */ -@@ -1509,10 +1576,14 @@ static float rate_estimate_qscale( x264_t *h ) - rcc->last_qscale_for[pict_type] = - rcc->last_qscale = q; - -- if( !rcc->b_2pass && h->fenc->i_frame == 0 ) -+ if( !(rcc->b_2pass && !rcc->b_vbv) && h->fenc->i_frame == 0 ) - rcc->last_qscale_for[SLICE_TYPE_P] = q; - -- rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); -+ if( rcc->b_2pass && rcc->b_vbv) -+ rcc->frame_size_planned = qscale2bits(&rce, q); -+ else -+ rcc->frame_size_planned = predict_size( &rcc->pred[h->sh.i_type], q, rcc->last_satd ); -+ x264_frame_size_estimated_set(h, rcc->frame_size_planned); - return q; - } - } -@@ -1555,6 +1626,134 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next ) - /* the rest of the variables are either constant or thread-local */ - } - -+static int find_underflow( x264_t *h, double *fills, int *t0, int *t1, int over ) -+{ -+ /* find an interval ending on an overflow or underflow (depending on whether -+ * we're adding or removing bits), and starting on the earliest frame that -+ * can influence the buffer fill of that end frame. */ -+ x264_ratecontrol_t *rcc = h->rc; -+ const double buffer_min = (over ? .1 : .1) * rcc->buffer_size; -+ const double buffer_max = .9 * rcc->buffer_size; -+ double fill = fills[*t0-1]; -+ double parity = over ? 1. : -1.; -+ int i, start=-1, end=-1; -+ for(i=*t0; inum_entries; i++) -+ { -+ fill += (rcc->buffer_rate - qscale2bits(&rcc->entry[i], rcc->entry[i].new_qscale)) * parity; -+ fill = x264_clip3f(fill, 0, rcc->buffer_size); -+ fills[i] = fill; -+ if(fill <= buffer_min || i == 0) -+ { -+ if(end >= 0) -+ break; -+ start = i; -+ } -+ else if(fill >= buffer_max && start >= 0) -+ end = i; -+ } -+ *t0 = start; -+ *t1 = end; -+ return start>=0 && end>=0; -+} -+ -+static int fix_underflow( x264_t *h, int t0, int t1, double adjustment, double qscale_min, double qscale_max) -+{ -+ x264_ratecontrol_t *rcc = h->rc; -+ double qscale_orig, qscale_new; -+ int i; -+ int adjusted = 0; -+ if(t0 > 0) -+ t0++; -+ for(i=t0; i<=t1; i++) { -+ qscale_orig = rcc->entry[i].new_qscale; -+ qscale_orig = x264_clip3f(qscale_orig, qscale_min, qscale_max); -+ qscale_new = qscale_orig * adjustment; -+ qscale_new = x264_clip3f(qscale_new, qscale_min, qscale_max); -+ rcc->entry[i].new_qscale = qscale_new; -+ adjusted = adjusted || (qscale_new != qscale_orig); -+ } -+ return adjusted; -+} -+ -+static double count_expected_bits( x264_t *h ) -+{ -+ x264_ratecontrol_t *rcc = h->rc; -+ double expected_bits = 0; -+ int i; -+ for(i=0; inum_entries; i++) -+ { -+ ratecontrol_entry_t *rce = &rcc->entry[i]; -+ rce->expected_bits = expected_bits; -+ expected_bits += qscale2bits(rce, rce->new_qscale); -+ } -+ return expected_bits; -+} -+ -+static void vbv_pass2( x264_t *h ) -+{ -+ /* foreach interval of buffer_full .. underflow -+ * uniformly increase the qp of all frames in the interval until either -+ * buffer is full at some intermediate frame -+ * or the last frame in the interval no longer underflows -+ * recompute intervals and repeat -+ * then do the converse to put bits back into overflow areas until target size is met */ -+ -+ x264_ratecontrol_t *rcc = h->rc; -+ double *fills = x264_malloc((rcc->num_entries+1)*sizeof(double)); -+ double all_available_bits = h->param.rc.i_bitrate * 1000. * rcc->num_entries / rcc->fps; -+ double expected_bits = 0; -+ double adjustment; -+ double prev_bits = 0; -+ int i, t0, t1; -+ double qscale_min = qp2qscale(h->param.rc.i_qp_min); -+ double qscale_max = qp2qscale(h->param.rc.i_qp_max); -+ int iterations = 0; -+ int adj_min, adj_max; -+ -+ fills++; -+ -+ //adjust overall stream size -+ do { -+ iterations++; -+ prev_bits = expected_bits; -+ -+ if (expected_bits != 0) { //not first iteration -+ adjustment = X264_MAX(X264_MIN(expected_bits / all_available_bits, 0.999), 0.9); -+ fills[-1] = rcc->buffer_size * h->param.rc.f_vbv_buffer_init; -+ t0 = 0; -+ //fix overflows -+ adj_min = 1; -+ while(adj_min && find_underflow(h, fills, &t0, &t1, 1)) -+ { -+ adj_min = fix_underflow(h, t0, t1, adjustment, qscale_min, qscale_max); -+ t0 = t1; -+ } -+ } -+ -+ fills[-1] = rcc->buffer_size * (1. - h->param.rc.f_vbv_buffer_init); -+ t0 = 0; -+ //fix underflows - should be done after overflow, as we'd better undersize target than underflowing VBV -+ adj_max = 1; -+ while(adj_max && find_underflow(h, fills, &t0, &t1, 0)) -+ { -+ adj_max = fix_underflow(h, t0, t1, 1.001, qscale_min, qscale_max); -+ } -+ -+ expected_bits = count_expected_bits(h); -+ } while(expected_bits < .995*all_available_bits && expected_bits > prev_bits); -+ -+ if (!adj_max) -+ x264_log( h, X264_LOG_WARNING, "vbv-maxrate issue, qpmax or vbv-maxrate too low\n"); -+ -+ //store expected vbv filling values for tracking when encoding -+ for(i=0; inum_entries; i++) -+ rcc->entry[i].expected_vbv = rcc->buffer_size - fills[i]; -+ -+// x264_log( h, X264_LOG_INFO, "VBV RC initial iterations: %d \n", iterations); -+ -+ x264_free(fills-1); -+} -+ - static int init_pass2( x264_t *h ) - { - x264_ratecontrol_t *rcc = h->rc; -@@ -1643,7 +1842,6 @@ static int init_pass2( x264_t *h ) - rcc->last_non_b_pict_type = -1; - rcc->last_accum_p_norm = 1; - rcc->accum_p_norm = 0; -- rcc->buffer_fill = rcc->buffer_size * h->param.rc.f_vbv_buffer_init; - - /* find qscale */ - for(i=0; inum_entries; i++){ -@@ -1680,18 +1878,11 @@ static int init_pass2( x264_t *h ) - /* find expected bits */ - for(i=0; inum_entries; i++){ - ratecontrol_entry_t *rce = &rcc->entry[i]; -- double bits; - rce->new_qscale = clip_qscale(h, rce->pict_type, blurred_qscale[i]); - assert(rce->new_qscale >= 0); -- bits = qscale2bits(rce, rce->new_qscale); -- -- rce->expected_bits = expected_bits; -- expected_bits += bits; -- update_vbv(h, bits); -- rcc->buffer_fill = rcc->buffer_fill_final; -+ expected_bits += qscale2bits(rce, rce->new_qscale); - } - --//printf("expected:%llu available:%llu factor:%lf avgQ:%lf\n", (uint64_t)expected_bits, all_available_bits, rate_factor); - if(expected_bits > all_available_bits) rate_factor -= step; - } - -@@ -1699,6 +1890,10 @@ static int init_pass2( x264_t *h ) - if(filter_size > 1) - x264_free(blurred_qscale); - -+ if(rcc->b_vbv) -+ vbv_pass2(h); -+ expected_bits = count_expected_bits(h); -+ - if(fabs(expected_bits/all_available_bits - 1.0) > 0.01) - { - double avgq = 0; -@@ -1706,7 +1901,8 @@ static int init_pass2( x264_t *h ) - avgq += rcc->entry[i].new_qscale; - avgq = qscale2qp(avgq / rcc->num_entries); - -- x264_log(h, X264_LOG_WARNING, "Error: 2pass curve failed to converge\n"); -+ if ((expected_bits > all_available_bits) || (!rcc->b_vbv)) -+ x264_log(h, X264_LOG_WARNING, "Error: 2pass curve failed to converge\n"); - x264_log(h, X264_LOG_WARNING, "target: %.2f kbit/s, expected: %.2f kbit/s, avg QP: %.4f\n", - (float)h->param.rc.i_bitrate, - expected_bits * rcc->fps / (rcc->num_entries * 1000.), -@@ -1725,7 +1921,7 @@ static int init_pass2( x264_t *h ) - else - x264_log(h, X264_LOG_WARNING, "try increasing target bitrate\n"); - } -- else -+ else if(!(rcc->b_2pass && rcc->b_vbv)) - x264_log(h, X264_LOG_WARNING, "internal error\n"); - } - -Index: encoder/ratecontrol.h -=================================================================== -old mode 100644 -new mode 100755 ---- encoder/ratecontrol.h -+++ encoder/ratecontrol.h -@@ -35,6 +35,8 @@ int x264_ratecontrol_qp( x264_t * ); - void x264_ratecontrol_end( x264_t *, int bits ); - void x264_ratecontrol_summary( x264_t * ); - void x264_adaptive_quant( x264_t * ); -+void x264_ratecontrol_set_estimated_size( x264_t *, int bits ); -+int x264_ratecontrol_get_estimated_size( x264_t const *); - - #endif diff --git a/contrib/version_x264.txt b/contrib/version_x264.txt index 6c77b386..fd427a4d 100644 --- a/contrib/version_x264.txt +++ b/contrib/version_x264.txt @@ -1 +1 @@ -http://download.m0k.org/handbrake/contrib/x264-r859-ce13bb6.tar.gz +http://download.m0k.org/handbrake/contrib/x264-r877-c74a8e2.tar.gz -- 2.11.0