contrib/patch-x264-aq.patch

   1 Index: encoder/ratecontrol.h
   2 ===================================================================
   3 --- encoder/ratecontrol.h       (revision 736)
   4 +++ encoder/ratecontrol.h       (working copy)
   5 @@ -34,6 +34,7 @@
   6  int  x264_ratecontrol_qp( x264_t * );
   7  void x264_ratecontrol_end( x264_t *, int bits );
   8  void x264_ratecontrol_summary( x264_t * );
   9 +void x264_adaptive_quant    ( x264_t * );
  10
  11  #endif
  12
  13 Index: encoder/encoder.c
  14 ===================================================================
  15 --- encoder/encoder.c   (revision 736)
  16 +++ encoder/encoder.c   (working copy)
  17 @@ -401,6 +401,7 @@
  18          h->param.analyse.b_fast_pskip = 0;
  19          h->param.analyse.i_noise_reduction = 0;
  20          h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
  21 +        h->param.analyse.b_aq = 0;
  22      }
  23      if( h->param.rc.i_rc_method == X264_RC_CQP )
  24      {
  25 @@ -475,6 +476,10 @@
  26      if( !h->param.b_cabac )
  27          h->param.analyse.i_trellis = 0;
  28      h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
  29 +    h->param.analyse.b_aq = h->param.analyse.b_aq && h->param.analyse.f_aq_strength > 0;
  30 +    /* VAQ on static sensitivity mode effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
  31 +    if(h->param.analyse.b_aq && h->param.analyse.f_aq_sensitivity != 0)
  32 +        h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.analyse.f_aq_strength * 0.4 / 0.28, 0, 1);
  33      h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
  34
  35      {
  36 Index: encoder/ratecontrol.c
  37 ===================================================================
  38 --- encoder/ratecontrol.c       (revision 736)
  39 +++ encoder/ratecontrol.c       (working copy)
  40 @@ -127,6 +127,10 @@
  41      predictor_t *pred_b_from_p; /* predict B-frame size from P-frame satd */
  42      int bframes;                /* # consecutive B-frames before this P-frame */
  43      int bframe_bits;            /* total cost of those frames */
  44 +
  45 +    /* Adaptive quantization (AQ) state: log-energy threshold and per-MB AC energy cache */
  46 +    float aq_threshold;
  47 +    int *ac_energy;
  48
  49      int i_zones;
  50      x264_zone_t *zones;
  51 @@ -169,7 +173,97 @@
  52             + rce->misc_bits;
  53  }
  54
  55 +// Return the summed AC energy of macroblock (mb_x, mb_y) over the three planes of h->fenc: per plane, ssd minus sad*sad/pixel-count, both measured against a flat block of 128. If satd is non-NULL, a SATD-based complexity term is also added to *satd.
  56 +static int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
  57 +{
  58 +    DECLARE_ALIGNED( static uint8_t, flat[16], 16 ) = {128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128};
  59 +//  flat[] holds a single row of 128s; it is passed below with 0 as its second argument (presumably the row stride, so the same row is reused for every line - confirm).
  60 +    unsigned int var=0, sad, ssd, i;
  61 +    for( i=0; i<3; i++ ) // plane 0 is measured as 16x16, planes 1 and 2 as 8x8
  62 +    {
  63 +        int w = i ? 8 : 16;
  64 +        int stride = h->fenc->i_stride[i];
  65 +        int offset = h->mb.b_interlaced
  66 +            ? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride // interlaced: even MB row of the pair, plus one pixel row when mb_y is odd
  67 +            : w * (mb_x + mb_y * stride);
  68 +        int pix = i ? PIXEL_8x8 : PIXEL_16x16;
  69 +        stride <<= h->mb.b_interlaced; // interlaced: doubled stride steps over every other pixel row
  70 +        sad = h->pixf.sad[pix](flat, 0, h->fenc->plane[i]+offset, stride);
  71 +        ssd = h->pixf.ssd[pix](flat, 0, h->fenc->plane[i]+offset, stride);
  72 +        var += ssd - (sad * sad >> (i?6:8)); // the shift divides by the pixel count: 64 for 8x8, 256 for 16x16
  73 +        // SATD represents the overall complexity (bit cost) of the block for intra encoding; it is only added while the running var is non-zero.
  74 +        // The "- sad/2" term is meant to exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
  75 +        if( var && satd )
  76 +            *satd += h->pixf.satd[pix](flat, 0, h->fenc->plane[i]+offset, stride) - sad/2;
  77 +    }
  78 +    return var;
  79 +}
  80 +
  81 +void x264_autosense_aq( x264_t *h )
  82 +{
  83 +    double total = 0; /* sum of logf(energy) * satd over MBs with non-zero energy */
  84 +    double n = 0;     /* sum of the satd weights of those MBs */
  85 +    int mb_x, mb_y;
  86 +    /* Store the AC energy of every MB in h->rc->ac_energy[] and set h->rc->aq_threshold to the SATD-weighted mean of logf(energy), or 15 when the total weight is zero. */
  87 +    /* FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?); can we reuse them?  Is chroma SATD necessary? */
  88 +    for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
  89 +        for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
  90 +        {
  91 +            int energy, satd=0;
  92 +            energy = ac_energy_mb( h, mb_x, mb_y, &satd );
  93 +            h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy; /* cached; read back by x264_adaptive_quant */
  94 +            /* Weight the log-energy value by the SATD value of the MB.  This represents the fact that
  95 +            the more complex blocks in a frame should count for more when calculating the optimal sensitivity.
  96 +            It also diminishes the negative effect of large numbers of simple blocks in a frame, such as in the case
  97 +            of a letterboxed film.  MBs with zero energy are left out of both sums. */
  98 +            if( energy )
  99 +            {
 100 +                x264_cpu_restore(h->param.cpu); /* NOTE(review): presumably resets CPU/FPU state before the float math below - confirm */
 101 +                total += logf(energy) * satd;
 102 +                n += satd;
 103 +            }
 104 +        }
 105 +    x264_cpu_restore(h->param.cpu);
 106 +    /* Weighted mean of log-energy; falls back to 15 when the total weight n is zero. */
 107 +    h->rc->aq_threshold = n ? total/n : 15;
 108 +}
 109
 110 +/*****************************************************************************
 111 +* x264_adaptive_quant:
 112 + * adjust h->mb.i_qp and h->mb.i_chroma_qp based on variance (AC energy) of the current MB:
 113 + * log(energy) above h->rc->aq_threshold = higher QP, below it = lower QP;
 114 + * zero energy = reuse h->mb.i_last_qp. Energy is read from h->rc->ac_energy[] when aq_sensitivity == 0, else recomputed.
 115 + * This generally increases SSIM and lowers PSNR.
 116 +*****************************************************************************/
 117 +void x264_adaptive_quant( x264_t *h )
 118 +{
 119 +    int qp = h->mb.i_qp;
 120 +    int energy;
 121 +    x264_cpu_restore(h->param.cpu); /* NOTE(review): presumably resets CPU/FPU state before the float math below - confirm */
 122 +    if(h->param.analyse.f_aq_sensitivity != 0)
 123 +        energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL ); /* NULL: no SATD term wanted here */
 124 +    else
 125 +        energy = h->rc->ac_energy[h->mb.i_mb_xy]; /* value stored by x264_autosense_aq */
 126 +    if(energy == 0)
 127 +    {
 128 +        h->mb.i_qp = h->mb.i_last_qp;
 129 +    }
 130 +    else
 131 +    {
 132 +        x264_cpu_restore(h->param.cpu);
 133 +        float result = energy;
 134 +        /* QP offset before scaling: 3 QP steps per unit of distance between logf(energy) and the threshold. */
 135 +        float qp_adj = 3 * (logf(result) - h->rc->aq_threshold);
 136 +        if(h->param.analyse.f_aq_sensitivity == 0) qp_adj = x264_clip3f(qp_adj, -5, 5); /* the offset is clamped only when sensitivity is 0 */
 137 +        int new_qp = x264_clip3(qp + qp_adj * h->param.analyse.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max);
 138 +        /* If the QP of this MB is exactly 1 away from that of the previous MB, code the same QP as the previous MB,
 139 +         * to lower the bit cost of the qp_delta. */
 140 +        if(abs(new_qp - h->mb.i_last_qp) == 1) new_qp = h->mb.i_last_qp;
 141 +        h->mb.i_qp = new_qp;
 142 +    }
 143 +    h->mb.i_chroma_qp = i_chroma_qp_table[x264_clip3( h->mb.i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )]; /* chroma QP follows the final luma QP in both branches */
 144 +}
 145 +
 146  int x264_ratecontrol_new( x264_t *h )
 147  {
 148      x264_ratecontrol_t *rc;
 149 @@ -244,7 +338,7 @@
 150          rc->rate_tolerance = 0.01;
 151      }
 152
 153 -    h->mb.b_variable_qp = rc->b_vbv && !rc->b_2pass;
 154 +    h->mb.b_variable_qp = (rc->b_vbv && !rc->b_2pass) || h->param.analyse.b_aq;
 155
 156      if( rc->b_abr )
 157      {
 158 @@ -458,10 +552,13 @@
 159          x264_free( p );
 160      }
 161
 162 -    for( i=1; i<h->param.i_threads; i++ )
 163 +    for( i=0; i<h->param.i_threads; i++ )
 164      {
 165          h->thread[i]->rc = rc+i;
 166 -        rc[i] = rc[0];
 167 +        if( i )
 168 +            rc[i] = rc[0];
 169 +        if( h->param.analyse.b_aq )
 170 +            rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
 171      }
 172
 173      return 0;
 174 @@ -623,6 +720,8 @@
 175                      x264_free( rc->zones[i].param );
 176          x264_free( rc->zones );
 177      }
 178 +    for( i=0; i<h->param.i_threads; i++ )
 179 +        x264_free( rc[i].ac_energy );
 180      x264_free( rc );
 181  }
 182
 183 @@ -729,6 +828,15 @@
 184
 185      if( h->sh.i_type != SLICE_TYPE_B )
 186          rc->last_non_b_pict_type = h->sh.i_type;
 187 +
 188 +    /* Set the AQ threshold: derived from aq-sensitivity when it is positive, otherwise auto-detected from the MB energies of this frame. */
 189 +    if( h->param.analyse.b_aq )
 190 +    {
 191 +        if( h->param.analyse.f_aq_sensitivity > 0 )
 192 +            h->rc->aq_threshold = logf(powf(h->param.analyse.f_aq_sensitivity,4)/2); //FIXME simplify
 193 +        else
 194 +            x264_autosense_aq(h);
 195 +    }
 196  }
 197
 198  double predict_row_size( x264_t *h, int y, int qp )
 199 Index: encoder/analyse.c
 200 ===================================================================
 201 --- encoder/analyse.c   (revision 736)
 202 +++ encoder/analyse.c   (working copy)
 203 @@ -2047,8 +2047,13 @@
 204      int i_cost = COST_MAX;
 205      int i;
 206
 207 -    /* init analysis */
 208 -    x264_mb_analyse_init( h, &analysis, x264_ratecontrol_qp( h ) );
 209 +    h->mb.i_qp = x264_ratecontrol_qp( h );
 210 +
 211 +    if( h->param.analyse.b_aq )
 212 +        x264_adaptive_quant( h );
 213 +
 214 +     /* init analysis */
 215 +    x264_mb_analyse_init( h, &analysis, h->mb.i_qp );
 216
 217      /*--------------------------- Do the analysis ---------------------------*/
 218      if( h->sh.i_type == SLICE_TYPE_I )
 219 Index: x264.c
 220 ===================================================================
 221 --- x264.c      (revision 736)
 222 +++ x264.c      (working copy)
 223 @@ -244,6 +244,14 @@
 224          "                                  - 2: enabled on all mode decisions\n", defaults->analyse.i_trellis );
 225      H0( "      --no-fast-pskip         Disables early SKIP detection on P-frames\n" );
 226      H0( "      --no-dct-decimate       Disables coefficient thresholding on P-frames\n" );
 227 +    H0( "      --aq-strength <float>   Amount to adjust QP/lambda per MB [%.1f]\n"
 228 +        "                                  0.0: no AQ\n"
 229 +        "                                  1.0: medium AQ\n", defaults->analyse.f_aq_strength );
 230 +    H0( "      --aq-sensitivity <float> \"Center\" of AQ curve. [%.1f]\n"
 231 +        "               0: automatic sensitivity (avoids moving bits between frames)\n"
 232 +        "               10: most QPs are raised\n"
 233 +        "               20: good general-use sensitivity\n"
 234 +        "               30: most QPs are lowered\n", defaults->analyse.f_aq_sensitivity );
 235      H0( "      --nr <integer>          Noise reduction [%d]\n", defaults->analyse.i_noise_reduction );
 236      H1( "\n" );
 237      H1( "      --deadzone-inter <int>  Set the size of the inter luma quantization deadzone [%d]\n", defaults->analyse.i_luma_deadzone[0] );
 238 @@ -407,6 +415,8 @@
 239              { "trellis", required_argument, NULL, 't' },
 240              { "no-fast-pskip", no_argument, NULL, 0 },
 241              { "no-dct-decimate", no_argument, NULL, 0 },
 242 +            { "aq-strength", required_argument, NULL, 0 },
 243 +            { "aq-sensitivity", required_argument, NULL, 0 },
 244              { "deadzone-inter", required_argument, NULL, '0' },
 245              { "deadzone-intra", required_argument, NULL, '0' },
 246              { "level",   required_argument, NULL, 0 },
 247 Index: common/common.c
 248 ===================================================================
 249 --- common/common.c     (revision 736)
 250 +++ common/common.c     (working copy)
 251 @@ -123,6 +123,9 @@
 252      param->analyse.i_chroma_qp_offset = 0;
 253      param->analyse.b_fast_pskip = 1;
 254      param->analyse.b_dct_decimate = 1;
 255 +    param->analyse.b_aq = 1;
 256 +    param->analyse.f_aq_strength = 0.5;
 257 +    param->analyse.f_aq_sensitivity = 13;
 258      param->analyse.i_luma_deadzone[0] = 21;
 259      param->analyse.i_luma_deadzone[1] = 11;
 260      param->analyse.b_psnr = 1;
 261 @@ -455,6 +458,13 @@
 262          p->analyse.b_fast_pskip = atobool(value);
 263      OPT("dct-decimate")
 264          p->analyse.b_dct_decimate = atobool(value);
 265 +    OPT("aq-strength")
 266 +    {
 267 +        p->analyse.f_aq_strength = atof(value);
 268 +        p->analyse.b_aq = 1;
 269 +    }
 270 +    OPT("aq-sensitivity")
 271 +        p->analyse.f_aq_sensitivity = atof(value);
 272      OPT("deadzone-inter")
 273          p->analyse.i_luma_deadzone[0] = atoi(value);
 274      OPT("deadzone-intra")
 275 @@ -883,6 +893,10 @@
 276          s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
 277          if( p->i_bframe )
 278              s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
 279 +        if( p->analyse.b_aq )
 280 +            s += sprintf( s, " aq=1:%.1f:%.1f", p->analyse.f_aq_strength, p->analyse.f_aq_sensitivity );
 281 +        else
 282 +            s += sprintf( s, " aq=0" );
 283          if( p->rc.psz_zones )
 284              s += sprintf( s, " zones=%s", p->rc.psz_zones );
 285          else if( p->rc.i_zones )
 286 Index: x264.h
 287 ===================================================================
 288 --- x264.h      (revision 736)
 289 +++ x264.h      (working copy)
 290 @@ -232,6 +232,9 @@
 291          int          i_trellis;  /* trellis RD quantization */
 292          int          b_fast_pskip; /* early SKIP detection on P-frames */
 293          int          b_dct_decimate; /* transform coefficient thresholding on P-frames */
 294 +        int          b_aq; /* psy adaptive QP */
 295 +        float        f_aq_strength;
 296 +        float        f_aq_sensitivity;
 297          int          i_noise_reduction; /* adaptive pseudo-deadzone */
 298
 299          /* the deadzone size that will be used in luma quantization */