OSDN Git Service

libavcodec/vp9: ipred_dl_32x32_16 avx2 implementation
[android-x86/external-ffmpeg.git] / libavcodec / aaccoder.c
1 /*
2  * AAC coefficients encoder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC coefficients encoder
25  */
26
27 /***********************************
28  *              TODOs:
29  * speedup quantizer selection
30  * add sane pulse detection
31  ***********************************/
32
33 #include "libavutil/libm.h" // brought forward to work around cygwin header breakage
34
35 #include <float.h>
36
37 #include "libavutil/mathematics.h"
38 #include "mathops.h"
39 #include "avcodec.h"
40 #include "put_bits.h"
41 #include "aac.h"
42 #include "aacenc.h"
43 #include "aactab.h"
44 #include "aacenctab.h"
45 #include "aacenc_utils.h"
46 #include "aacenc_quantization.h"
47
48 #include "aacenc_is.h"
49 #include "aacenc_tns.h"
50 #include "aacenc_ltp.h"
51 #include "aacenc_pred.h"
52
53 #include "libavcodec/aaccoder_twoloop.h"
54
/* Parameter of f(x) = a*(lambda/100). The lambda-scaled value (capped at
 * 0.75) is the minimum spectral spread a band must show to be eligible for
 * PNS: bands whose spread falls below it are treated as tonal rather than
 * noise-like and are never replaced by noise. */
#define NOISE_SPREAD_THRESHOLD 0.9f

/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
 * replace low energy non zero bands */
#define NOISE_LAMBDA_REPLACE 1.948f
62
63 #include "libavcodec/aaccoder_trellis.h"
64
/**
 * One node of the (band, codebook) trellis used in the optimal codebook
 * search. A node describes the cheapest known way to code all bands up to
 * a given band when that band uses a given codebook.
 */
typedef struct BandCodingPath {
    int prev_idx; ///< codebook index of the previous path point, -1 if there is none
    float cost;   ///< accumulated path cost
    int run;      ///< number of consecutive bands ending here that share this codebook
} BandCodingPath;
73
74 /**
75  * Encode band info for single window group bands.
76  */
77 static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
78                                      int win, int group_len, const float lambda)
79 {
80     BandCodingPath path[120][CB_TOT_ALL];
81     int w, swb, cb, start, size;
82     int i, j;
83     const int max_sfb  = sce->ics.max_sfb;
84     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
85     const int run_esc  = (1 << run_bits) - 1;
86     int idx, ppos, count;
87     int stackrun[120], stackcb[120], stack_len;
88     float next_minrd = INFINITY;
89     int next_mincb = 0;
90
91     s->abs_pow34(s->scoefs, sce->coeffs, 1024);
92     start = win*128;
93     for (cb = 0; cb < CB_TOT_ALL; cb++) {
94         path[0][cb].cost     = 0.0f;
95         path[0][cb].prev_idx = -1;
96         path[0][cb].run      = 0;
97     }
98     for (swb = 0; swb < max_sfb; swb++) {
99         size = sce->ics.swb_sizes[swb];
100         if (sce->zeroes[win*16 + swb]) {
101             for (cb = 0; cb < CB_TOT_ALL; cb++) {
102                 path[swb+1][cb].prev_idx = cb;
103                 path[swb+1][cb].cost     = path[swb][cb].cost;
104                 path[swb+1][cb].run      = path[swb][cb].run + 1;
105             }
106         } else {
107             float minrd = next_minrd;
108             int mincb = next_mincb;
109             next_minrd = INFINITY;
110             next_mincb = 0;
111             for (cb = 0; cb < CB_TOT_ALL; cb++) {
112                 float cost_stay_here, cost_get_here;
113                 float rd = 0.0f;
114                 if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
115                     cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
116                     path[swb+1][cb].prev_idx = -1;
117                     path[swb+1][cb].cost     = INFINITY;
118                     path[swb+1][cb].run      = path[swb][cb].run + 1;
119                     continue;
120                 }
121                 for (w = 0; w < group_len; w++) {
122                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
123                     rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
124                                              &s->scoefs[start + w*128], size,
125                                              sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
126                                              lambda / band->threshold, INFINITY, NULL, NULL, 0);
127                 }
128                 cost_stay_here = path[swb][cb].cost + rd;
129                 cost_get_here  = minrd              + rd + run_bits + 4;
130                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
131                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
132                     cost_stay_here += run_bits;
133                 if (cost_get_here < cost_stay_here) {
134                     path[swb+1][cb].prev_idx = mincb;
135                     path[swb+1][cb].cost     = cost_get_here;
136                     path[swb+1][cb].run      = 1;
137                 } else {
138                     path[swb+1][cb].prev_idx = cb;
139                     path[swb+1][cb].cost     = cost_stay_here;
140                     path[swb+1][cb].run      = path[swb][cb].run + 1;
141                 }
142                 if (path[swb+1][cb].cost < next_minrd) {
143                     next_minrd = path[swb+1][cb].cost;
144                     next_mincb = cb;
145                 }
146             }
147         }
148         start += sce->ics.swb_sizes[swb];
149     }
150
151     //convert resulting path from backward-linked list
152     stack_len = 0;
153     idx       = 0;
154     for (cb = 1; cb < CB_TOT_ALL; cb++)
155         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
156             idx = cb;
157     ppos = max_sfb;
158     while (ppos > 0) {
159         av_assert1(idx >= 0);
160         cb = idx;
161         stackrun[stack_len] = path[ppos][cb].run;
162         stackcb [stack_len] = cb;
163         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
164         ppos -= path[ppos][cb].run;
165         stack_len++;
166     }
167     //perform actual band info encoding
168     start = 0;
169     for (i = stack_len - 1; i >= 0; i--) {
170         cb = aac_cb_out_map[stackcb[i]];
171         put_bits(&s->pb, 4, cb);
172         count = stackrun[i];
173         memset(sce->zeroes + win*16 + start, !cb, count);
174         //XXX: memset when band_type is also uint8_t
175         for (j = 0; j < count; j++) {
176             sce->band_type[win*16 + start] = cb;
177             start++;
178         }
179         while (count >= run_esc) {
180             put_bits(&s->pb, run_bits, run_esc);
181             count -= run_esc;
182         }
183         put_bits(&s->pb, run_bits, count);
184     }
185 }
186
187
/**
 * One node of the scalefactor trellis: the cheapest known way to reach a
 * given scalefactor state at a given stage (band).
 */
typedef struct TrellisPath {
    float cost; ///< accumulated cost of the best path ending in this node
    int prev;   ///< state chosen at the previous stage; -1 at stage 0, -2 while unreached
} TrellisPath;

/** Number of trellis stages: one initial stage plus one per coded band. */
#define TRELLIS_STAGES 121
/** Number of scalefactor states tracked per stage. */
#define TRELLIS_STATES (SCALE_MAX_DIFF+1)
195
196 static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
197 {
198     int w, g;
199     int prevscaler_n = -255, prevscaler_i = 0;
200     int bands = 0;
201
202     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
203         for (g = 0; g < sce->ics.num_swb; g++) {
204             if (sce->zeroes[w*16+g])
205                 continue;
206             if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
207                 sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
208                 bands++;
209             } else if (sce->band_type[w*16+g] == NOISE_BT) {
210                 sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
211                 if (prevscaler_n == -255)
212                     prevscaler_n = sce->sf_idx[w*16+g];
213                 bands++;
214             }
215         }
216     }
217
218     if (!bands)
219         return;
220
221     /* Clip the scalefactor indices */
222     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
223         for (g = 0; g < sce->ics.num_swb; g++) {
224             if (sce->zeroes[w*16+g])
225                 continue;
226             if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
227                 sce->sf_idx[w*16+g] = prevscaler_i = av_clip(sce->sf_idx[w*16+g], prevscaler_i - SCALE_MAX_DIFF, prevscaler_i + SCALE_MAX_DIFF);
228             } else if (sce->band_type[w*16+g] == NOISE_BT) {
229                 sce->sf_idx[w*16+g] = prevscaler_n = av_clip(sce->sf_idx[w*16+g], prevscaler_n - SCALE_MAX_DIFF, prevscaler_n + SCALE_MAX_DIFF);
230             }
231         }
232     }
233 }
234
235 static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
236                                        SingleChannelElement *sce,
237                                        const float lambda)
238 {
239     int q, w, w2, g, start = 0;
240     int i, j;
241     int idx;
242     TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
243     int bandaddr[TRELLIS_STAGES];
244     int minq;
245     float mincost;
246     float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
247     int q0, q1, qcnt = 0;
248
249     for (i = 0; i < 1024; i++) {
250         float t = fabsf(sce->coeffs[i]);
251         if (t > 0.0f) {
252             q0f = FFMIN(q0f, t);
253             q1f = FFMAX(q1f, t);
254             qnrgf += t*t;
255             qcnt++;
256         }
257     }
258
259     if (!qcnt) {
260         memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
261         memset(sce->zeroes, 1, sizeof(sce->zeroes));
262         return;
263     }
264
265     //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
266     q0 = av_clip(coef2minsf(q0f), 0, SCALE_MAX_POS-1);
267     //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
268     q1 = av_clip(coef2maxsf(q1f), 1, SCALE_MAX_POS);
269     if (q1 - q0 > 60) {
270         int q0low  = q0;
271         int q1high = q1;
272         //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
273         int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
274         q1 = qnrg + 30;
275         q0 = qnrg - 30;
276         if (q0 < q0low) {
277             q1 += q0low - q0;
278             q0  = q0low;
279         } else if (q1 > q1high) {
280             q0 -= q1 - q1high;
281             q1  = q1high;
282         }
283     }
284     // q0 == q1 isn't really a legal situation
285     if (q0 == q1) {
286         // the following is indirect but guarantees q1 != q0 && q1 near q0
287         q1 = av_clip(q0+1, 1, SCALE_MAX_POS);
288         q0 = av_clip(q1-1, 0, SCALE_MAX_POS - 1);
289     }
290
291     for (i = 0; i < TRELLIS_STATES; i++) {
292         paths[0][i].cost    = 0.0f;
293         paths[0][i].prev    = -1;
294     }
295     for (j = 1; j < TRELLIS_STAGES; j++) {
296         for (i = 0; i < TRELLIS_STATES; i++) {
297             paths[j][i].cost    = INFINITY;
298             paths[j][i].prev    = -2;
299         }
300     }
301     idx = 1;
302     s->abs_pow34(s->scoefs, sce->coeffs, 1024);
303     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
304         start = w*128;
305         for (g = 0; g < sce->ics.num_swb; g++) {
306             const float *coefs = &sce->coeffs[start];
307             float qmin, qmax;
308             int nz = 0;
309
310             bandaddr[idx] = w * 16 + g;
311             qmin = INT_MAX;
312             qmax = 0.0f;
313             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
314                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
315                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
316                     sce->zeroes[(w+w2)*16+g] = 1;
317                     continue;
318                 }
319                 sce->zeroes[(w+w2)*16+g] = 0;
320                 nz = 1;
321                 for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
322                     float t = fabsf(coefs[w2*128+i]);
323                     if (t > 0.0f)
324                         qmin = FFMIN(qmin, t);
325                     qmax = FFMAX(qmax, t);
326                 }
327             }
328             if (nz) {
329                 int minscale, maxscale;
330                 float minrd = INFINITY;
331                 float maxval;
332                 //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
333                 minscale = coef2minsf(qmin);
334                 //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
335                 maxscale = coef2maxsf(qmax);
336                 minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
337                 maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
338                 if (minscale == maxscale) {
339                     maxscale = av_clip(minscale+1, 1, TRELLIS_STATES);
340                     minscale = av_clip(maxscale-1, 0, TRELLIS_STATES - 1);
341                 }
342                 maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
343                 for (q = minscale; q < maxscale; q++) {
344                     float dist = 0;
345                     int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
346                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
347                         FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
348                         dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
349                                                    q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
350                     }
351                     minrd = FFMIN(minrd, dist);
352
353                     for (i = 0; i < q1 - q0; i++) {
354                         float cost;
355                         cost = paths[idx - 1][i].cost + dist
356                                + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
357                         if (cost < paths[idx][q].cost) {
358                             paths[idx][q].cost    = cost;
359                             paths[idx][q].prev    = i;
360                         }
361                     }
362                 }
363             } else {
364                 for (q = 0; q < q1 - q0; q++) {
365                     paths[idx][q].cost = paths[idx - 1][q].cost + 1;
366                     paths[idx][q].prev = q;
367                 }
368             }
369             sce->zeroes[w*16+g] = !nz;
370             start += sce->ics.swb_sizes[g];
371             idx++;
372         }
373     }
374     idx--;
375     mincost = paths[idx][0].cost;
376     minq    = 0;
377     for (i = 1; i < TRELLIS_STATES; i++) {
378         if (paths[idx][i].cost < mincost) {
379             mincost = paths[idx][i].cost;
380             minq = i;
381         }
382     }
383     while (idx) {
384         sce->sf_idx[bandaddr[idx]] = minq + q0;
385         minq = FFMAX(paths[idx][minq].prev, 0);
386         idx--;
387     }
388     //set the same quantizers inside window groups
389     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
390         for (g = 0; g < sce->ics.num_swb; g++)
391             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
392                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
393 }
394
395 static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
396                                        SingleChannelElement *sce,
397                                        const float lambda)
398 {
399     int start = 0, i, w, w2, g;
400     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
401     float dists[128] = { 0 }, uplims[128] = { 0 };
402     float maxvals[128];
403     int fflag, minscaler;
404     int its  = 0;
405     int allz = 0;
406     float minthr = INFINITY;
407
408     // for values above this the decoder might end up in an endless loop
409     // due to always having more bits than what can be encoded.
410     destbits = FFMIN(destbits, 5800);
411     //some heuristic to determine initial quantizers will reduce search time
412     //determine zero bands and upper limits
413     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
414         start = 0;
415         for (g = 0; g < sce->ics.num_swb; g++) {
416             int nz = 0;
417             float uplim = 0.0f, energy = 0.0f;
418             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
419                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
420                 uplim += band->threshold;
421                 energy += band->energy;
422                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
423                     sce->zeroes[(w+w2)*16+g] = 1;
424                     continue;
425                 }
426                 nz = 1;
427             }
428             uplims[w*16+g] = uplim *512;
429             sce->band_type[w*16+g] = 0;
430             sce->zeroes[w*16+g] = !nz;
431             if (nz)
432                 minthr = FFMIN(minthr, uplim);
433             allz |= nz;
434             start += sce->ics.swb_sizes[g];
435         }
436     }
437     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
438         for (g = 0; g < sce->ics.num_swb; g++) {
439             if (sce->zeroes[w*16+g]) {
440                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
441                 continue;
442             }
443             sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
444         }
445     }
446
447     if (!allz)
448         return;
449     s->abs_pow34(s->scoefs, sce->coeffs, 1024);
450     ff_quantize_band_cost_cache_init(s);
451
452     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
453         start = w*128;
454         for (g = 0; g < sce->ics.num_swb; g++) {
455             const float *scaled = s->scoefs + start;
456             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
457             start += sce->ics.swb_sizes[g];
458         }
459     }
460
461     //perform two-loop search
462     //outer loop - improve quality
463     do {
464         int tbits, qstep;
465         minscaler = sce->sf_idx[0];
466         //inner loop - quantize spectrum to fit into given number of bits
467         qstep = its ? 1 : 32;
468         do {
469             int prev = -1;
470             tbits = 0;
471             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
472                 start = w*128;
473                 for (g = 0; g < sce->ics.num_swb; g++) {
474                     const float *coefs = sce->coeffs + start;
475                     const float *scaled = s->scoefs + start;
476                     int bits = 0;
477                     int cb;
478                     float dist = 0.0f;
479
480                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
481                         start += sce->ics.swb_sizes[g];
482                         continue;
483                     }
484                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
485                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
486                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
487                         int b;
488                         dist += quantize_band_cost_cached(s, w + w2, g,
489                                                           coefs + w2*128,
490                                                           scaled + w2*128,
491                                                           sce->ics.swb_sizes[g],
492                                                           sce->sf_idx[w*16+g],
493                                                           cb, 1.0f, INFINITY,
494                                                           &b, NULL, 0);
495                         bits += b;
496                     }
497                     dists[w*16+g] = dist - bits;
498                     if (prev != -1) {
499                         bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
500                     }
501                     tbits += bits;
502                     start += sce->ics.swb_sizes[g];
503                     prev = sce->sf_idx[w*16+g];
504                 }
505             }
506             if (tbits > destbits) {
507                 for (i = 0; i < 128; i++)
508                     if (sce->sf_idx[i] < 218 - qstep)
509                         sce->sf_idx[i] += qstep;
510             } else {
511                 for (i = 0; i < 128; i++)
512                     if (sce->sf_idx[i] > 60 - qstep)
513                         sce->sf_idx[i] -= qstep;
514             }
515             qstep >>= 1;
516             if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
517                 qstep = 1;
518         } while (qstep);
519
520         fflag = 0;
521         minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
522
523         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
524             for (g = 0; g < sce->ics.num_swb; g++) {
525                 int prevsc = sce->sf_idx[w*16+g];
526                 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
527                     if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
528                         sce->sf_idx[w*16+g]--;
529                     else //Try to make sure there is some energy in every band
530                         sce->sf_idx[w*16+g]-=2;
531                 }
532                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
533                 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
534                 if (sce->sf_idx[w*16+g] != prevsc)
535                     fflag = 1;
536                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
537             }
538         }
539         its++;
540     } while (fflag && its < 10);
541 }
542
543 static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
544 {
545     FFPsyBand *band;
546     int w, g, w2, i;
547     int wlen = 1024 / sce->ics.num_windows;
548     int bandwidth, cutoff;
549     float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
550     float *NOR34 = &s->scoefs[3*128];
551     uint8_t nextband[128];
552     const float lambda = s->lambda;
553     const float freq_mult = avctx->sample_rate*0.5f/wlen;
554     const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
555     const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
556     const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
557     const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
558
559     int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
560         / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
561         * (lambda / 120.f);
562
563     /** Keep this in sync with twoloop's cutoff selection */
564     float rate_bandwidth_multiplier = 1.5f;
565     int prev = -1000, prev_sf = -1;
566     int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
567         ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
568         : (avctx->bit_rate / avctx->channels);
569
570     frame_bit_rate *= 1.15f;
571
572     if (avctx->cutoff > 0) {
573         bandwidth = avctx->cutoff;
574     } else {
575         bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
576     }
577
578     cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
579
580     memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
581     ff_init_nextband_map(sce, nextband);
582     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
583         int wstart = w*128;
584         for (g = 0; g < sce->ics.num_swb; g++) {
585             int noise_sfi;
586             float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
587             float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
588             float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
589             float min_energy = -1.0f, max_energy = 0.0f;
590             const int start = wstart+sce->ics.swb_offset[g];
591             const float freq = (start-wstart)*freq_mult;
592             const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
593             if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
594                 if (!sce->zeroes[w*16+g])
595                     prev_sf = sce->sf_idx[w*16+g];
596                 continue;
597             }
598             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
599                 band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
600                 sfb_energy += band->energy;
601                 spread     = FFMIN(spread, band->spread);
602                 threshold  += band->threshold;
603                 if (!w2) {
604                     min_energy = max_energy = band->energy;
605                 } else {
606                     min_energy = FFMIN(min_energy, band->energy);
607                     max_energy = FFMAX(max_energy, band->energy);
608                 }
609             }
610
611             /* Ramps down at ~8000Hz and loosens the dist threshold */
612             dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;
613
614             /* PNS is acceptable when all of these are true:
615              * 1. high spread energy (noise-like band)
616              * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
617              * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
618              *
619              * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
620              */
621             if ((!sce->zeroes[w*16+g] && !ff_sfdelta_can_remove_band(sce, nextband, prev_sf, w*16+g)) ||
622                 ((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.0f/freq_boost)) || spread < spread_threshold ||
623                 (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
624                 min_energy < pns_transient_energy_r * max_energy ) {
625                 sce->pns_ener[w*16+g] = sfb_energy;
626                 if (!sce->zeroes[w*16+g])
627                     prev_sf = sce->sf_idx[w*16+g];
628                 continue;
629             }
630
631             pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
632             noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
633             noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO];    /* Dequantize */
634             if (prev != -1000) {
635                 int noise_sfdiff = noise_sfi - prev + SCALE_DIFF_ZERO;
636                 if (noise_sfdiff < 0 || noise_sfdiff > 2*SCALE_MAX_DIFF) {
637                     if (!sce->zeroes[w*16+g])
638                         prev_sf = sce->sf_idx[w*16+g];
639                     continue;
640                 }
641             }
642             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
643                 float band_energy, scale, pns_senergy;
644                 const int start_c = (w+w2)*128+sce->ics.swb_offset[g];
645                 band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
646                 for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
647                     s->random_state  = lcg_random(s->random_state);
648                     PNS[i] = s->random_state;
649                 }
650                 band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
651                 scale = noise_amp/sqrtf(band_energy);
652                 s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
653                 pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
654                 pns_energy += pns_senergy;
655                 s->abs_pow34(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
656                 s->abs_pow34(PNS34, PNS, sce->ics.swb_sizes[g]);
657                 dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
658                                             NOR34,
659                                             sce->ics.swb_sizes[g],
660                                             sce->sf_idx[(w+w2)*16+g],
661                                             sce->band_alt[(w+w2)*16+g],
662                                             lambda/band->threshold, INFINITY, NULL, NULL, 0);
663                 /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
664                 dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
665             }
666             if (g && sce->band_type[w*16+g-1] == NOISE_BT) {
667                 dist2 += 5;
668             } else {
669                 dist2 += 9;
670             }
671             energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
672             sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
673             if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
674                 sce->band_type[w*16+g] = NOISE_BT;
675                 sce->zeroes[w*16+g] = 0;
676                 prev = noise_sfi;
677             } else {
678                 if (!sce->zeroes[w*16+g])
679                     prev_sf = sce->sf_idx[w*16+g];
680             }
681         }
682     }
683 }
684
685 static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
686 {
687     FFPsyBand *band;
688     int w, g, w2;
689     int wlen = 1024 / sce->ics.num_windows;
690     int bandwidth, cutoff;
691     const float lambda = s->lambda;
692     const float freq_mult = avctx->sample_rate*0.5f/wlen;
693     const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
694     const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
695
696     int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
697         / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
698         * (lambda / 120.f);
699
700     /** Keep this in sync with twoloop's cutoff selection */
701     float rate_bandwidth_multiplier = 1.5f;
702     int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
703         ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
704         : (avctx->bit_rate / avctx->channels);
705
706     frame_bit_rate *= 1.15f;
707
708     if (avctx->cutoff > 0) {
709         bandwidth = avctx->cutoff;
710     } else {
711         bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
712     }
713
714     cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
715
716     memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
717     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
718         for (g = 0; g < sce->ics.num_swb; g++) {
719             float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
720             float min_energy = -1.0f, max_energy = 0.0f;
721             const int start = sce->ics.swb_offset[g];
722             const float freq = start*freq_mult;
723             const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
724             if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
725                 sce->can_pns[w*16+g] = 0;
726                 continue;
727             }
728             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
729                 band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
730                 sfb_energy += band->energy;
731                 spread     = FFMIN(spread, band->spread);
732                 threshold  += band->threshold;
733                 if (!w2) {
734                     min_energy = max_energy = band->energy;
735                 } else {
736                     min_energy = FFMIN(min_energy, band->energy);
737                     max_energy = FFMAX(max_energy, band->energy);
738                 }
739             }
740
741             /* PNS is acceptable when all of these are true:
742              * 1. high spread energy (noise-like band)
743              * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
744              * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
745              */
746             sce->pns_ener[w*16+g] = sfb_energy;
747             if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
748                 sce->can_pns[w*16+g] = 0;
749             } else {
750                 sce->can_pns[w*16+g] = 1;
751             }
752         }
753     }
754 }
755
756 static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
757 {
758     int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
759     uint8_t nextband0[128], nextband1[128];
760     float *M   = s->scoefs + 128*0, *S   = s->scoefs + 128*1;
761     float *L34 = s->scoefs + 128*2, *R34 = s->scoefs + 128*3;
762     float *M34 = s->scoefs + 128*4, *S34 = s->scoefs + 128*5;
763     const float lambda = s->lambda;
764     const float mslambda = FFMIN(1.0f, lambda / 120.f);
765     SingleChannelElement *sce0 = &cpe->ch[0];
766     SingleChannelElement *sce1 = &cpe->ch[1];
767     if (!cpe->common_window)
768         return;
769
770     /** Scout out next nonzero bands */
771     ff_init_nextband_map(sce0, nextband0);
772     ff_init_nextband_map(sce1, nextband1);
773
774     prev_mid = sce0->sf_idx[0];
775     prev_side = sce1->sf_idx[0];
776     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
777         start = 0;
778         for (g = 0; g < sce0->ics.num_swb; g++) {
779             float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
780             if (!cpe->is_mask[w*16+g])
781                 cpe->ms_mask[w*16+g] = 0;
782             if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
783                 float Mmax = 0.0f, Smax = 0.0f;
784
785                 /* Must compute mid/side SF and book for the whole window group */
786                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
787                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
788                         M[i] = (sce0->coeffs[start+(w+w2)*128+i]
789                               + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
790                         S[i] =  M[i]
791                               - sce1->coeffs[start+(w+w2)*128+i];
792                     }
793                     s->abs_pow34(M34, M, sce0->ics.swb_sizes[g]);
794                     s->abs_pow34(S34, S, sce0->ics.swb_sizes[g]);
795                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
796                         Mmax = FFMAX(Mmax, M34[i]);
797                         Smax = FFMAX(Smax, S34[i]);
798                     }
799                 }
800
801                 for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
802                     float dist1 = 0.0f, dist2 = 0.0f;
803                     int B0 = 0, B1 = 0;
804                     int minidx;
805                     int mididx, sididx;
806                     int midcb, sidcb;
807
808                     minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
809                     mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
810                     sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
811                     if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
812                         && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
813                             || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
814                         /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
815                         continue;
816                     }
817
818                     midcb = find_min_book(Mmax, mididx);
819                     sidcb = find_min_book(Smax, sididx);
820
821                     /* No CB can be zero */
822                     midcb = FFMAX(1,midcb);
823                     sidcb = FFMAX(1,sidcb);
824
825                     for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
826                         FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
827                         FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
828                         float minthr = FFMIN(band0->threshold, band1->threshold);
829                         int b1,b2,b3,b4;
830                         for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
831                             M[i] = (sce0->coeffs[start+(w+w2)*128+i]
832                                   + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
833                             S[i] =  M[i]
834                                   - sce1->coeffs[start+(w+w2)*128+i];
835                         }
836
837                         s->abs_pow34(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
838                         s->abs_pow34(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
839                         s->abs_pow34(M34, M,                         sce0->ics.swb_sizes[g]);
840                         s->abs_pow34(S34, S,                         sce0->ics.swb_sizes[g]);
841                         dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
842                                                     L34,
843                                                     sce0->ics.swb_sizes[g],
844                                                     sce0->sf_idx[w*16+g],
845                                                     sce0->band_type[w*16+g],
846                                                     lambda / band0->threshold, INFINITY, &b1, NULL, 0);
847                         dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
848                                                     R34,
849                                                     sce1->ics.swb_sizes[g],
850                                                     sce1->sf_idx[w*16+g],
851                                                     sce1->band_type[w*16+g],
852                                                     lambda / band1->threshold, INFINITY, &b2, NULL, 0);
853                         dist2 += quantize_band_cost(s, M,
854                                                     M34,
855                                                     sce0->ics.swb_sizes[g],
856                                                     mididx,
857                                                     midcb,
858                                                     lambda / minthr, INFINITY, &b3, NULL, 0);
859                         dist2 += quantize_band_cost(s, S,
860                                                     S34,
861                                                     sce1->ics.swb_sizes[g],
862                                                     sididx,
863                                                     sidcb,
864                                                     mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
865                         B0 += b1+b2;
866                         B1 += b3+b4;
867                         dist1 -= b1+b2;
868                         dist2 -= b3+b4;
869                     }
870                     cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
871                     if (cpe->ms_mask[w*16+g]) {
872                         if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
873                             sce0->sf_idx[w*16+g] = mididx;
874                             sce1->sf_idx[w*16+g] = sididx;
875                             sce0->band_type[w*16+g] = midcb;
876                             sce1->band_type[w*16+g] = sidcb;
877                         } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
878                             /* ms_mask unneeded, and it confuses some decoders */
879                             cpe->ms_mask[w*16+g] = 0;
880                         }
881                         break;
882                     } else if (B1 > B0) {
883                         /* More boost won't fix this */
884                         break;
885                     }
886                 }
887             }
888             if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
889                 prev_mid = sce0->sf_idx[w*16+g];
890             if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
891                 prev_side = sce1->sf_idx[w*16+g];
892             start += sce0->ics.swb_sizes[g];
893         }
894     }
895 }
896
897 AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
898     [AAC_CODER_ANMR] = {
899         search_for_quantizers_anmr,
900         encode_window_bands_info,
901         quantize_and_encode_band,
902         ff_aac_encode_tns_info,
903         ff_aac_encode_ltp_info,
904         ff_aac_encode_main_pred,
905         ff_aac_adjust_common_pred,
906         ff_aac_adjust_common_ltp,
907         ff_aac_apply_main_pred,
908         ff_aac_apply_tns,
909         ff_aac_update_ltp,
910         ff_aac_ltp_insert_new_frame,
911         set_special_band_scalefactors,
912         search_for_pns,
913         mark_pns,
914         ff_aac_search_for_tns,
915         ff_aac_search_for_ltp,
916         search_for_ms,
917         ff_aac_search_for_is,
918         ff_aac_search_for_pred,
919     },
920     [AAC_CODER_TWOLOOP] = {
921         search_for_quantizers_twoloop,
922         codebook_trellis_rate,
923         quantize_and_encode_band,
924         ff_aac_encode_tns_info,
925         ff_aac_encode_ltp_info,
926         ff_aac_encode_main_pred,
927         ff_aac_adjust_common_pred,
928         ff_aac_adjust_common_ltp,
929         ff_aac_apply_main_pred,
930         ff_aac_apply_tns,
931         ff_aac_update_ltp,
932         ff_aac_ltp_insert_new_frame,
933         set_special_band_scalefactors,
934         search_for_pns,
935         mark_pns,
936         ff_aac_search_for_tns,
937         ff_aac_search_for_ltp,
938         search_for_ms,
939         ff_aac_search_for_is,
940         ff_aac_search_for_pred,
941     },
942     [AAC_CODER_FAST] = {
943         search_for_quantizers_fast,
944         codebook_trellis_rate,
945         quantize_and_encode_band,
946         ff_aac_encode_tns_info,
947         ff_aac_encode_ltp_info,
948         ff_aac_encode_main_pred,
949         ff_aac_adjust_common_pred,
950         ff_aac_adjust_common_ltp,
951         ff_aac_apply_main_pred,
952         ff_aac_apply_tns,
953         ff_aac_update_ltp,
954         ff_aac_ltp_insert_new_frame,
955         set_special_band_scalefactors,
956         search_for_pns,
957         mark_pns,
958         ff_aac_search_for_tns,
959         ff_aac_search_for_ltp,
960         search_for_ms,
961         ff_aac_search_for_is,
962         ff_aac_search_for_pred,
963     },
964 };