src/lib/doslib/ext/speex/filters.c

   1 /* Copyright (C) 2002-2006 Jean-Marc Valin
   2    File: filters.c
   3    Various analysis/synthesis filters
   4
   5    Redistribution and use in source and binary forms, with or without
   6    modification, are permitted provided that the following conditions
   7    are met:
   8
   9    - Redistributions of source code must retain the above copyright
  10    notice, this list of conditions and the following disclaimer.
  11
  12    - Redistributions in binary form must reproduce the above copyright
  13    notice, this list of conditions and the following disclaimer in the
  14    documentation and/or other materials provided with the distribution.
  15
  16    - Neither the name of the Xiph.org Foundation nor the names of its
  17    contributors may be used to endorse or promote products derived from
  18    this software without specific prior written permission.
  19
  20    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
  24    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  25    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  26    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  27    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  28    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  29    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  30    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31 */
  32
  33 #ifdef HAVE_CONFIG_H
  34 #include "config.h"
  35 #endif
  36
  37 #include "filters.h"
  38 #include "stack_alloc.h"
  39 #include "arch.h"
  40 #include "math_approx.h"
  41 #include "ltp.h"
  42 #include <math.h>
  43
  44 #ifdef _USE_SSE
  45 #include "filters_sse.h"
  46 #elif defined (ARM4_ASM) || defined(ARM5E_ASM)
  47 #include "filters_arm4.h"
  48 #elif defined (BFIN_ASM)
  49 #include "filters_bfin.h"
  50 #endif
  51
  52
  53
  54 void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order)
  55 {
  56    int i;
  57    spx_word16_t tmp=gamma;
  58    for (i=0;i<order;i++)
  59    {
  60       lpc_out[i] = MULT16_16_P15(tmp,lpc_in[i]);
  61       tmp = MULT16_16_P15(tmp, gamma);
  62    }
  63 }
  64
  65 void sanitize_values32(spx_word32_t *vec, spx_word32_t min_val, spx_word32_t max_val, int len)
  66 {
  67    int i;
  68    for (i=0;i<len;i++)
  69    {
  70       /* It's important we do the test that way so we can catch NaNs, which are neither greater nor smaller */
  71       if (!(vec[i]>=min_val && vec[i] <= max_val))
  72       {
  73          if (vec[i] < min_val)
  74             vec[i] = min_val;
  75          else if (vec[i] > max_val)
  76             vec[i] = max_val;
  77          else /* Has to be NaN */
  78             vec[i] = 0;
  79       }
  80    }
  81 }
  82
  83 void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_mem_t *mem)
  84 {
  85    int i;
  86 #ifdef FIXED_POINT
  87    const spx_word16_t Pcoef[5][3] = {{16384, -31313, 14991}, {16384, -31569, 15249}, {16384, -31677, 15328}, {16384, -32313, 15947}, {16384, -22446, 6537}};
  88    const spx_word16_t Zcoef[5][3] = {{15672, -31344, 15672}, {15802, -31601, 15802}, {15847, -31694, 15847}, {16162, -32322, 16162}, {14418, -28836, 14418}};
  89 #else
  90    const spx_word16_t Pcoef[5][3] = {{1.00000f, -1.91120f, 0.91498f}, {1.00000f, -1.92683f, 0.93071f}, {1.00000f, -1.93338f, 0.93553f}, {1.00000f, -1.97226f, 0.97332f}, {1.00000f, -1.37000f, 0.39900f}};
  91    const spx_word16_t Zcoef[5][3] = {{0.95654f, -1.91309f, 0.95654f}, {0.96446f, -1.92879f, 0.96446f}, {0.96723f, -1.93445f, 0.96723f}, {0.98645f, -1.97277f, 0.98645f}, {0.88000f, -1.76000f, 0.88000f}};
  92 #endif
  93    const spx_word16_t *den, *num;
  94    if (filtID>4)
  95       filtID=4;
  96
  97    den = Pcoef[filtID]; num = Zcoef[filtID];
  98    /*return;*/
  99    for (i=0;i<len;i++)
 100    {
 101       spx_word16_t yi;
 102       spx_word32_t vout = ADD32(MULT16_16(num[0], x[i]),mem[0]);
 103       yi = EXTRACT16(SATURATE(PSHR32(vout,14),32767));
 104       mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), SHL32(MULT16_32_Q15(-den[1],vout),1));
 105       mem[1] = ADD32(MULT16_16(num[2],x[i]), SHL32(MULT16_32_Q15(-den[2],vout),1));
 106       y[i] = yi;
 107    }
 108 }
 109
 110 #ifdef FIXED_POINT
 111
 112 /* FIXME: These functions are ugly and probably introduce too much error */
 113 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
 114 {
 115    int i;
 116    for (i=0;i<len;i++)
 117    {
 118       y[i] = SHL32(MULT16_32_Q14(EXTRACT16(SHR32(x[i],7)),scale),7);
 119    }
 120 }
 121
 122 void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len)
 123 {
 124    int i;
 125    if (scale > SHL32(EXTEND32(SIG_SCALING), 8))
 126    {
 127       spx_word16_t scale_1;
 128       scale = PSHR32(scale, SIG_SHIFT);
 129       scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
 130       for (i=0;i<len;i++)
 131       {
 132          y[i] = MULT16_16_P15(scale_1, x[i]);
 133       }
 134    } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) {
 135       spx_word16_t scale_1;
 136       scale = PSHR32(scale, SIG_SHIFT-5);
 137       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
 138       for (i=0;i<len;i++)
 139       {
 140          y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8);
 141       }
 142    } else {
 143       spx_word16_t scale_1;
 144       scale = PSHR32(scale, SIG_SHIFT-7);
 145       if (scale < 5)
 146          scale = 5;
 147       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
 148       for (i=0;i<len;i++)
 149       {
 150          y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6);
 151       }
 152    }
 153 }
 154
 155 #else
 156
 157 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
 158 {
 159    int i;
 160    for (i=0;i<len;i++)
 161       y[i] = scale*x[i];
 162 }
 163
 164 void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
 165 {
 166    int i;
 167    float scale_1 = 1/scale;
 168    for (i=0;i<len;i++)
 169       y[i] = scale_1*x[i];
 170 }
 171 #endif
 172
 173
 174
 175 #ifdef FIXED_POINT
 176
 177
 178
 179 spx_word16_t compute_rms(const spx_sig_t *x, int len)
 180 {
 181    int i;
 182    spx_word32_t sum=0;
 183    spx_sig_t max_val=1;
 184    int sig_shift;
 185
 186    for (i=0;i<len;i++)
 187    {
 188       spx_sig_t tmp = x[i];
 189       if (tmp<0)
 190          tmp = -tmp;
 191       if (tmp > max_val)
 192          max_val = tmp;
 193    }
 194
 195    sig_shift=0;
 196    while (max_val>16383)
 197    {
 198       sig_shift++;
 199       max_val >>= 1;
 200    }
 201
 202    for (i=0;i<len;i+=4)
 203    {
 204       spx_word32_t sum2=0;
 205       spx_word16_t tmp;
 206       tmp = EXTRACT16(SHR32(x[i],sig_shift));
 207       sum2 = MAC16_16(sum2,tmp,tmp);
 208       tmp = EXTRACT16(SHR32(x[i+1],sig_shift));
 209       sum2 = MAC16_16(sum2,tmp,tmp);
 210       tmp = EXTRACT16(SHR32(x[i+2],sig_shift));
 211       sum2 = MAC16_16(sum2,tmp,tmp);
 212       tmp = EXTRACT16(SHR32(x[i+3],sig_shift));
 213       sum2 = MAC16_16(sum2,tmp,tmp);
 214       sum = ADD32(sum,SHR32(sum2,6));
 215    }
 216
 217    return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
 218 }
 219
 220 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
 221 {
 222    int i;
 223    spx_word16_t max_val=10;
 224
 225    for (i=0;i<len;i++)
 226    {
 227       spx_sig_t tmp = x[i];
 228       if (tmp<0)
 229          tmp = -tmp;
 230       if (tmp > max_val)
 231          max_val = tmp;
 232    }
 233    if (max_val>16383)
 234    {
 235       spx_word32_t sum=0;
 236       for (i=0;i<len;i+=4)
 237       {
 238          spx_word32_t sum2=0;
 239          sum2 = MAC16_16(sum2,SHR16(x[i],1),SHR16(x[i],1));
 240          sum2 = MAC16_16(sum2,SHR16(x[i+1],1),SHR16(x[i+1],1));
 241          sum2 = MAC16_16(sum2,SHR16(x[i+2],1),SHR16(x[i+2],1));
 242          sum2 = MAC16_16(sum2,SHR16(x[i+3],1),SHR16(x[i+3],1));
 243          sum = ADD32(sum,SHR32(sum2,6));
 244       }
 245       return SHL16(spx_sqrt(DIV32(sum,len)),4);
 246    } else {
 247       spx_word32_t sum=0;
 248       int sig_shift=0;
 249       if (max_val < 8192)
 250          sig_shift=1;
 251       if (max_val < 4096)
 252          sig_shift=2;
 253       if (max_val < 2048)
 254          sig_shift=3;
 255       for (i=0;i<len;i+=4)
 256       {
 257          spx_word32_t sum2=0;
 258          sum2 = MAC16_16(sum2,SHL16(x[i],sig_shift),SHL16(x[i],sig_shift));
 259          sum2 = MAC16_16(sum2,SHL16(x[i+1],sig_shift),SHL16(x[i+1],sig_shift));
 260          sum2 = MAC16_16(sum2,SHL16(x[i+2],sig_shift),SHL16(x[i+2],sig_shift));
 261          sum2 = MAC16_16(sum2,SHL16(x[i+3],sig_shift),SHL16(x[i+3],sig_shift));
 262          sum = ADD32(sum,SHR32(sum2,6));
 263       }
 264       return SHL16(spx_sqrt(DIV32(sum,len)),3-sig_shift);
 265    }
 266 }
 267
 268 #ifndef OVERRIDE_NORMALIZE16
 269 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
 270 {
 271    int i;
 272    spx_sig_t max_val=1;
 273    int sig_shift;
 274
 275    for (i=0;i<len;i++)
 276    {
 277       spx_sig_t tmp = x[i];
 278       if (tmp<0)
 279          tmp = NEG32(tmp);
 280       if (tmp >= max_val)
 281          max_val = tmp;
 282    }
 283
 284    sig_shift=0;
 285    while (max_val>max_scale)
 286    {
 287       sig_shift++;
 288       max_val >>= 1;
 289    }
 290
 291    for (i=0;i<len;i++)
 292       y[i] = EXTRACT16(SHR32(x[i], sig_shift));
 293
 294    return sig_shift;
 295 }
 296 #endif
 297
 298 #else
 299
 300 spx_word16_t compute_rms(const spx_sig_t *x, int len)
 301 {
 302    int i;
 303    float sum=0;
 304    for (i=0;i<len;i++)
 305    {
 306       sum += x[i]*x[i];
 307    }
 308    return sqrt(.1+sum/len);
 309 }
 310 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
 311 {
 312    return compute_rms(x, len);
 313 }
 314 #endif
 315
 316
 317
 318 #ifndef OVERRIDE_FILTER_MEM16
 319 void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
 320 {
 321    int i,j;
 322    spx_word16_t xi,yi,nyi;
 323    for (i=0;i<N;i++)
 324    {
 325       xi= x[i];
 326       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
 327       nyi = NEG16(yi);
 328       for (j=0;j<ord-1;j++)
 329       {
 330          mem[j] = MAC16_16(MAC16_16(mem[j+1], num[j],xi), den[j],nyi);
 331       }
 332       mem[ord-1] = ADD32(MULT16_16(num[ord-1],xi), MULT16_16(den[ord-1],nyi));
 333       y[i] = yi;
 334    }
 335 }
 336 #endif
 337
 338 #ifndef OVERRIDE_IIR_MEM16
 339 void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
 340 {
 341    int i,j;
 342    spx_word16_t yi,nyi;
 343
 344    for (i=0;i<N;i++)
 345    {
 346       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
 347       nyi = NEG16(yi);
 348       for (j=0;j<ord-1;j++)
 349       {
 350          mem[j] = MAC16_16(mem[j+1],den[j],nyi);
 351       }
 352       mem[ord-1] = MULT16_16(den[ord-1],nyi);
 353       y[i] = yi;
 354    }
 355 }
 356 #endif
 357
 358 #ifndef OVERRIDE_FIR_MEM16
 359 void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
 360 {
 361    int i,j;
 362    spx_word16_t xi,yi;
 363
 364    for (i=0;i<N;i++)
 365    {
 366       xi=x[i];
 367       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
 368       for (j=0;j<ord-1;j++)
 369       {
 370          mem[j] = MAC16_16(mem[j+1], num[j],xi);
 371       }
 372       mem[ord-1] = MULT16_16(num[ord-1],xi);
 373       y[i] = yi;
 374    }
 375 }
 376 #endif
 377
 378
 379 void syn_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
 380 {
 381    int i;
 382    VARDECL(spx_mem_t *mem);
 383    ALLOC(mem, ord, spx_mem_t);
 384    for (i=0;i<ord;i++)
 385       mem[i]=0;
 386    iir_mem16(xx, ak, y, N, ord, mem, stack);
 387    for (i=0;i<ord;i++)
 388       mem[i]=0;
 389    filter_mem16(y, awk1, awk2, y, N, ord, mem, stack);
 390 }
 391 void residue_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
 392 {
 393    int i;
 394    VARDECL(spx_mem_t *mem);
 395    ALLOC(mem, ord, spx_mem_t);
 396    for (i=0;i<ord;i++)
 397       mem[i]=0;
 398    filter_mem16(xx, ak, awk1, y, N, ord, mem, stack);
 399    for (i=0;i<ord;i++)
 400       mem[i]=0;
 401    fir_mem16(y, awk2, y, N, ord, mem, stack);
 402 }
 403
 404
 405 #ifndef OVERRIDE_COMPUTE_IMPULSE_RESPONSE
 406 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
 407 {
 408    int i,j;
 409    spx_word16_t y1, ny1i, ny2i;
 410    VARDECL(spx_mem_t *mem1);
 411    VARDECL(spx_mem_t *mem2);
 412    ALLOC(mem1, ord, spx_mem_t);
 413    ALLOC(mem2, ord, spx_mem_t);
 414
 415    y[0] = LPC_SCALING;
 416    for (i=0;i<ord;i++)
 417       y[i+1] = awk1[i];
 418    i++;
 419    for (;i<N;i++)
 420       y[i] = VERY_SMALL;
 421    for (i=0;i<ord;i++)
 422       mem1[i] = mem2[i] = 0;
 423    for (i=0;i<N;i++)
 424    {
 425       y1 = ADD16(y[i], EXTRACT16(PSHR32(mem1[0],LPC_SHIFT)));
 426       ny1i = NEG16(y1);
 427       y[i] = PSHR32(ADD32(SHL32(EXTEND32(y1),LPC_SHIFT+1),mem2[0]),LPC_SHIFT);
 428       ny2i = NEG16(y[i]);
 429       for (j=0;j<ord-1;j++)
 430       {
 431          mem1[j] = MAC16_16(mem1[j+1], awk2[j],ny1i);
 432          mem2[j] = MAC16_16(mem2[j+1], ak[j],ny2i);
 433       }
 434       mem1[ord-1] = MULT16_16(awk2[ord-1],ny1i);
 435       mem2[ord-1] = MULT16_16(ak[ord-1],ny2i);
 436    }
 437 }
 438 #endif
 439
 440 /* Decomposes a signal into low-band and high-band using a QMF */
 441 void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1, spx_word16_t *y2, int N, int M, spx_word16_t *mem, char *stack)
 442 {
 443    int i,j,k,M2;
 444    VARDECL(spx_word16_t *a);
 445    VARDECL(spx_word16_t *x);
 446    spx_word16_t *x2;
 447
 448    ALLOC(a, M, spx_word16_t);
 449    ALLOC(x, N+M-1, spx_word16_t);
 450    x2=x+M-1;
 451    M2=M>>1;
 452    for (i=0;i<M;i++)
 453       a[M-i-1]= aa[i];
 454    for (i=0;i<M-1;i++)
 455       x[i]=mem[M-i-2];
 456    for (i=0;i<N;i++)
 457       x[i+M-1]=SHR16(xx[i],1);
 458    for (i=0;i<M-1;i++)
 459       mem[i]=SHR16(xx[N-i-1],1);
 460    for (i=0,k=0;i<N;i+=2,k++)
 461    {
 462       spx_word32_t y1k=0, y2k=0;
 463       for (j=0;j<M2;j++)
 464       {
 465          y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
 466          y2k=SUB32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
 467          j++;
 468          y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
 469          y2k=ADD32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
 470       }
 471       y1[k] = EXTRACT16(SATURATE(PSHR32(y1k,15),32767));
 472       y2[k] = EXTRACT16(SATURATE(PSHR32(y2k,15),32767));
 473    }
 474 }
 475
 476 /* Re-synthesised a signal from the QMF low-band and high-band signals */
 477 void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word16_t *mem1, spx_word16_t *mem2, char *stack)
 478    /* assumptions:
 479       all odd x[i] are zero -- well, actually they are left out of the array now
 480       N and M are multiples of 4 */
 481 {
 482    int i, j;
 483    int M2, N2;
 484    VARDECL(spx_word16_t *xx1);
 485    VARDECL(spx_word16_t *xx2);
 486
 487    M2 = M>>1;
 488    N2 = N>>1;
 489    ALLOC(xx1, M2+N2, spx_word16_t);
 490    ALLOC(xx2, M2+N2, spx_word16_t);
 491
 492    for (i = 0; i < N2; i++)
 493       xx1[i] = x1[N2-1-i];
 494    for (i = 0; i < M2; i++)
 495       xx1[N2+i] = mem1[2*i+1];
 496    for (i = 0; i < N2; i++)
 497       xx2[i] = x2[N2-1-i];
 498    for (i = 0; i < M2; i++)
 499       xx2[N2+i] = mem2[2*i+1];
 500
 501    for (i = 0; i < N2; i += 2) {
 502       spx_sig_t y0, y1, y2, y3;
 503       spx_word16_t x10, x20;
 504
 505       y0 = y1 = y2 = y3 = 0;
 506       x10 = xx1[N2-2-i];
 507       x20 = xx2[N2-2-i];
 508
 509       for (j = 0; j < M2; j += 2) {
 510          spx_word16_t x11, x21;
 511          spx_word16_t a0, a1;
 512
 513          a0 = a[2*j];
 514          a1 = a[2*j+1];
 515          x11 = xx1[N2-1+j-i];
 516          x21 = xx2[N2-1+j-i];
 517
 518 #ifdef FIXED_POINT
 519          /* We multiply twice by the same coef to avoid overflows */
 520          y0 = MAC16_16(MAC16_16(y0, a0, x11), NEG16(a0), x21);
 521          y1 = MAC16_16(MAC16_16(y1, a1, x11), a1, x21);
 522          y2 = MAC16_16(MAC16_16(y2, a0, x10), NEG16(a0), x20);
 523          y3 = MAC16_16(MAC16_16(y3, a1, x10), a1, x20);
 524 #else
 525          y0 = ADD32(y0,MULT16_16(a0, x11-x21));
 526          y1 = ADD32(y1,MULT16_16(a1, x11+x21));
 527          y2 = ADD32(y2,MULT16_16(a0, x10-x20));
 528          y3 = ADD32(y3,MULT16_16(a1, x10+x20));
 529 #endif
 530          a0 = a[2*j+2];
 531          a1 = a[2*j+3];
 532          x10 = xx1[N2+j-i];
 533          x20 = xx2[N2+j-i];
 534
 535 #ifdef FIXED_POINT
 536          /* We multiply twice by the same coef to avoid overflows */
 537          y0 = MAC16_16(MAC16_16(y0, a0, x10), NEG16(a0), x20);
 538          y1 = MAC16_16(MAC16_16(y1, a1, x10), a1, x20);
 539          y2 = MAC16_16(MAC16_16(y2, a0, x11), NEG16(a0), x21);
 540          y3 = MAC16_16(MAC16_16(y3, a1, x11), a1, x21);
 541 #else
 542          y0 = ADD32(y0,MULT16_16(a0, x10-x20));
 543          y1 = ADD32(y1,MULT16_16(a1, x10+x20));
 544          y2 = ADD32(y2,MULT16_16(a0, x11-x21));
 545          y3 = ADD32(y3,MULT16_16(a1, x11+x21));
 546 #endif
 547       }
 548 #ifdef FIXED_POINT
 549       y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767));
 550       y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767));
 551       y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767));
 552       y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767));
 553 #else
 554       /* Normalize up explicitly if we're in float */
 555       y[2*i] = 2.f*y0;
 556       y[2*i+1] = 2.f*y1;
 557       y[2*i+2] = 2.f*y2;
 558       y[2*i+3] = 2.f*y3;
 559 #endif
 560    }
 561
 562    for (i = 0; i < M2; i++)
 563       mem1[2*i+1] = xx1[i];
 564    for (i = 0; i < M2; i++)
 565       mem2[2*i+1] = xx2[i];
 566 }
 567
 568 #ifdef FIXED_POINT
 569 #if 0
 570 const spx_word16_t shift_filt[3][7] = {{-33,    1043,   -4551,   19959,   19959,   -4551,    1043},
 571                                  {-98,    1133,   -4425,   29179,    8895,   -2328,     444},
 572                                  {444,   -2328,    8895,   29179,   -4425,    1133,     -98}};
 573 #else
 574 const spx_word16_t shift_filt[3][7] = {{-390,    1540,   -4993,   20123,   20123,   -4993,    1540},
 575                                 {-1064,    2817,   -6694,   31589,    6837,    -990,    -209},
 576                                  {-209,    -990,    6837,   31589,   -6694,    2817,   -1064}};
 577 #endif
 578 #else
 579 #if 0
 580 const float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02},
 581                           {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403},
 582                           {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613,  -0.0029937}};
 583 #else
 584 const float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f},
 585                           {-0.0324855f, 0.0859768f, -0.2042986f, 0.9640297f, 0.2086420f, -0.0302054f, -0.0063646f},
 586                           {-0.0063646f, -0.0302054f, 0.2086420f, 0.9640297f, -0.2042986f, 0.0859768f, -0.0324855f}};
 587 #endif
 588 #endif
 589
 590 int interp_pitch(
 591 spx_word16_t *exc,          /*decoded excitation*/
 592 spx_word16_t *interp,          /*decoded excitation*/
 593 int pitch,               /*pitch period*/
 594 int len
 595 )
 596 {
 597    int i,j,k;
 598    spx_word32_t corr[4][7];
 599    spx_word32_t maxcorr;
 600    int maxi, maxj;
 601    for (i=0;i<7;i++)
 602    {
 603       corr[0][i] = inner_prod(exc, exc-pitch-3+i, len);
 604    }
 605    for (i=0;i<3;i++)
 606    {
 607       for (j=0;j<7;j++)
 608       {
 609          int i1, i2;
 610          spx_word32_t tmp=0;
 611          i1 = 3-j;
 612          if (i1<0)
 613             i1 = 0;
 614          i2 = 10-j;
 615          if (i2>7)
 616             i2 = 7;
 617          for (k=i1;k<i2;k++)
 618             tmp += MULT16_32_Q15(shift_filt[i][k],corr[0][j+k-3]);
 619          corr[i+1][j] = tmp;
 620       }
 621    }
 622    maxi=maxj=0;
 623    maxcorr = corr[0][0];
 624    for (i=0;i<4;i++)
 625    {
 626       for (j=0;j<7;j++)
 627       {
 628          if (corr[i][j] > maxcorr)
 629          {
 630             maxcorr = corr[i][j];
 631             maxi=i;
 632             maxj=j;
 633          }
 634       }
 635    }
 636    for (i=0;i<len;i++)
 637    {
 638       spx_word32_t tmp = 0;
 639       if (maxi>0)
 640       {
 641          for (k=0;k<7;k++)
 642          {
 643             tmp += MULT16_16(exc[i-(pitch-maxj+3)+k-3],shift_filt[maxi-1][k]);
 644          }
 645       } else {
 646          tmp = SHL32(exc[i-(pitch-maxj+3)],15);
 647       }
 648       interp[i] = PSHR32(tmp,15);
 649    }
 650    return pitch-maxj+3;
 651 }
 652
 653 void multicomb(
 654 spx_word16_t *exc,          /*decoded excitation*/
 655 spx_word16_t *new_exc,      /*enhanced excitation*/
 656 spx_coef_t *ak,           /*LPC filter coefs*/
 657 int p,               /*LPC order*/
 658 int nsf,             /*sub-frame size*/
 659 int pitch,           /*pitch period*/
 660 int max_pitch,
 661 spx_word16_t  comb_gain,    /*gain of comb filter*/
 662 char *stack
 663 )
 664 {
 665    int i;
 666    VARDECL(spx_word16_t *iexc);
 667    spx_word16_t old_ener, new_ener;
 668    int corr_pitch;
 669
 670    spx_word16_t iexc0_mag, iexc1_mag, exc_mag;
 671    spx_word32_t corr0, corr1;
 672    spx_word16_t gain0, gain1;
 673    spx_word16_t pgain1, pgain2;
 674    spx_word16_t c1, c2;
 675    spx_word16_t g1, g2;
 676    spx_word16_t ngain;
 677    spx_word16_t gg1, gg2;
 678 #ifdef FIXED_POINT
 679    int scaledown=0;
 680 #endif
 681 #if 0 /* Set to 1 to enable full pitch search */
 682    int nol_pitch[6];
 683    spx_word16_t nol_pitch_coef[6];
 684    spx_word16_t ol_pitch_coef;
 685    open_loop_nbest_pitch(exc, 20, 120, nsf,
 686                          nol_pitch, nol_pitch_coef, 6, stack);
 687    corr_pitch=nol_pitch[0];
 688    ol_pitch_coef = nol_pitch_coef[0];
 689    /*Try to remove pitch multiples*/
 690    for (i=1;i<6;i++)
 691    {
 692 #ifdef FIXED_POINT
 693       if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) &&
 694 #else
 695       if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) &&
 696 #endif
 697          (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 ||
 698          ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5))
 699       {
 700          corr_pitch = nol_pitch[i];
 701       }
 702    }
 703 #else
 704    corr_pitch = pitch;
 705 #endif
 706
 707    ALLOC(iexc, 2*nsf, spx_word16_t);
 708
 709    interp_pitch(exc, iexc, corr_pitch, 80);
 710    if (corr_pitch>max_pitch)
 711       interp_pitch(exc, iexc+nsf, 2*corr_pitch, 80);
 712    else
 713       interp_pitch(exc, iexc+nsf, -corr_pitch, 80);
 714
 715 #ifdef FIXED_POINT
 716    for (i=0;i<nsf;i++)
 717    {
 718       if (ABS16(exc[i])>16383)
 719       {
 720          scaledown = 1;
 721          break;
 722       }
 723    }
 724    if (scaledown)
 725    {
 726       for (i=0;i<nsf;i++)
 727          exc[i] = SHR16(exc[i],1);
 728       for (i=0;i<2*nsf;i++)
 729          iexc[i] = SHR16(iexc[i],1);
 730    }
 731 #endif
 732    /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/
 733
 734    /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/
 735    iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf));
 736    iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf));
 737    exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf));
 738    corr0  = inner_prod(iexc,exc,nsf);
 739    if (corr0<0)
 740       corr0=0;
 741    corr1 = inner_prod(iexc+nsf,exc,nsf);
 742    if (corr1<0)
 743       corr1=0;
 744 #ifdef FIXED_POINT
 745    /* Doesn't cost much to limit the ratio and it makes the rest easier */
 746    if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag))
 747       iexc0_mag = ADD16(1,PSHR16(exc_mag,6));
 748    if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag))
 749       iexc1_mag = ADD16(1,PSHR16(exc_mag,6));
 750 #endif
 751    if (corr0 > MULT16_16(iexc0_mag,exc_mag))
 752       pgain1 = QCONST16(1., 14);
 753    else
 754       pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag);
 755    if (corr1 > MULT16_16(iexc1_mag,exc_mag))
 756       pgain2 = QCONST16(1., 14);
 757    else
 758       pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag);
 759    gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag);
 760    gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag);
 761    if (comb_gain>0)
 762    {
 763 #ifdef FIXED_POINT
 764       c1 = (MULT16_16_Q15(QCONST16(.4,15),comb_gain)+QCONST16(.07,15));
 765       c2 = QCONST16(.5,15)+MULT16_16_Q14(QCONST16(1.72,14),(c1-QCONST16(.07,15)));
 766 #else
 767       c1 = .4*comb_gain+.07;
 768       c2 = .5+1.72*(c1-.07);
 769 #endif
 770    } else
 771    {
 772       c1=c2=0;
 773    }
 774 #ifdef FIXED_POINT
 775    g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1);
 776    g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2);
 777 #else
 778    g1 = 1-c2*pgain1*pgain1;
 779    g2 = 1-c2*pgain2*pgain2;
 780 #endif
 781    if (g1<c1)
 782       g1 = c1;
 783    if (g2<c1)
 784       g2 = c1;
 785    g1 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g1);
 786    g2 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g2);
 787    if (corr_pitch>max_pitch)
 788    {
 789       gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1));
 790       gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2));
 791    } else {
 792       gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1));
 793       gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2));
 794    }
 795    for (i=0;i<nsf;i++)
 796       new_exc[i] = ADD16(exc[i], EXTRACT16(PSHR32(ADD32(MULT16_16(gain0,iexc[i]), MULT16_16(gain1,iexc[i+nsf])),8)));
 797    /* FIXME: compute_rms16 is currently not quite accurate enough (but close) */
 798    new_ener = compute_rms16(new_exc, nsf);
 799    old_ener = compute_rms16(exc, nsf);
 800
 801    if (old_ener < 1)
 802       old_ener = 1;
 803    if (new_ener < 1)
 804       new_ener = 1;
 805    if (old_ener > new_ener)
 806       old_ener = new_ener;
 807    ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener);
 808
 809    for (i=0;i<nsf;i++)
 810       new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]);
 811 #ifdef FIXED_POINT
 812    if (scaledown)
 813    {
 814       for (i=0;i<nsf;i++)
 815          exc[i] = SHL16(exc[i],1);
 816       for (i=0;i<nsf;i++)
 817          new_exc[i] = SHL16(SATURATE16(new_exc[i],16383),1);
 818    }
 819 #endif
 820 }
 821