OSDN Git Service

Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
author: Vitor Sessak <vitor1001@gmail.com>
Wed, 16 Dec 2009 17:09:33 +0000 (17:09 +0000)
committer: Vitor Sessak <vitor1001@gmail.com>
Wed, 16 Dec 2009 17:09:33 +0000 (17:09 +0000)
Originally committed as revision 20884 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/celp_filters.c
libavcodec/celp_filters.h

index d886085..7f23aca 100644 (file)
@@ -93,7 +93,102 @@ void ff_celp_lp_synthesis_filterf(float *out,
 {
     int i,n;
 
-    for (n = 0; n < buffer_length; n++) {
+    float out0, out1, out2, out3;
+    float old_out0, old_out1, old_out2, old_out3;
+    float a,b,c;
+
+    a = filter_coeffs[0];
+    b = filter_coeffs[1];
+    c = filter_coeffs[2];
+    b -= filter_coeffs[0] * filter_coeffs[0];
+    c -= filter_coeffs[1] * filter_coeffs[0];
+    c -= filter_coeffs[0] * b;
+
+    old_out0 = out[-4];
+    old_out1 = out[-3];
+    old_out2 = out[-2];
+    old_out3 = out[-1];
+    for (n = 0; n <= buffer_length - 4; n+=4) {
+        float tmp0,tmp1,tmp2,tmp3;
+        float val;
+
+        out0 = in[0];
+        out1 = in[1];
+        out2 = in[2];
+        out3 = in[3];
+
+        out0 -= filter_coeffs[2] * old_out1;
+        out1 -= filter_coeffs[2] * old_out2;
+        out2 -= filter_coeffs[2] * old_out3;
+
+        out0 -= filter_coeffs[1] * old_out2;
+        out1 -= filter_coeffs[1] * old_out3;
+
+        out0 -= filter_coeffs[0] * old_out3;
+
+        val = filter_coeffs[3];
+
+        out0 -= val * old_out0;
+        out1 -= val * old_out1;
+        out2 -= val * old_out2;
+        out3 -= val * old_out3;
+
+        old_out3 = out[-5];
+
+        for (i = 5; i <= filter_length; i += 2) {
+            val = filter_coeffs[i-1];
+
+            out0 -= val * old_out3;
+            out1 -= val * old_out0;
+            out2 -= val * old_out1;
+            out3 -= val * old_out2;
+
+            old_out2 = out[-i-1];
+
+            val = filter_coeffs[i];
+
+            out0 -= val * old_out2;
+            out1 -= val * old_out3;
+            out2 -= val * old_out0;
+            out3 -= val * old_out1;
+
+            FFSWAP(float, old_out0, old_out2);
+            old_out1 = old_out3;
+            old_out3 = out[-i-2];
+        }
+
+        tmp0 = out0;
+        tmp1 = out1;
+        tmp2 = out2;
+        tmp3 = out3;
+
+        out3 -= a * tmp2;
+        out2 -= a * tmp1;
+        out1 -= a * tmp0;
+
+        out3 -= b * tmp1;
+        out2 -= b * tmp0;
+
+        out3 -= c * tmp0;
+
+
+        out[0] = out0;
+        out[1] = out1;
+        out[2] = out2;
+        out[3] = out3;
+
+        old_out0 = out0;
+        old_out1 = out1;
+        old_out2 = out2;
+        old_out3 = out3;
+
+        out += 4;
+        in  += 4;
+    }
+
+    out -= n;
+    in -= n;
+    for (; n < buffer_length; n++) {
         out[n] = in[n];
         for (i = 1; i <= filter_length; i++)
             out[n] -= filter_coeffs[i-1] * out[n-i];
index d9db95d..6069a3f 100644 (file)
@@ -90,7 +90,8 @@ int ff_celp_lp_synthesis_filter(int16_t *out,
  * @param filter_coeffs filter coefficients.
  * @param in input signal
  * @param buffer_length amount of data to process
- * @param filter_length filter length (10 for 10th order LP filter)
+ * @param filter_length filter length (10 for 10th order LP filter). Must be
+ *                      greater than 4 and even.
  *
  * @note Output buffer must contain filter_length samples of past
  *       speech data before pointer.