1 /* This code is included several times in lpc_intrin_sse2.c with different
2 * values for MAX_LAG. The comments describe the MAX_LAG == 14 case. */
/* Each sumN holds two adjacent autocorrelation accumulators and each dN two
 * adjacent delayed samples (one double per lane). */
4 __m128d sum0, sum1, sum2, sum3;
5 __m128d d0, d1, d2, d3;
16 FLAC__ASSERT(lag <= MAX_LAG);
18 /* Zero every accumulator (sum0..sum6) and every delay register (d0..d6) */
19 sum0 = _mm_setzero_pd();
20 sum1 = _mm_setzero_pd();
21 sum2 = _mm_setzero_pd();
22 sum3 = _mm_setzero_pd();
23 d0 = _mm_setzero_pd();
24 d1 = _mm_setzero_pd();
25 d2 = _mm_setzero_pd();
26 d3 = _mm_setzero_pd();
28 sum4 = _mm_setzero_pd();
29 d4 = _mm_setzero_pd();
32 sum5 = _mm_setzero_pd();
33 sum6 = _mm_setzero_pd();
34 d5 = _mm_setzero_pd();
35 d6 = _mm_setzero_pd();
38 /* Walk the samples backwards, from data[data_len-1] down to data[0] */
39 for(i = data_len-1; i >= 0; i--) {
/* Broadcast the current sample into both lanes */
40 __m128d d = _mm_set1_pd(data[i]);
42 /* The next lines of code work like a queue: d0..d6 form a 14-lane shift
43 * register. Each shuffle takes the high lane of the lower register and the
 * low lane of the register itself, so every value moves up one lane and
 * data[i] enters the low lane of d0. Afterwards lane k holds data[i+k]
 * (or 0 past the end of the input). The registers are updated from d6 down
 * to d0 so that each one still reads its neighbour's previous value. For
 * more information see the lag8 version of this function */
45 d6 = _mm_shuffle_pd(d5, d6, _MM_SHUFFLE(0,0,0,1));
46 d5 = _mm_shuffle_pd(d4, d5, _MM_SHUFFLE(0,0,0,1));
49 d4 = _mm_shuffle_pd(d3, d4, _MM_SHUFFLE(0,0,0,1));
51 d3 = _mm_shuffle_pd(d2, d3, _MM_SHUFFLE(0,0,0,1));
52 d2 = _mm_shuffle_pd(d1, d2, _MM_SHUFFLE(0,0,0,1));
53 d1 = _mm_shuffle_pd(d0, d1, _MM_SHUFFLE(0,0,0,1));
54 d0 = _mm_shuffle_pd(d, d0, _MM_SHUFFLE(0,0,0,1));
/* sumN += d*dN, i.e. lane k of the accumulators gains data[i]*data[i+k] */
57 sum0 = _mm_add_pd(sum0, _mm_mul_pd(d, d0));
58 sum1 = _mm_add_pd(sum1, _mm_mul_pd(d, d1));
59 sum2 = _mm_add_pd(sum2, _mm_mul_pd(d, d2));
60 sum3 = _mm_add_pd(sum3, _mm_mul_pd(d, d3));
62 sum4 = _mm_add_pd(sum4, _mm_mul_pd(d, d4));
65 sum5 = _mm_add_pd(sum5, _mm_mul_pd(d, d5));
66 sum6 = _mm_add_pd(sum6, _mm_mul_pd(d, d6));
70 /* Store sum0..sum6 in autoc[0..13] (unaligned stores, two doubles each) */
71 _mm_storeu_pd(autoc, sum0);
72 _mm_storeu_pd(autoc+2, sum1);
73 _mm_storeu_pd(autoc+4, sum2);
74 _mm_storeu_pd(autoc+6 ,sum3);
76 _mm_storeu_pd(autoc+8, sum4);
79 _mm_storeu_pd(autoc+10,sum5);
80 _mm_storeu_pd(autoc+12,sum6);