android-x86/external-bluetooth-sbc.git: sbc/sbc_primitives_mmx.c
/*
 *
 *  Bluetooth low-complexity, subband codec (SBC) library
 *
 *  Copyright (C) 2008-2010  Nokia Corporation
 *  Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
 *  Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
 *  Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
 *
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

#include <stdint.h>
#include <limits.h>
#include "sbc.h"
#include "sbc_math.h"
#include "sbc_tables.h"

#include "sbc_primitives_mmx.h"

/*
 * MMX optimizations
 */
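
/*
 * All routines below are built around "pmaddwd": it multiplies four packed
 * signed 16-bit values by four 16-bit constants and sums adjacent products
 * into two 32-bit accumulators, which is why the analysis constant tables
 * use the interleaved "simd" layout.  MMX shares its registers with the x87
 * FPU, so every exported code path finishes with "emms".
 */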

#ifdef SBC_BUILD_WITH_MMX_SUPPORT

static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out,
                                        const FIXED_T *consts)
{
        static const SBC_ALIGNED int32_t round_c[2] = {
                1 << (SBC_PROTO_FIXED4_SCALE - 1),
                1 << (SBC_PROTO_FIXED4_SCALE - 1),
        };
        __asm__ volatile (
                "movq        (%0), %%mm0\n"
                "movq       8(%0), %%mm1\n"
                "pmaddwd     (%1), %%mm0\n"
                "pmaddwd    8(%1), %%mm1\n"
                "paddd       (%2), %%mm0\n"
                "paddd       (%2), %%mm1\n"
                "\n"
                "movq      16(%0), %%mm2\n"
                "movq      24(%0), %%mm3\n"
                "pmaddwd   16(%1), %%mm2\n"
                "pmaddwd   24(%1), %%mm3\n"
                "paddd      %%mm2, %%mm0\n"
                "paddd      %%mm3, %%mm1\n"
                "\n"
                "movq      32(%0), %%mm2\n"
                "movq      40(%0), %%mm3\n"
                "pmaddwd   32(%1), %%mm2\n"
                "pmaddwd   40(%1), %%mm3\n"
                "paddd      %%mm2, %%mm0\n"
                "paddd      %%mm3, %%mm1\n"
                "\n"
                "movq      48(%0), %%mm2\n"
                "movq      56(%0), %%mm3\n"
                "pmaddwd   48(%1), %%mm2\n"
                "pmaddwd   56(%1), %%mm3\n"
                "paddd      %%mm2, %%mm0\n"
                "paddd      %%mm3, %%mm1\n"
                "\n"
                "movq      64(%0), %%mm2\n"
                "movq      72(%0), %%mm3\n"
                "pmaddwd   64(%1), %%mm2\n"
                "pmaddwd   72(%1), %%mm3\n"
                "paddd      %%mm2, %%mm0\n"
                "paddd      %%mm3, %%mm1\n"
                "\n"
                "psrad         %4, %%mm0\n"
                "psrad         %4, %%mm1\n"
                "packssdw   %%mm0, %%mm0\n"
                "packssdw   %%mm1, %%mm1\n"
                "\n"
                "movq       %%mm0, %%mm2\n"
                "pmaddwd   80(%1), %%mm0\n"
                "pmaddwd   88(%1), %%mm2\n"
                "\n"
                "movq       %%mm1, %%mm3\n"
                "pmaddwd   96(%1), %%mm1\n"
                "pmaddwd  104(%1), %%mm3\n"
                "paddd      %%mm1, %%mm0\n"
                "paddd      %%mm3, %%mm2\n"
                "\n"
                "movq       %%mm0, (%3)\n"
                "movq       %%mm2, 8(%3)\n"
                :
                : "r" (in), "r" (consts), "r" (&round_c), "r" (out),
                        "i" (SBC_PROTO_FIXED4_SCALE)
                : "cc", "memory");
}
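
/*
 * For reference, a plain C sketch of what the asm above computes.  The
 * function below is illustrative only (nothing in the encoder calls it) and
 * assumes the interleaved layout of analysis_consts_fixed4_simd_*: 40
 * windowing coefficients followed by the 4x4 cosine matrix, with each
 * pmaddwd pairing in[2k]*consts[2k] + in[2k+1]*consts[2k+1].
 */
static inline void sbc_analyze_four_ref(const int16_t *in, int32_t *out,
                                        const FIXED_T *consts)
{
        int32_t t[4];
        int16_t s[4];
        int i;

        /* rounding constant, same as round_c in the MMX version */
        t[0] = t[1] = t[2] = t[3] = 1 << (SBC_PROTO_FIXED4_SCALE - 1);

        /* 40-tap polyphase window: accumulator (i % 8) / 2 gets in[i]*consts[i] */
        for (i = 0; i < 40; i++)
                t[(i % 8) / 2] += (int32_t) in[i] * consts[i];

        /* scale down and saturate to 16 bits (psrad + packssdw) */
        for (i = 0; i < 4; i++) {
                int32_t v = t[i] >> SBC_PROTO_FIXED4_SCALE;
                s[i] = v > INT16_MAX ? INT16_MAX : v < INT16_MIN ? INT16_MIN : v;
        }

        /* 4-point cosine modulation with the matrix stored at consts[40..55] */
        for (i = 0; i < 4; i++)
                out[i] = (int32_t) s[0] * consts[40 + 2 * i] +
                         (int32_t) s[1] * consts[41 + 2 * i] +
                         (int32_t) s[2] * consts[48 + 2 * i] +
                         (int32_t) s[3] * consts[49 + 2 * i];
}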

static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out,
                                                        const FIXED_T *consts)
{
        static const SBC_ALIGNED int32_t round_c[2] = {
                1 << (SBC_PROTO_FIXED8_SCALE - 1),
                1 << (SBC_PROTO_FIXED8_SCALE - 1),
        };
        __asm__ volatile (
                "movq        (%0), %%mm0\n"
                "movq       8(%0), %%mm1\n"
                "movq      16(%0), %%mm2\n"
                "movq      24(%0), %%mm3\n"
                "pmaddwd     (%1), %%mm0\n"
                "pmaddwd    8(%1), %%mm1\n"
                "pmaddwd   16(%1), %%mm2\n"
                "pmaddwd   24(%1), %%mm3\n"
                "paddd       (%2), %%mm0\n"
                "paddd       (%2), %%mm1\n"
                "paddd       (%2), %%mm2\n"
                "paddd       (%2), %%mm3\n"
                "\n"
                "movq      32(%0), %%mm4\n"
                "movq      40(%0), %%mm5\n"
                "movq      48(%0), %%mm6\n"
                "movq      56(%0), %%mm7\n"
                "pmaddwd   32(%1), %%mm4\n"
                "pmaddwd   40(%1), %%mm5\n"
                "pmaddwd   48(%1), %%mm6\n"
                "pmaddwd   56(%1), %%mm7\n"
                "paddd      %%mm4, %%mm0\n"
                "paddd      %%mm5, %%mm1\n"
                "paddd      %%mm6, %%mm2\n"
                "paddd      %%mm7, %%mm3\n"
                "\n"
                "movq      64(%0), %%mm4\n"
                "movq      72(%0), %%mm5\n"
                "movq      80(%0), %%mm6\n"
                "movq      88(%0), %%mm7\n"
                "pmaddwd   64(%1), %%mm4\n"
                "pmaddwd   72(%1), %%mm5\n"
                "pmaddwd   80(%1), %%mm6\n"
                "pmaddwd   88(%1), %%mm7\n"
                "paddd      %%mm4, %%mm0\n"
                "paddd      %%mm5, %%mm1\n"
                "paddd      %%mm6, %%mm2\n"
                "paddd      %%mm7, %%mm3\n"
                "\n"
                "movq      96(%0), %%mm4\n"
                "movq     104(%0), %%mm5\n"
                "movq     112(%0), %%mm6\n"
                "movq     120(%0), %%mm7\n"
                "pmaddwd   96(%1), %%mm4\n"
                "pmaddwd  104(%1), %%mm5\n"
                "pmaddwd  112(%1), %%mm6\n"
                "pmaddwd  120(%1), %%mm7\n"
                "paddd      %%mm4, %%mm0\n"
                "paddd      %%mm5, %%mm1\n"
                "paddd      %%mm6, %%mm2\n"
                "paddd      %%mm7, %%mm3\n"
                "\n"
                "movq     128(%0), %%mm4\n"
                "movq     136(%0), %%mm5\n"
                "movq     144(%0), %%mm6\n"
                "movq     152(%0), %%mm7\n"
                "pmaddwd  128(%1), %%mm4\n"
                "pmaddwd  136(%1), %%mm5\n"
                "pmaddwd  144(%1), %%mm6\n"
                "pmaddwd  152(%1), %%mm7\n"
                "paddd      %%mm4, %%mm0\n"
                "paddd      %%mm5, %%mm1\n"
                "paddd      %%mm6, %%mm2\n"
                "paddd      %%mm7, %%mm3\n"
                "\n"
                "psrad         %4, %%mm0\n"
                "psrad         %4, %%mm1\n"
                "psrad         %4, %%mm2\n"
                "psrad         %4, %%mm3\n"
                "\n"
                "packssdw   %%mm0, %%mm0\n"
                "packssdw   %%mm1, %%mm1\n"
                "packssdw   %%mm2, %%mm2\n"
                "packssdw   %%mm3, %%mm3\n"
                "\n"
                "movq       %%mm0, %%mm4\n"
                "movq       %%mm0, %%mm5\n"
                "pmaddwd  160(%1), %%mm4\n"
                "pmaddwd  168(%1), %%mm5\n"
                "\n"
                "movq       %%mm1, %%mm6\n"
                "movq       %%mm1, %%mm7\n"
                "pmaddwd  192(%1), %%mm6\n"
                "pmaddwd  200(%1), %%mm7\n"
                "paddd      %%mm6, %%mm4\n"
                "paddd      %%mm7, %%mm5\n"
                "\n"
                "movq       %%mm2, %%mm6\n"
                "movq       %%mm2, %%mm7\n"
                "pmaddwd  224(%1), %%mm6\n"
                "pmaddwd  232(%1), %%mm7\n"
                "paddd      %%mm6, %%mm4\n"
                "paddd      %%mm7, %%mm5\n"
                "\n"
                "movq       %%mm3, %%mm6\n"
                "movq       %%mm3, %%mm7\n"
                "pmaddwd  256(%1), %%mm6\n"
                "pmaddwd  264(%1), %%mm7\n"
                "paddd      %%mm6, %%mm4\n"
                "paddd      %%mm7, %%mm5\n"
                "\n"
                "movq       %%mm4, (%3)\n"
                "movq       %%mm5, 8(%3)\n"
                "\n"
                "movq       %%mm0, %%mm5\n"
                "pmaddwd  176(%1), %%mm0\n"
                "pmaddwd  184(%1), %%mm5\n"
                "\n"
                "movq       %%mm1, %%mm7\n"
                "pmaddwd  208(%1), %%mm1\n"
                "pmaddwd  216(%1), %%mm7\n"
                "paddd      %%mm1, %%mm0\n"
                "paddd      %%mm7, %%mm5\n"
                "\n"
                "movq       %%mm2, %%mm7\n"
                "pmaddwd  240(%1), %%mm2\n"
                "pmaddwd  248(%1), %%mm7\n"
                "paddd      %%mm2, %%mm0\n"
                "paddd      %%mm7, %%mm5\n"
                "\n"
                "movq       %%mm3, %%mm7\n"
                "pmaddwd  272(%1), %%mm3\n"
                "pmaddwd  280(%1), %%mm7\n"
                "paddd      %%mm3, %%mm0\n"
                "paddd      %%mm7, %%mm5\n"
                "\n"
                "movq       %%mm0, 16(%3)\n"
                "movq       %%mm5, 24(%3)\n"
                :
                : "r" (in), "r" (consts), "r" (&round_c), "r" (out),
                        "i" (SBC_PROTO_FIXED8_SCALE)
                : "cc", "memory");
}
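
/*
 * sbc_analyze_eight_mmx() above follows the same pattern as the four-subband
 * version: an 80-tap polyphase window feeds eight 32-bit accumulators held
 * in mm0-mm3, and the 8x8 cosine matrix stored at consts[80..143] is then
 * applied in two passes of four outputs each, since all eight MMX registers
 * are already occupied during the matrixing step.
 */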

static inline void sbc_analyze_4b_4s_mmx(struct sbc_encoder_state *state,
                int16_t *x, int32_t *out, int out_stride)
{
        /* Analyze blocks */
        sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd);
        out += out_stride;
        sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even);
        out += out_stride;
        sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd);
        out += out_stride;
        sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even);

        __asm__ volatile ("emms\n");
}
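
/*
 * In the wrapper above (and in its eight-subband counterpart below) the four
 * blocks are analysed newest first, stepping back by one subband period per
 * call, and alternate between the "_odd" and "_even" constant tables whose
 * coefficient ordering matches the placement of odd and even blocks in the
 * input buffer.
 */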

static inline void sbc_analyze_4b_8s_mmx(struct sbc_encoder_state *state,
                int16_t *x, int32_t *out, int out_stride)
{
        /* Analyze blocks */
        sbc_analyze_eight_mmx(x + 24, out, analysis_consts_fixed8_simd_odd);
        out += out_stride;
        sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even);
        out += out_stride;
        sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd);
        out += out_stride;
        sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even);

        __asm__ volatile ("emms\n");
}
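
/*
 * Both helpers above leave the MMX state dirty across the four analysis
 * calls and issue "emms" only once at the end; without it, any x87 floating
 * point code executed afterwards would see a corrupted register stack.
 */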

static void sbc_calc_scalefactors_mmx(
        int32_t sb_sample_f[16][2][8],
        uint32_t scale_factor[2][8],
        int blocks, int channels, int subbands)
{
        static const SBC_ALIGNED int32_t consts[2] = {
                1 << SCALE_OUT_BITS,
                1 << SCALE_OUT_BITS,
        };
        int ch, sb;
        intptr_t blk;
        for (ch = 0; ch < channels; ch++) {
                for (sb = 0; sb < subbands; sb += 2) {
                        blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] -
                                (char *) &sb_sample_f[0][0][0]));
                        __asm__ volatile (
                                "movq         (%4), %%mm0\n"
                        "1:\n"
                                "movq     (%1, %0), %%mm1\n"
                                "pxor        %%mm2, %%mm2\n"
                                "pcmpgtd     %%mm2, %%mm1\n"
                                "paddd    (%1, %0), %%mm1\n"
                                "pcmpgtd     %%mm1, %%mm2\n"
                                "pxor        %%mm2, %%mm1\n"

                                "por         %%mm1, %%mm0\n"

                                "sub            %2, %0\n"
                                "jns            1b\n"

                                "movd        %%mm0, %k0\n"
                                "psrlq         $32, %%mm0\n"
                                "bsrl          %k0, %k0\n"
                                "subl           %5, %k0\n"
                                "movl          %k0, (%3)\n"

                                "movd        %%mm0, %k0\n"
                                "bsrl          %k0, %k0\n"
                                "subl           %5, %k0\n"
                                "movl          %k0, 4(%3)\n"
                        : "+r" (blk)
                        : "r" (&sb_sample_f[0][ch][sb]),
                                "i" ((char *) &sb_sample_f[1][0][0] -
                                        (char *) &sb_sample_f[0][0][0]),
                                "r" (&scale_factor[ch][sb]),
                                "r" (&consts),
                                "i" (SCALE_OUT_BITS)
                        : "cc", "memory");
                }
        }
        __asm__ volatile ("emms\n");
}
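
/*
 * For reference, a plain C sketch of the computation done by the asm above.
 * sbc_calc_scalefactors_ref() is illustrative only and is never called; the
 * MMX routine handles two subbands per loop iteration and uses "bsrl" where
 * the sketch uses the GCC builtin __builtin_clz().
 */
static inline void sbc_calc_scalefactors_ref(
        int32_t sb_sample_f[16][2][8],
        uint32_t scale_factor[2][8],
        int blocks, int channels, int subbands)
{
        int ch, sb, blk;

        for (ch = 0; ch < channels; ch++) {
                for (sb = 0; sb < subbands; sb++) {
                        /* OR together |x| - 1 for every block (zero stays
                         * zero), plus a guard bit at position SCALE_OUT_BITS */
                        uint32_t m = 1 << SCALE_OUT_BITS;
                        for (blk = 0; blk < blocks; blk++) {
                                int32_t x = sb_sample_f[blk][ch][sb];
                                int32_t t = x + (x > 0 ? -1 : 0);       /* pcmpgtd + paddd */
                                m |= (uint32_t) (t ^ (t < 0 ? -1 : 0)); /* pcmpgtd + pxor */
                        }
                        /* position of the highest set bit, counted from
                         * SCALE_OUT_BITS (same as bsrl followed by subl) */
                        scale_factor[ch][sb] =
                                (31 - __builtin_clz(m)) - SCALE_OUT_BITS;
                }
        }
}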

static int check_mmx_support(void)
{
#ifdef __amd64__
        return 1; /* We assume that all 64-bit processors have MMX support */
#else
        int cpuid_feature_information;
        __asm__ volatile (
                /* According to Intel manual, CPUID instruction is supported
                 * if the value of ID bit (bit 21) in EFLAGS can be modified */
                "pushf\n"
                "movl     (%%esp),   %0\n"
                "xorl     $0x200000, (%%esp)\n" /* try to modify ID bit */
                "popf\n"
                "pushf\n"
                "xorl     (%%esp),   %0\n"      /* check if ID bit changed */
                "jz       1f\n"
                "push     %%eax\n"
                "push     %%ebx\n"
                "push     %%ecx\n"
                "mov      $1,        %%eax\n"
                "cpuid\n"
                "pop      %%ecx\n"
                "pop      %%ebx\n"
                "pop      %%eax\n"
                "1:\n"
                "popf\n"
                : "=d" (cpuid_feature_information)
                :
                : "cc");
        /* MMX support is reported in bit 23 of the CPUID feature flags (EDX) */
        return cpuid_feature_information & (1 << 23);
#endif
}
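
/*
 * The generic primitives setup code is expected to call
 * sbc_init_primitives_mmx() after installing the portable callbacks, so the
 * MMX versions only take over when check_mmx_support() confirms that the
 * CPU actually implements MMX.
 */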

void sbc_init_primitives_mmx(struct sbc_encoder_state *state)
{
        if (check_mmx_support()) {
                state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
                state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
                state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx;
                state->implementation_info = "MMX";
        }
}

#endif /* SBC_BUILD_WITH_MMX_SUPPORT */