OSDN Git Service

sbc: ARM NEON optimization for scale factors calculation
[android-x86/external-bluetooth-sbc.git] / sbc / sbc.c
1 /*
2  *
3  *  Bluetooth low-complexity, subband codec (SBC) library
4  *
5  *  Copyright (C) 2008-2010  Nokia Corporation
6  *  Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
7  *  Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
8  *  Copyright (C) 2005-2008  Brad Midgley <bmidgley@xmission.com>
9  *
10  *
11  *  This library is free software; you can redistribute it and/or
12  *  modify it under the terms of the GNU Lesser General Public
13  *  License as published by the Free Software Foundation; either
14  *  version 2.1 of the License, or (at your option) any later version.
15  *
16  *  This library is distributed in the hope that it will be useful,
17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  *  Lesser General Public License for more details.
20  *
21  *  You should have received a copy of the GNU Lesser General Public
22  *  License along with this library; if not, write to the Free Software
23  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
24  *
25  */
26
/* todo items:

  - use a log2 table for byte integer scale factors calculation (sum log2
    results for high and low bytes)
  - fill bitpool by 16 bits instead of one at a time in bits
    allocation/bitpool generation
  - port to the dsp

*/
34
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
38
39 #include <stdio.h>
40 #include <errno.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <sys/types.h>
44 #include <limits.h>
45
46 #include "sbc_math.h"
47 #include "sbc_tables.h"
48
49 #include "sbc.h"
50 #include "sbc_primitives.h"
51
/* Value of the first byte of a packed SBC frame; sbc_unpack_frame()
 * returns -2 when data[0] differs from it. */
#define SBC_SYNCWORD	0x9C
53
54 /* This structure contains an unpacked SBC frame.
55    Yes, there is probably quite some unused space herein */
56 struct sbc_frame {
57         uint8_t frequency;
58         uint8_t block_mode;
59         uint8_t blocks;
60         enum {
61                 MONO            = SBC_MODE_MONO,
62                 DUAL_CHANNEL    = SBC_MODE_DUAL_CHANNEL,
63                 STEREO          = SBC_MODE_STEREO,
64                 JOINT_STEREO    = SBC_MODE_JOINT_STEREO
65         } mode;
66         uint8_t channels;
67         enum {
68                 LOUDNESS        = SBC_AM_LOUDNESS,
69                 SNR             = SBC_AM_SNR
70         } allocation;
71         uint8_t subband_mode;
72         uint8_t subbands;
73         uint8_t bitpool;
74         uint16_t codesize;
75         uint8_t length;
76
77         /* bit number x set means joint stereo has been used in subband x */
78         uint8_t joint;
79
80         /* only the lower 4 bits of every element are to be used */
81         uint32_t SBC_ALIGNED scale_factor[2][8];
82
83         /* raw integer subband samples in the frame */
84         int32_t SBC_ALIGNED sb_sample_f[16][2][8];
85
86         /* modified subband samples */
87         int32_t SBC_ALIGNED sb_sample[16][2][8];
88
89         /* original pcm audio samples */
90         int16_t SBC_ALIGNED pcm_sample[2][16*8];
91 };
92
/* Synthesis filter state kept by the decoder between frames. */
struct sbc_decoder_state {
	/* subband count the state was initialised for (see sbc_decoder_init) */
	int subbands;
	/* per-channel synthesis buffer; the 4-subband path touches indices
	 * up to 88 and the 8-subband path up to 168 */
	int32_t V[2][170];
	/* per-channel write positions into V, one per synthesis matrix row;
	 * each is decremented once per block and wraps around */
	int offset[2][16];
};
98
/*
 * Byte-wise lookup table for the CRC-8 used by sbc_crc8()
 * (same polynomial, 0x1d, as the bit-wise step in that function).
 */
static const uint8_t crc_table[256] = {
	0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53,
	0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB,
	0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E,
	0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76,
	0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4,
	0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C,
	0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19,
	0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1,
	0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40,
	0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8,
	0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D,
	0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65,
	0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7,
	0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F,
	0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A,
	0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2,
	0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75,
	0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D,
	0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8,
	0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50,
	0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2,
	0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A,
	0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F,
	0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7,
	0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66,
	0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E,
	0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB,
	0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43,
	0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1,
	0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09,
	0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C,
	0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4
};

/*
 * Calculates the CRC-8 of the first len bits in data.
 *
 * data: buffer holding at least (len + 7) / 8 bytes
 * len:  number of bits (not bytes) to cover, most significant bit first
 *
 * Returns the CRC, starting from the initial value 0x0f. Whole bytes go
 * through crc_table; any remaining bits are processed one at a time.
 */
static uint8_t sbc_crc8(const uint8_t *data, size_t len)
{
	uint8_t crc = 0x0f;
	size_t nbytes = len / 8;
	size_t nbits = len % 8;
	size_t i;

	for (i = 0; i < nbytes; i++)
		crc = crc_table[crc ^ data[i]];

	/* Only touch the trailing octet when it really holds input bits;
	 * otherwise data[nbytes] may lie one past the end of the buffer. */
	if (nbits > 0) {
		uint8_t octet = data[nbytes];

		for (i = 0; i < nbits; i++) {
			uint8_t bit = ((octet ^ crc) & 0x80) >> 7;

			crc = ((crc & 0x7f) << 1) ^ (bit ? 0x1d : 0);

			octet = octet << 1;
		}
	}

	return crc;
}
157
158 /*
159  * Code straight from the spec to calculate the bits array
160  * Takes a pointer to the frame in question, a pointer to the bits array and
161  * the sampling frequency (as 2 bit integer)
162  */
163 static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8])
164 {
165         uint8_t sf = frame->frequency;
166
167         if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) {
168                 int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
169                 int ch, sb;
170
171                 for (ch = 0; ch < frame->channels; ch++) {
172                         max_bitneed = 0;
173                         if (frame->allocation == SNR) {
174                                 for (sb = 0; sb < frame->subbands; sb++) {
175                                         bitneed[ch][sb] = frame->scale_factor[ch][sb];
176                                         if (bitneed[ch][sb] > max_bitneed)
177                                                 max_bitneed = bitneed[ch][sb];
178                                 }
179                         } else {
180                                 for (sb = 0; sb < frame->subbands; sb++) {
181                                         if (frame->scale_factor[ch][sb] == 0)
182                                                 bitneed[ch][sb] = -5;
183                                         else {
184                                                 if (frame->subbands == 4)
185                                                         loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
186                                                 else
187                                                         loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
188                                                 if (loudness > 0)
189                                                         bitneed[ch][sb] = loudness / 2;
190                                                 else
191                                                         bitneed[ch][sb] = loudness;
192                                         }
193                                         if (bitneed[ch][sb] > max_bitneed)
194                                                 max_bitneed = bitneed[ch][sb];
195                                 }
196                         }
197
198                         bitcount = 0;
199                         slicecount = 0;
200                         bitslice = max_bitneed + 1;
201                         do {
202                                 bitslice--;
203                                 bitcount += slicecount;
204                                 slicecount = 0;
205                                 for (sb = 0; sb < frame->subbands; sb++) {
206                                         if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
207                                                 slicecount++;
208                                         else if (bitneed[ch][sb] == bitslice + 1)
209                                                 slicecount += 2;
210                                 }
211                         } while (bitcount + slicecount < frame->bitpool);
212
213                         if (bitcount + slicecount == frame->bitpool) {
214                                 bitcount += slicecount;
215                                 bitslice--;
216                         }
217
218                         for (sb = 0; sb < frame->subbands; sb++) {
219                                 if (bitneed[ch][sb] < bitslice + 2)
220                                         bits[ch][sb] = 0;
221                                 else {
222                                         bits[ch][sb] = bitneed[ch][sb] - bitslice;
223                                         if (bits[ch][sb] > 16)
224                                                 bits[ch][sb] = 16;
225                                 }
226                         }
227
228                         for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) {
229                                 if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
230                                         bits[ch][sb]++;
231                                         bitcount++;
232                                 } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
233                                         bits[ch][sb] = 2;
234                                         bitcount += 2;
235                                 }
236                         }
237
238                         for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) {
239                                 if (bits[ch][sb] < 16) {
240                                         bits[ch][sb]++;
241                                         bitcount++;
242                                 }
243                         }
244
245                 }
246
247         } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) {
248                 int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice;
249                 int ch, sb;
250
251                 max_bitneed = 0;
252                 if (frame->allocation == SNR) {
253                         for (ch = 0; ch < 2; ch++) {
254                                 for (sb = 0; sb < frame->subbands; sb++) {
255                                         bitneed[ch][sb] = frame->scale_factor[ch][sb];
256                                         if (bitneed[ch][sb] > max_bitneed)
257                                                 max_bitneed = bitneed[ch][sb];
258                                 }
259                         }
260                 } else {
261                         for (ch = 0; ch < 2; ch++) {
262                                 for (sb = 0; sb < frame->subbands; sb++) {
263                                         if (frame->scale_factor[ch][sb] == 0)
264                                                 bitneed[ch][sb] = -5;
265                                         else {
266                                                 if (frame->subbands == 4)
267                                                         loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb];
268                                                 else
269                                                         loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb];
270                                                 if (loudness > 0)
271                                                         bitneed[ch][sb] = loudness / 2;
272                                                 else
273                                                         bitneed[ch][sb] = loudness;
274                                         }
275                                         if (bitneed[ch][sb] > max_bitneed)
276                                                 max_bitneed = bitneed[ch][sb];
277                                 }
278                         }
279                 }
280
281                 bitcount = 0;
282                 slicecount = 0;
283                 bitslice = max_bitneed + 1;
284                 do {
285                         bitslice--;
286                         bitcount += slicecount;
287                         slicecount = 0;
288                         for (ch = 0; ch < 2; ch++) {
289                                 for (sb = 0; sb < frame->subbands; sb++) {
290                                         if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16))
291                                                 slicecount++;
292                                         else if (bitneed[ch][sb] == bitslice + 1)
293                                                 slicecount += 2;
294                                 }
295                         }
296                 } while (bitcount + slicecount < frame->bitpool);
297
298                 if (bitcount + slicecount == frame->bitpool) {
299                         bitcount += slicecount;
300                         bitslice--;
301                 }
302
303                 for (ch = 0; ch < 2; ch++) {
304                         for (sb = 0; sb < frame->subbands; sb++) {
305                                 if (bitneed[ch][sb] < bitslice + 2) {
306                                         bits[ch][sb] = 0;
307                                 } else {
308                                         bits[ch][sb] = bitneed[ch][sb] - bitslice;
309                                         if (bits[ch][sb] > 16)
310                                                 bits[ch][sb] = 16;
311                                 }
312                         }
313                 }
314
315                 ch = 0;
316                 sb = 0;
317                 while (bitcount < frame->bitpool) {
318                         if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) {
319                                 bits[ch][sb]++;
320                                 bitcount++;
321                         } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) {
322                                 bits[ch][sb] = 2;
323                                 bitcount += 2;
324                         }
325                         if (ch == 1) {
326                                 ch = 0;
327                                 sb++;
328                                 if (sb >= frame->subbands) break;
329                         } else
330                                 ch = 1;
331                 }
332
333                 ch = 0;
334                 sb = 0;
335                 while (bitcount < frame->bitpool) {
336                         if (bits[ch][sb] < 16) {
337                                 bits[ch][sb]++;
338                                 bitcount++;
339                         }
340                         if (ch == 1) {
341                                 ch = 0;
342                                 sb++;
343                                 if (sb >= frame->subbands) break;
344                         } else
345                                 ch = 1;
346                 }
347
348         }
349
350 }
351
352 /*
353  * Unpacks a SBC frame at the beginning of the stream in data,
354  * which has at most len bytes into frame.
355  * Returns the length in bytes of the packed frame, or a negative
356  * value on error. The error codes are:
357  *
358  *  -1   Data stream too short
359  *  -2   Sync byte incorrect
360  *  -3   CRC8 incorrect
361  *  -4   Bitpool value out of bounds
362  */
363 static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame,
364                                                                 size_t len)
365 {
366         unsigned int consumed;
367         /* Will copy the parts of the header that are relevant to crc
368          * calculation here */
369         uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
370         int crc_pos = 0;
371         int32_t temp;
372
373         int audio_sample;
374         int ch, sb, blk, bit;   /* channel, subband, block and bit standard
375                                    counters */
376         int bits[2][8];         /* bits distribution */
377         uint32_t levels[2][8];  /* levels derived from that */
378
379         if (len < 4)
380                 return -1;
381
382         if (data[0] != SBC_SYNCWORD)
383                 return -2;
384
385         frame->frequency = (data[1] >> 6) & 0x03;
386
387         frame->block_mode = (data[1] >> 4) & 0x03;
388         switch (frame->block_mode) {
389         case SBC_BLK_4:
390                 frame->blocks = 4;
391                 break;
392         case SBC_BLK_8:
393                 frame->blocks = 8;
394                 break;
395         case SBC_BLK_12:
396                 frame->blocks = 12;
397                 break;
398         case SBC_BLK_16:
399                 frame->blocks = 16;
400                 break;
401         }
402
403         frame->mode = (data[1] >> 2) & 0x03;
404         switch (frame->mode) {
405         case MONO:
406                 frame->channels = 1;
407                 break;
408         case DUAL_CHANNEL:      /* fall-through */
409         case STEREO:
410         case JOINT_STEREO:
411                 frame->channels = 2;
412                 break;
413         }
414
415         frame->allocation = (data[1] >> 1) & 0x01;
416
417         frame->subband_mode = (data[1] & 0x01);
418         frame->subbands = frame->subband_mode ? 8 : 4;
419
420         frame->bitpool = data[2];
421
422         if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) &&
423                         frame->bitpool > 16 * frame->subbands)
424                 return -4;
425
426         if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) &&
427                         frame->bitpool > 32 * frame->subbands)
428                 return -4;
429
430         /* data[3] is crc, we're checking it later */
431
432         consumed = 32;
433
434         crc_header[0] = data[1];
435         crc_header[1] = data[2];
436         crc_pos = 16;
437
438         if (frame->mode == JOINT_STEREO) {
439                 if (len * 8 < consumed + frame->subbands)
440                         return -1;
441
442                 frame->joint = 0x00;
443                 for (sb = 0; sb < frame->subbands - 1; sb++)
444                         frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb;
445                 if (frame->subbands == 4)
446                         crc_header[crc_pos / 8] = data[4] & 0xf0;
447                 else
448                         crc_header[crc_pos / 8] = data[4];
449
450                 consumed += frame->subbands;
451                 crc_pos += frame->subbands;
452         }
453
454         if (len * 8 < consumed + (4 * frame->subbands * frame->channels))
455                 return -1;
456
457         for (ch = 0; ch < frame->channels; ch++) {
458                 for (sb = 0; sb < frame->subbands; sb++) {
459                         /* FIXME assert(consumed % 4 == 0); */
460                         frame->scale_factor[ch][sb] =
461                                 (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F;
462                         crc_header[crc_pos >> 3] |=
463                                 frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7));
464
465                         consumed += 4;
466                         crc_pos += 4;
467                 }
468         }
469
470         if (data[3] != sbc_crc8(crc_header, crc_pos))
471                 return -3;
472
473         sbc_calculate_bits(frame, bits);
474
475         for (ch = 0; ch < frame->channels; ch++) {
476                 for (sb = 0; sb < frame->subbands; sb++)
477                         levels[ch][sb] = (1 << bits[ch][sb]) - 1;
478         }
479
480         for (blk = 0; blk < frame->blocks; blk++) {
481                 for (ch = 0; ch < frame->channels; ch++) {
482                         for (sb = 0; sb < frame->subbands; sb++) {
483                                 if (levels[ch][sb] > 0) {
484                                         audio_sample = 0;
485                                         for (bit = 0; bit < bits[ch][sb]; bit++) {
486                                                 if (consumed > len * 8)
487                                                         return -1;
488
489                                                 if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01)
490                                                         audio_sample |= 1 << (bits[ch][sb] - bit - 1);
491
492                                                 consumed++;
493                                         }
494
495                                         frame->sb_sample[blk][ch][sb] =
496                                                 (((audio_sample << 1) | 1) << frame->scale_factor[ch][sb]) /
497                                                 levels[ch][sb] - (1 << frame->scale_factor[ch][sb]);
498                                 } else
499                                         frame->sb_sample[blk][ch][sb] = 0;
500                         }
501                 }
502         }
503
504         if (frame->mode == JOINT_STEREO) {
505                 for (blk = 0; blk < frame->blocks; blk++) {
506                         for (sb = 0; sb < frame->subbands; sb++) {
507                                 if (frame->joint & (0x01 << sb)) {
508                                         temp = frame->sb_sample[blk][0][sb] +
509                                                 frame->sb_sample[blk][1][sb];
510                                         frame->sb_sample[blk][1][sb] =
511                                                 frame->sb_sample[blk][0][sb] -
512                                                 frame->sb_sample[blk][1][sb];
513                                         frame->sb_sample[blk][0][sb] = temp;
514                                 }
515                         }
516                 }
517         }
518
519         if ((consumed & 0x7) != 0)
520                 consumed += 8 - (consumed & 0x7);
521
522         return consumed >> 3;
523 }
524
525 static void sbc_decoder_init(struct sbc_decoder_state *state,
526                                         const struct sbc_frame *frame)
527 {
528         int i, ch;
529
530         memset(state->V, 0, sizeof(state->V));
531         state->subbands = frame->subbands;
532
533         for (ch = 0; ch < 2; ch++)
534                 for (i = 0; i < frame->subbands * 2; i++)
535                         state->offset[ch][i] = (10 * i + 10);
536 }
537
538 static SBC_ALWAYS_INLINE int16_t sbc_clip16(int32_t s)
539 {
540         if (s > 0x7FFF)
541                 return 0x7FFF;
542         else if (s < -0x8000)
543                 return -0x8000;
544         else
545                 return s;
546 }
547
548 static inline void sbc_synthesize_four(struct sbc_decoder_state *state,
549                                 struct sbc_frame *frame, int ch, int blk)
550 {
551         int i, k, idx;
552         int32_t *v = state->V[ch];
553         int *offset = state->offset[ch];
554
555         for (i = 0; i < 8; i++) {
556                 /* Shifting */
557                 offset[i]--;
558                 if (offset[i] < 0) {
559                         offset[i] = 79;
560                         memcpy(v + 80, v, 9 * sizeof(*v));
561                 }
562
563                 /* Distribute the new matrix value to the shifted position */
564                 v[offset[i]] = SCALE4_STAGED1(
565                         MULA(synmatrix4[i][0], frame->sb_sample[blk][ch][0],
566                         MULA(synmatrix4[i][1], frame->sb_sample[blk][ch][1],
567                         MULA(synmatrix4[i][2], frame->sb_sample[blk][ch][2],
568                         MUL (synmatrix4[i][3], frame->sb_sample[blk][ch][3])))));
569         }
570
571         /* Compute the samples */
572         for (idx = 0, i = 0; i < 4; i++, idx += 5) {
573                 k = (i + 4) & 0xf;
574
575                 /* Store in output, Q0 */
576                 frame->pcm_sample[ch][blk * 4 + i] = sbc_clip16(SCALE4_STAGED1(
577                         MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0],
578                         MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0],
579                         MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1],
580                         MULA(v[offset[k] + 3], sbc_proto_4_40m1[idx + 1],
581                         MULA(v[offset[i] + 4], sbc_proto_4_40m0[idx + 2],
582                         MULA(v[offset[k] + 5], sbc_proto_4_40m1[idx + 2],
583                         MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3],
584                         MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3],
585                         MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4],
586                         MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4]))))))))))));
587         }
588 }
589
590 static inline void sbc_synthesize_eight(struct sbc_decoder_state *state,
591                                 struct sbc_frame *frame, int ch, int blk)
592 {
593         int i, j, k, idx;
594         int *offset = state->offset[ch];
595
596         for (i = 0; i < 16; i++) {
597                 /* Shifting */
598                 offset[i]--;
599                 if (offset[i] < 0) {
600                         offset[i] = 159;
601                         for (j = 0; j < 9; j++)
602                                 state->V[ch][j + 160] = state->V[ch][j];
603                 }
604
605                 /* Distribute the new matrix value to the shifted position */
606                 state->V[ch][offset[i]] = SCALE8_STAGED1(
607                         MULA(synmatrix8[i][0], frame->sb_sample[blk][ch][0],
608                         MULA(synmatrix8[i][1], frame->sb_sample[blk][ch][1],
609                         MULA(synmatrix8[i][2], frame->sb_sample[blk][ch][2],
610                         MULA(synmatrix8[i][3], frame->sb_sample[blk][ch][3],
611                         MULA(synmatrix8[i][4], frame->sb_sample[blk][ch][4],
612                         MULA(synmatrix8[i][5], frame->sb_sample[blk][ch][5],
613                         MULA(synmatrix8[i][6], frame->sb_sample[blk][ch][6],
614                         MUL( synmatrix8[i][7], frame->sb_sample[blk][ch][7])))))))));
615         }
616
617         /* Compute the samples */
618         for (idx = 0, i = 0; i < 8; i++, idx += 5) {
619                 k = (i + 8) & 0xf;
620
621                 /* Store in output, Q0 */
622                 frame->pcm_sample[ch][blk * 8 + i] = sbc_clip16(SCALE8_STAGED1(
623                         MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0],
624                         MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0],
625                         MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1],
626                         MULA(state->V[ch][offset[k] + 3], sbc_proto_8_80m1[idx + 1],
627                         MULA(state->V[ch][offset[i] + 4], sbc_proto_8_80m0[idx + 2],
628                         MULA(state->V[ch][offset[k] + 5], sbc_proto_8_80m1[idx + 2],
629                         MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3],
630                         MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3],
631                         MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4],
632                         MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4]))))))))))));
633         }
634 }
635
636 static int sbc_synthesize_audio(struct sbc_decoder_state *state,
637                                                 struct sbc_frame *frame)
638 {
639         int ch, blk;
640
641         switch (frame->subbands) {
642         case 4:
643                 for (ch = 0; ch < frame->channels; ch++) {
644                         for (blk = 0; blk < frame->blocks; blk++)
645                                 sbc_synthesize_four(state, frame, ch, blk);
646                 }
647                 return frame->blocks * 4;
648
649         case 8:
650                 for (ch = 0; ch < frame->channels; ch++) {
651                         for (blk = 0; blk < frame->blocks; blk++)
652                                 sbc_synthesize_eight(state, frame, ch, blk);
653                 }
654                 return frame->blocks * 8;
655
656         default:
657                 return -EIO;
658         }
659 }
660
661 static int sbc_analyze_audio(struct sbc_encoder_state *state,
662                                                 struct sbc_frame *frame)
663 {
664         int ch, blk;
665         int16_t *x;
666
667         switch (frame->subbands) {
668         case 4:
669                 for (ch = 0; ch < frame->channels; ch++) {
670                         x = &state->X[ch][state->position - 16 +
671                                                         frame->blocks * 4];
672                         for (blk = 0; blk < frame->blocks; blk += 4) {
673                                 state->sbc_analyze_4b_4s(
674                                         x,
675                                         frame->sb_sample_f[blk][ch],
676                                         frame->sb_sample_f[blk + 1][ch] -
677                                         frame->sb_sample_f[blk][ch]);
678                                 x -= 16;
679                         }
680                 }
681                 return frame->blocks * 4;
682
683         case 8:
684                 for (ch = 0; ch < frame->channels; ch++) {
685                         x = &state->X[ch][state->position - 32 +
686                                                         frame->blocks * 8];
687                         for (blk = 0; blk < frame->blocks; blk += 4) {
688                                 state->sbc_analyze_4b_8s(
689                                         x,
690                                         frame->sb_sample_f[blk][ch],
691                                         frame->sb_sample_f[blk + 1][ch] -
692                                         frame->sb_sample_f[blk][ch]);
693                                 x -= 32;
694                         }
695                 }
696                 return frame->blocks * 8;
697
698         default:
699                 return -EIO;
700         }
701 }
702
703 /* Supplementary bitstream writing macros for 'sbc_pack_frame' */
704
/*
 * Append the value 'v', occupying 'n' bits, to the bit accumulator
 * 'bits_cache' and, once at least 16 bits are pending, store two whole
 * bytes (most significant first) through 'data_ptr'.
 *
 * 'v' is ORed in unmasked, so it must not have bits set above bit n-1.
 * 'data_ptr', 'bits_cache' and 'bits_count' must be modifiable lvalues;
 * they are updated in place and evaluated more than once. Every parameter
 * is parenthesized so that expression arguments (e.g. '*pp') bind as the
 * caller intends.
 */
#define PUT_BITS(data_ptr, bits_cache, bits_count, v, n)		\
	do {								\
		(bits_cache) = (v) | ((bits_cache) << (n));		\
		(bits_count) += (n);					\
		if ((bits_count) >= 16) {				\
			(bits_count) -= 8;				\
			*(data_ptr)++ = (uint8_t)			\
				((bits_cache) >> (bits_count));		\
			(bits_count) -= 8;				\
			*(data_ptr)++ = (uint8_t)			\
				((bits_cache) >> (bits_count));		\
		}							\
	} while (0)
718
/*
 * Write out whatever is still pending in the bit accumulator: whole bytes
 * first (most significant first), then any remaining 1..7 bits left-aligned
 * in a final byte whose low bits are zero.
 *
 * 'bits_count' is left holding the number of bits that went into that final
 * partial byte (it is not reset to zero). Arguments must be modifiable
 * lvalues and are evaluated more than once; every parameter is parenthesized
 * so that expression arguments bind as the caller intends.
 */
#define FLUSH_BITS(data_ptr, bits_cache, bits_count)			\
	do {								\
		while ((bits_count) >= 8) {				\
			(bits_count) -= 8;				\
			*(data_ptr)++ = (uint8_t)			\
				((bits_cache) >> (bits_count));		\
		}							\
		if ((bits_count) > 0)					\
			*(data_ptr)++ = (uint8_t)			\
				((bits_cache) << (8 - (bits_count)));	\
	} while (0)
730
731 /*
732  * Packs the SBC frame from frame into the memory at data. At most len
733  * bytes will be used, should more memory be needed an appropriate
734  * error code will be returned. Returns the length of the packed frame
735  * on success or a negative value on error.
736  *
737  * The error codes are:
738  * -1 Not enough memory reserved
739  * -2 Unsupported sampling rate
740  * -3 Unsupported number of blocks
741  * -4 Unsupported number of subbands
742  * -5 Bitpool value out of bounds
743  * -99 not implemented
744  */
745
746 static SBC_ALWAYS_INLINE int sbc_pack_frame_internal(uint8_t *data,
747                                         struct sbc_frame *frame, size_t len,
748                                         int frame_subbands, int frame_channels,
749                                         int joint)
750 {
751         /* Bitstream writer starts from the fourth byte */
752         uint8_t *data_ptr = data + 4;
753         uint32_t bits_cache = 0;
754         uint32_t bits_count = 0;
755
756         /* Will copy the header parts for CRC-8 calculation here */
757         uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
758         int crc_pos = 0;
759
760         uint32_t audio_sample;
761
762         int ch, sb, blk;        /* channel, subband, block and bit counters */
763         int bits[2][8];         /* bits distribution */
764         uint32_t levels[2][8];  /* levels are derived from that */
765         uint32_t sb_sample_delta[2][8];
766
767         data[0] = SBC_SYNCWORD;
768
769         data[1] = (frame->frequency & 0x03) << 6;
770
771         data[1] |= (frame->block_mode & 0x03) << 4;
772
773         data[1] |= (frame->mode & 0x03) << 2;
774
775         data[1] |= (frame->allocation & 0x01) << 1;
776
777         switch (frame_subbands) {
778         case 4:
779                 /* Nothing to do */
780                 break;
781         case 8:
782                 data[1] |= 0x01;
783                 break;
784         default:
785                 return -4;
786                 break;
787         }
788
789         data[2] = frame->bitpool;
790
791         if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) &&
792                         frame->bitpool > frame_subbands << 4)
793                 return -5;
794
795         if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) &&
796                         frame->bitpool > frame_subbands << 5)
797                 return -5;
798
799         /* Can't fill in crc yet */
800
801         crc_header[0] = data[1];
802         crc_header[1] = data[2];
803         crc_pos = 16;
804
805         if (frame->mode == JOINT_STEREO) {
806                 PUT_BITS(data_ptr, bits_cache, bits_count,
807                         joint, frame_subbands);
808                 crc_header[crc_pos >> 3] = joint;
809                 crc_pos += frame_subbands;
810         }
811
812         for (ch = 0; ch < frame_channels; ch++) {
813                 for (sb = 0; sb < frame_subbands; sb++) {
814                         PUT_BITS(data_ptr, bits_cache, bits_count,
815                                 frame->scale_factor[ch][sb] & 0x0F, 4);
816                         crc_header[crc_pos >> 3] <<= 4;
817                         crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F;
818                         crc_pos += 4;
819                 }
820         }
821
822         /* align the last crc byte */
823         if (crc_pos % 8)
824                 crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8);
825
826         data[3] = sbc_crc8(crc_header, crc_pos);
827
828         sbc_calculate_bits(frame, bits);
829
830         for (ch = 0; ch < frame_channels; ch++) {
831                 for (sb = 0; sb < frame_subbands; sb++) {
832                         levels[ch][sb] = ((1 << bits[ch][sb]) - 1) <<
833                                 (32 - (frame->scale_factor[ch][sb] +
834                                         SCALE_OUT_BITS + 2));
835                         sb_sample_delta[ch][sb] = (uint32_t) 1 <<
836                                 (frame->scale_factor[ch][sb] +
837                                         SCALE_OUT_BITS + 1);
838                 }
839         }
840
841         for (blk = 0; blk < frame->blocks; blk++) {
842                 for (ch = 0; ch < frame_channels; ch++) {
843                         for (sb = 0; sb < frame_subbands; sb++) {
844
845                                 if (bits[ch][sb] == 0)
846                                         continue;
847
848                                 audio_sample = ((uint64_t) levels[ch][sb] *
849                                         (sb_sample_delta[ch][sb] +
850                                         frame->sb_sample_f[blk][ch][sb])) >> 32;
851
852                                 PUT_BITS(data_ptr, bits_cache, bits_count,
853                                         audio_sample, bits[ch][sb]);
854                         }
855                 }
856         }
857
858         FLUSH_BITS(data_ptr, bits_cache, bits_count);
859
860         return data_ptr - data;
861 }
862
863 static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len,
864                                                                 int joint)
865 {
866         if (frame->subbands == 4) {
867                 if (frame->channels == 1)
868                         return sbc_pack_frame_internal(
869                                 data, frame, len, 4, 1, joint);
870                 else
871                         return sbc_pack_frame_internal(
872                                 data, frame, len, 4, 2, joint);
873         } else {
874                 if (frame->channels == 1)
875                         return sbc_pack_frame_internal(
876                                 data, frame, len, 8, 1, joint);
877                 else
878                         return sbc_pack_frame_internal(
879                                 data, frame, len, 8, 2, joint);
880         }
881 }
882
883 static void sbc_encoder_init(struct sbc_encoder_state *state,
884                                         const struct sbc_frame *frame)
885 {
886         memset(&state->X, 0, sizeof(state->X));
887         state->position = (SBC_X_BUFFER_SIZE - frame->subbands * 9) & ~7;
888
889         sbc_init_primitives(state);
890 }
891
892 struct sbc_priv {
893         int init;
894         struct SBC_ALIGNED sbc_frame frame;
895         struct SBC_ALIGNED sbc_decoder_state dec_state;
896         struct SBC_ALIGNED sbc_encoder_state enc_state;
897 };
898
899 static void sbc_set_defaults(sbc_t *sbc, unsigned long flags)
900 {
901         sbc->frequency = SBC_FREQ_44100;
902         sbc->mode = SBC_MODE_STEREO;
903         sbc->subbands = SBC_SB_8;
904         sbc->blocks = SBC_BLK_16;
905         sbc->bitpool = 32;
906 #if __BYTE_ORDER == __LITTLE_ENDIAN
907         sbc->endian = SBC_LE;
908 #elif __BYTE_ORDER == __BIG_ENDIAN
909         sbc->endian = SBC_BE;
910 #else
911 #error "Unknown byte order"
912 #endif
913 }
914
915 int sbc_init(sbc_t *sbc, unsigned long flags)
916 {
917         if (!sbc)
918                 return -EIO;
919
920         memset(sbc, 0, sizeof(sbc_t));
921
922         sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK);
923         if (!sbc->priv_alloc_base)
924                 return -ENOMEM;
925
926         sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base +
927                         SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK));
928
929         memset(sbc->priv, 0, sizeof(struct sbc_priv));
930
931         sbc_set_defaults(sbc, flags);
932
933         return 0;
934 }
935
936 ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len)
937 {
938         return sbc_decode(sbc, input, input_len, NULL, 0, NULL);
939 }
940
941 ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len,
942                         void *output, size_t output_len, size_t *written)
943 {
944         struct sbc_priv *priv;
945         char *ptr;
946         int i, ch, framelen, samples;
947
948         if (!sbc || !input)
949                 return -EIO;
950
951         priv = sbc->priv;
952
953         framelen = sbc_unpack_frame(input, &priv->frame, input_len);
954
955         if (!priv->init) {
956                 sbc_decoder_init(&priv->dec_state, &priv->frame);
957                 priv->init = 1;
958
959                 sbc->frequency = priv->frame.frequency;
960                 sbc->mode = priv->frame.mode;
961                 sbc->subbands = priv->frame.subband_mode;
962                 sbc->blocks = priv->frame.block_mode;
963                 sbc->allocation = priv->frame.allocation;
964                 sbc->bitpool = priv->frame.bitpool;
965
966                 priv->frame.codesize = sbc_get_codesize(sbc);
967                 priv->frame.length = framelen;
968         }
969
970         if (!output)
971                 return framelen;
972
973         if (written)
974                 *written = 0;
975
976         if (framelen <= 0)
977                 return framelen;
978
979         samples = sbc_synthesize_audio(&priv->dec_state, &priv->frame);
980
981         ptr = output;
982
983         if (output_len < (size_t) (samples * priv->frame.channels * 2))
984                 samples = output_len / (priv->frame.channels * 2);
985
986         for (i = 0; i < samples; i++) {
987                 for (ch = 0; ch < priv->frame.channels; ch++) {
988                         int16_t s;
989                         s = priv->frame.pcm_sample[ch][i];
990
991                         if (sbc->endian == SBC_BE) {
992                                 *ptr++ = (s & 0xff00) >> 8;
993                                 *ptr++ = (s & 0x00ff);
994                         } else {
995                                 *ptr++ = (s & 0x00ff);
996                                 *ptr++ = (s & 0xff00) >> 8;
997                         }
998                 }
999         }
1000
1001         if (written)
1002                 *written = samples * priv->frame.channels * 2;
1003
1004         return framelen;
1005 }
1006
1007 ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len,
1008                         void *output, size_t output_len, size_t *written)
1009 {
1010         struct sbc_priv *priv;
1011         int framelen, samples;
1012         int (*sbc_enc_process_input)(int position,
1013                         const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
1014                         int nsamples, int nchannels);
1015
1016         if (!sbc || !input)
1017                 return -EIO;
1018
1019         priv = sbc->priv;
1020
1021         if (written)
1022                 *written = 0;
1023
1024         if (!priv->init) {
1025                 priv->frame.frequency = sbc->frequency;
1026                 priv->frame.mode = sbc->mode;
1027                 priv->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
1028                 priv->frame.allocation = sbc->allocation;
1029                 priv->frame.subband_mode = sbc->subbands;
1030                 priv->frame.subbands = sbc->subbands ? 8 : 4;
1031                 priv->frame.block_mode = sbc->blocks;
1032                 priv->frame.blocks = 4 + (sbc->blocks * 4);
1033                 priv->frame.bitpool = sbc->bitpool;
1034                 priv->frame.codesize = sbc_get_codesize(sbc);
1035                 priv->frame.length = sbc_get_frame_length(sbc);
1036
1037                 sbc_encoder_init(&priv->enc_state, &priv->frame);
1038                 priv->init = 1;
1039         }
1040
1041         /* input must be large enough to encode a complete frame */
1042         if (input_len < priv->frame.codesize)
1043                 return 0;
1044
1045         /* output must be large enough to receive the encoded frame */
1046         if (!output || output_len < priv->frame.length)
1047                 return -ENOSPC;
1048
1049         /* Select the needed input data processing function and call it */
1050         if (priv->frame.subbands == 8) {
1051                 if (sbc->endian == SBC_BE)
1052                         sbc_enc_process_input =
1053                                 priv->enc_state.sbc_enc_process_input_8s_be;
1054                 else
1055                         sbc_enc_process_input =
1056                                 priv->enc_state.sbc_enc_process_input_8s_le;
1057         } else {
1058                 if (sbc->endian == SBC_BE)
1059                         sbc_enc_process_input =
1060                                 priv->enc_state.sbc_enc_process_input_4s_be;
1061                 else
1062                         sbc_enc_process_input =
1063                                 priv->enc_state.sbc_enc_process_input_4s_le;
1064         }
1065
1066         priv->enc_state.position = sbc_enc_process_input(
1067                 priv->enc_state.position, (const uint8_t *) input,
1068                 priv->enc_state.X, priv->frame.subbands * priv->frame.blocks,
1069                 priv->frame.channels);
1070
1071         samples = sbc_analyze_audio(&priv->enc_state, &priv->frame);
1072
1073         if (priv->frame.mode == JOINT_STEREO) {
1074                 int j = priv->enc_state.sbc_calc_scalefactors_j(
1075                         priv->frame.sb_sample_f, priv->frame.scale_factor,
1076                         priv->frame.blocks, priv->frame.subbands);
1077                 framelen = sbc_pack_frame(output, &priv->frame, output_len, j);
1078         } else {
1079                 priv->enc_state.sbc_calc_scalefactors(
1080                         priv->frame.sb_sample_f, priv->frame.scale_factor,
1081                         priv->frame.blocks, priv->frame.channels,
1082                         priv->frame.subbands);
1083                 framelen = sbc_pack_frame(output, &priv->frame, output_len, 0);
1084         }
1085
1086         if (written)
1087                 *written = framelen;
1088
1089         return samples * priv->frame.channels * 2;
1090 }
1091
1092 void sbc_finish(sbc_t *sbc)
1093 {
1094         if (!sbc)
1095                 return;
1096
1097         free(sbc->priv_alloc_base);
1098
1099         memset(sbc, 0, sizeof(sbc_t));
1100 }
1101
1102 size_t sbc_get_frame_length(sbc_t *sbc)
1103 {
1104         int ret;
1105         uint8_t subbands, channels, blocks, joint, bitpool;
1106         struct sbc_priv *priv;
1107
1108         priv = sbc->priv;
1109         if (priv->init)
1110                 return priv->frame.length;
1111
1112         subbands = sbc->subbands ? 8 : 4;
1113         blocks = 4 + (sbc->blocks * 4);
1114         channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
1115         joint = sbc->mode == SBC_MODE_JOINT_STEREO ? 1 : 0;
1116         bitpool = sbc->bitpool;
1117
1118         ret = 4 + (4 * subbands * channels) / 8;
1119         /* This term is not always evenly divide so we round it up */
1120         if (channels == 1)
1121                 ret += ((blocks * channels * bitpool) + 7) / 8;
1122         else
1123                 ret += (((joint ? subbands : 0) + blocks * bitpool) + 7) / 8;
1124
1125         return ret;
1126 }
1127
1128 unsigned sbc_get_frame_duration(sbc_t *sbc)
1129 {
1130         uint8_t subbands, blocks;
1131         uint16_t frequency;
1132         struct sbc_priv *priv;
1133
1134         priv = sbc->priv;
1135         if (!priv->init) {
1136                 subbands = sbc->subbands ? 8 : 4;
1137                 blocks = 4 + (sbc->blocks * 4);
1138         } else {
1139                 subbands = priv->frame.subbands;
1140                 blocks = priv->frame.blocks;
1141         }
1142
1143         switch (sbc->frequency) {
1144         case SBC_FREQ_16000:
1145                 frequency = 16000;
1146                 break;
1147
1148         case SBC_FREQ_32000:
1149                 frequency = 32000;
1150                 break;
1151
1152         case SBC_FREQ_44100:
1153                 frequency = 44100;
1154                 break;
1155
1156         case SBC_FREQ_48000:
1157                 frequency = 48000;
1158                 break;
1159         default:
1160                 return 0;
1161         }
1162
1163         return (1000000 * blocks * subbands) / frequency;
1164 }
1165
1166 size_t sbc_get_codesize(sbc_t *sbc)
1167 {
1168         uint16_t subbands, channels, blocks;
1169         struct sbc_priv *priv;
1170
1171         priv = sbc->priv;
1172         if (!priv->init) {
1173                 subbands = sbc->subbands ? 8 : 4;
1174                 blocks = 4 + (sbc->blocks * 4);
1175                 channels = sbc->mode == SBC_MODE_MONO ? 1 : 2;
1176         } else {
1177                 subbands = priv->frame.subbands;
1178                 blocks = priv->frame.blocks;
1179                 channels = priv->frame.channels;
1180         }
1181
1182         return subbands * blocks * channels * 2;
1183 }
1184
1185 const char *sbc_get_implementation_info(sbc_t *sbc)
1186 {
1187         struct sbc_priv *priv;
1188
1189         if (!sbc)
1190                 return NULL;
1191
1192         priv = sbc->priv;
1193         if (!priv)
1194                 return NULL;
1195
1196         return priv->enc_state.implementation_info;
1197 }
1198
1199 int sbc_reinit(sbc_t *sbc, unsigned long flags)
1200 {
1201         struct sbc_priv *priv;
1202
1203         if (!sbc || !sbc->priv)
1204                 return -EIO;
1205
1206         priv = sbc->priv;
1207
1208         if (priv->init == 1)
1209                 memset(sbc->priv, 0, sizeof(struct sbc_priv));
1210
1211         sbc_set_defaults(sbc, flags);
1212
1213         return 0;
1214 }