2 * Copyright (C) 2011 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
18 #define ANDROID_AUDIO_PRIMITIVES_H
22 #include <sys/cdefs.h>
26 /* The memcpy_* conversion routines are designed to work in-place on same dst as src
27 * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
28 * This allows the loops to go upwards for faster cache access (and may be more flexible
29 * for future optimization later).
33 * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
34 * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
35 * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
36 * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
37 * is dithered and the remaining fraction is converted to the output Q.15, with clamping
38 * on the 4 integer guard bits.
40 * For interleaved stereo, c is the number of sample pairs,
41 * and out is an array of interleaved pairs of 16-bit samples per channel.
42 * For mono, c is the number of samples / 2, and out is an array of 16-bit samples.
43 * The name "dither" is a misnomer; the current implementation does not actually dither
44 * but uses truncation. This may change.
45 * The out and sums buffers must either be completely separate (non-overlapping), or
46 * they must both start at the same address. Partially overlapping buffers are not supported.
48 void ditherAndClamp(int32_t* out, const int32_t *sums, size_t c);
50 /* Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
52 * dst Destination buffer
54 * count Number of samples to copy
55 * The destination and source buffers must either be completely separate (non-overlapping), or
56 * they must both start at the same address. Partially overlapping buffers are not supported.
58 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
60 /* Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
62 * dst Destination buffer
64 * count Number of samples to copy
65 * The destination and source buffers must either be completely separate (non-overlapping), or
66 * they must both start at the same address. Partially overlapping buffers are not supported.
67 * The conversion is done by truncation, without dithering, so it loses resolution.
69 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
71 /* Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
73 * dst Destination buffer
75 * count Number of samples to copy
76 * The destination and source buffers must either be completely separate (non-overlapping), or
77 * they must both start at the same address. Partially overlapping buffers are not supported.
78 * The conversion is done by truncation, without dithering, so it loses resolution.
80 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
82 /* Shrink and copy samples from single-precision floating-point to signed 16-bit.
83 * Each float should be in the range -1.0 to 1.0. Values outside that range are clamped,
84 * refer to clamp16_from_float().
86 * dst Destination buffer
88 * count Number of samples to copy
89 * The destination and source buffers must either be completely separate (non-overlapping), or
90 * they must both start at the same address. Partially overlapping buffers are not supported.
91 * The conversion is done by truncation, without dithering, so it loses resolution.
93 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
95 /* Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
96 * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
97 * [0xf8000000, 0x07ffffff]. The full float range is [-16.0, 16.0]. Note the closed range
98 * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
100 * dst Destination buffer
102 * count Number of samples to copy
103 * The destination and source buffers must either be completely separate (non-overlapping), or
104 * they must both start at the same address. Partially overlapping buffers are not supported.
106 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
108 /* Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
109 * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
110 * No rounding is needed as the representation is exact.
112 * dst Destination buffer
114 * count Number of samples to copy
115 * The destination and source buffers must be completely separate.
117 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
119 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
120 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
121 * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
122 * No rounding is needed as the representation is exact.
124 * dst Destination buffer
126 * count Number of samples to copy
127 * The destination and source buffers must be completely separate.
129 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
131 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
132 * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
133 * The data is truncated without rounding.
135 * dst Destination buffer
137 * count Number of samples to copy
138 * The destination and source buffers must either be completely separate (non-overlapping), or
139 * they must both start at the same address. Partially overlapping buffers are not supported.
141 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
143 /* Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
144 * The packed 24 bit output is assumed to be a little-endian uint8_t byte array.
145 * The output data range is [0x800000, 0x7fff00] (not full).
146 * Nevertheless there is no DC offset on the output, if the input has no DC offset.
148 * dst Destination buffer
150 * count Number of samples to copy
151 * The destination and source buffers must be completely separate.
153 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
155 /* Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
156 * The packed 24 bit output is assumed to be a little-endian uint8_t byte array.
157 * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float() for details.
160 * dst Destination buffer
162 * count Number of samples to copy
163 * The destination and source buffers must either be completely separate (non-overlapping), or
164 * they must both start at the same address. Partially overlapping buffers are not supported.
166 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
168 /* Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
169 * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
171 * dst Destination buffer
173 * count Number of samples to copy
174 * The destination and source buffers must be completely separate.
176 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
178 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
179 * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
180 * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
181 * See clamp24_from_float() for details.
183 * dst Destination buffer
185 * count Number of samples to copy
186 * The destination and source buffers must either be completely separate (non-overlapping), or
187 * they must both start at the same address. Partially overlapping buffers are not supported.
189 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
191 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
192 * The conversion will use the full available Q4.27 range, including guard bits.
193 * Fractional lsb is rounded to nearest, ties away from zero.
194 * See clampq4_27_from_float() for details.
196 * dst Destination buffer
198 * count Number of samples to copy
199 * The destination and source buffers must either be completely separate (non-overlapping), or
200 * they must both start at the same address. Partially overlapping buffers are not supported.
202 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
204 /* Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
205 * The data is clamped, and truncated without rounding.
207 * dst Destination buffer
209 * count Number of samples to copy
210 * The destination and source buffers must either be completely separate (non-overlapping), or
211 * they must both start at the same address. Partially overlapping buffers are not supported.
213 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
215 /* Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
216 * The nominal output float range is [-1.0, 1.0) for the fixed-point
217 * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
218 * No rounding is needed as the representation is exact for nominal values.
219 * Rounding for overflow values is to nearest, ties to even.
221 * dst Destination buffer
223 * count Number of samples to copy
224 * The destination and source buffers must either be completely separate (non-overlapping), or
225 * they must both start at the same address. Partially overlapping buffers are not supported.
227 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
229 /* Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
230 * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
232 * dst Destination buffer
234 * count Number of samples to copy
235 * The destination and source buffers must be completely separate.
237 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
239 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
240 * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
241 * ties away from zero. See clamp32_from_float() for details.
243 * dst Destination buffer
245 * count Number of samples to copy
246 * The destination and source buffers must either be completely separate (non-overlapping), or
247 * they must both start at the same address. Partially overlapping buffers are not supported.
249 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
251 /* Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
252 * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
253 * Rounding is done according to float_from_i32().
255 * dst Destination buffer
257 * count Number of samples to copy
258 * The destination and source buffers must either be completely separate (non-overlapping), or
259 * they must both start at the same address. Partially overlapping buffers are not supported.
261 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
263 /* Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
265 * dst Destination buffer
267 * count Number of stereo frames to downmix
268 * The destination and source buffers must be completely separate (non-overlapping).
269 * The current implementation truncates the sum rather than dither, but this may change.
271 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
273 /* Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by duplication.
276 * dst Destination buffer
278 * count Number of mono samples to upmix
279 * The destination and source buffers must be completely separate (non-overlapping).
281 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
283 /* Return the total number of non-zero 32-bit samples */
284 size_t nonZeroMono32(const int32_t *samples, size_t count);
286 /* Return the total number of non-zero 16-bit samples */
287 size_t nonZeroMono16(const int16_t *samples, size_t count);
289 /* Return the total number of non-zero stereo frames, where a frame is considered non-zero
290 * if either of its constituent 32-bit samples is non-zero
292 size_t nonZeroStereo32(const int32_t *frames, size_t count);
294 /* Return the total number of non-zero stereo frames, where a frame is considered non-zero
295 * if either of its constituent 16-bit samples is non-zero
297 size_t nonZeroStereo16(const int16_t *frames, size_t count);
/*
 * Clamp (aka hard limit or clip) a signed 32-bit sample to 16-bit range.
 *
 *  sample  32-bit input value
 *
 * Returns the input saturated to [-32768, 32767].
 *
 * Plain comparisons are used instead of the shift/xor trick so that the result
 * does not depend on how the compiler right-shifts negative values.
 */
static inline int16_t clamp16(int32_t sample)
{
    if (sample > 32767) {
        return 32767;
    }
    if (sample < -32768) {
        return -32768;
    }
    return (int16_t)sample;
}
/*
 * Convert a IEEE 754 single precision float [-1.0, 1.0) to int16_t [-32768, 32767]
 * with clamping.  Note the open bound at 1.0, values within 1/65536 of 1.0 map
 * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
 *
 * Values outside the range [-1.0, 1.0) are properly clamped to -32768 and 32767,
 * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
 * depending on the sign bit inside NaN (whose representation is not unique).
 * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
 *
 * Rounding of 0.5 lsb is to even (default for IEEE 754).
 */
static inline int16_t clamp16_from_float(float f)
{
    /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
     * floating point significand. The normal shift is 3<<22, but the -15 offset
     * is used to multiply by 32768.
     */
    static const float offset = (float)(3 << (22 - 15));
    /* zero = (0x10f << 22) =  0x43c00000 (not directly used) */
    static const int32_t limneg = (0x10f << 22) /*zero*/ - 32768; /* 0x43bf8000 */
    static const int32_t limpos = (0x10f << 22) /*zero*/ + 32767; /* 0x43c07fff */

    /* Union gives access to the bit pattern of the recentered float. */
    union {
        float f;
        int32_t i;
    } u;

    u.f = f + offset; /* recenter valid range */
    /* Now the valid range is represented as integers between [limneg, limpos].
     * Clamp using the fact that float representation (as an integer) is an ordered set.
     */
    if (u.i < limneg) {
        u.i = -32768;
    } else if (u.i > limpos) {
        u.i = 32767;
    }
    return (int16_t)u.i; /* Return lower 16 bits, the part of interest in the significand. */
}
/* Convert a single-precision floating point value to a Q0.23 integer value, stored in a
 * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 *
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-1.0, 1.0) are properly clamped to -8388608 and 8388607,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 */
static inline int32_t clamp24_from_float(float f)
{
    /* Each initializer is a self-contained constant expression; ISO C does not
     * allow a static initializer to read another object, even a const one.
     */
    static const float scale = (float)(1 << 23);
    static const float limpos = (float)0x7fffff / (float)(1 << 23);
    static const float limneg = -(float)0x800000 / (float)(1 << 23);

    if (f <= limneg) {
        return -0x800000;
    } else if (f >= limpos) {
        return 0x7fffff;
    }
    f *= scale;
    /* integer conversion is through truncation (though int to float is not).
     * ensure that we round to nearest, ties away from 0.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
/* Convert a single-precision floating point value to a Q4.27 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-16.0, 16.0) are properly clamped to -2147483648 and 2147483647,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 */
static inline int32_t clampq4_27_from_float(float f)
{
    static const float scale = (float)(1UL << 27);
    static const float limpos = 16.;
    static const float limneg = -16.;

    if (f <= limneg) {
        /* Spelled as (-max - 1): the literal 0x80000000 is unsigned, so negating it
         * and converting back to int32_t would be implementation-defined.
         */
        return -0x7fffffff - 1;
    } else if (f >= limpos) {
        return 0x7fffffff;
    }
    f *= scale;
    /* integer conversion is through truncation (though int to float is not).
     * ensure that we round to nearest, ties away from 0.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
/* Convert a single-precision floating point value to a Q0.31 integer value.
 * Rounds to nearest, ties away from 0.
 *
 * Values outside the range [-1.0, 1.0) are properly clamped to -2147483648 and 2147483647,
 * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 */
static inline int32_t clamp32_from_float(float f)
{
    static const float scale = (float)(1UL << 31);
    static const float limpos = 1.;
    static const float limneg = -1.;

    if (f <= limneg) {
        /* Spelled as (-max - 1): the literal 0x80000000 is unsigned, so negating it
         * and converting back to int32_t would be implementation-defined.
         */
        return -0x7fffffff - 1;
    } else if (f >= limpos) {
        return 0x7fffffff;
    }
    f *= scale;
    /* integer conversion is through truncation (though int to float is not).
     * ensure that we round to nearest, ties away from 0.
     */
    return (int32_t)(f > 0 ? f + 0.5 : f - 0.5);
}
/* Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
 * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
 * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].
 *
 * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 * In more detail: if the fixed-point integer exceeds 24 bit significand of single
 * precision floating point, the 0.5 lsb in the significand conversion will round
 * towards even, as per IEEE 754 default.
 */
static inline float float_from_q4_27(int32_t ival)
{
    /* The scale factor is the reciprocal of the fractional bits.
     *
     * Since the scale factor is a power of 2, the scaling is exact, and there
     * is no rounding due to the multiplication - the bit pattern is preserved.
     * However, there may be rounding due to the fixed-point to float conversion,
     * as described above.
     */
    static const float scale = 1. / (float)(1UL << 27);

    return ival * scale;
}
/* Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
 * The output float range is [-1.0, 1.0) for the fixed-point range
 * [0x8000, 0x7fff].
 *
 * There is no rounding, the conversion and representation is exact.
 */
static inline float float_from_i16(int16_t ival)
{
    /* The scale factor is the reciprocal of the nominal 16 bit integer
     * half-sided range (32768).
     *
     * Since the scale factor is a power of 2, the scaling is exact, and there
     * is no rounding due to the multiplication - the bit pattern is preserved.
     */
    static const float scale = 1. / (float)(1UL << 15);

    return ival * scale;
}
/* Convert a packed 24bit Q0.23 value stored native endian in a uint8_t ptr
 * to a signed fixed-point 32 bit integer Q0.31 value. The output Q0.31 range
 * is [0x80000000, 0x7fffff00] for the fixed-point range [0x800000, 0x7fffff].
 * Even though the output range is limited on the positive side, there is no
 * DC offset on the output, if the input has no DC offset.
 *
 * Avoid relying on the limited output range, as future implementations may go
 * to full range.
 *
 *  packed24  Pointer to the 3 bytes of one packed sample
 */
static inline int32_t i32_from_p24(const uint8_t *packed24)
{
    /* convert to 32b
     *
     * Each byte is widened to uint32_t before shifting: a uint8_t promotes to
     * (signed) int, and shifting a value >= 0x80 left by 24 would overflow it.
     */
#ifdef HAVE_BIG_ENDIAN
    return (int32_t)(((uint32_t)packed24[2] << 8)
            | ((uint32_t)packed24[1] << 16)
            | ((uint32_t)packed24[0] << 24));
#else /* HAVE_LITTLE_ENDIAN */
#ifndef HAVE_LITTLE_ENDIAN
    /* check to see if we really have one or the other android endian flags set. */
#warning "Both HAVE_LITTLE_ENDIAN and HAVE_BIG_ENDIAN not defined, default to little endian"
#endif
    return (int32_t)(((uint32_t)packed24[0] << 8)
            | ((uint32_t)packed24[1] << 16)
            | ((uint32_t)packed24[2] << 24));
#endif
}
/* Convert a 32-bit Q0.31 value to single-precision floating-point.
 * The output float range is [-1.0, 1.0] for the fixed-point range
 * [0x80000000, 0x7fffffff].
 *
 * Rounding may occur in the least significant 8 bits for large fixed point
 * values due to storage into the 24-bit floating-point significand.
 * Rounding will be to nearest, ties to even.
 */
static inline float float_from_i32(int32_t ival)
{
    /* Power-of-2 reciprocal of the Q0.31 half-sided range, so the multiply is exact. */
    static const float scale = 1. / (float)(1UL << 31);

    return ival * scale;
}
/* Convert a packed 24bit Q0.23 value stored native endian in a uint8_t ptr
 * to single-precision floating-point. The output float range is [-1.0, 1.0)
 * for the fixed-point range [0x800000, 0x7fffff].
 *
 * There is no rounding, the conversion and representation is exact.
 *
 *  packed24  Pointer to the 3 bytes of one packed sample
 */
static inline float float_from_p24(const uint8_t *packed24)
{
    /* Widen to Q0.31 first, then reuse the Q0.31 to float conversion. */
    return float_from_i32(i32_from_p24(packed24));
}
/* Convert a signed fixed-point 32-bit Q8.23 value to single-precision floating-point.
 * The nominal output float range is [-1.0, 1.0) for the fixed-point
 * range [0xff800000, 0x007fffff]. The maximum float range is [-256.0, 256.0).
 *
 * There is no rounding in the nominal range, the conversion and representation
 * is exact. For values outside the nominal range, rounding is to nearest, ties to even.
 */
static inline float float_from_q8_23(int32_t ival)
{
    /* Power-of-2 reciprocal of the 23 fractional bits, so the multiply is exact. */
    static const float scale = 1. / (float)(1UL << 23);

    return ival * scale;
}
/*
 * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 *
 *  in  First 16-bit factor
 *  v   Second 16-bit factor
 *  a   32-bit value the product is added to
 */
static inline
int32_t mulAdd(int16_t in, int16_t v, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    /* __asm__ rather than asm: the plain keyword is unavailable in strict ISO modes. */
    __asm__( "smlabb %[out], %[in], %[v], %[a] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    return a + in * (int32_t)v;
#endif
}
/*
 * Multiply 16-bit terms with 32-bit result: return in*v.
 *
 *  in  First 16-bit factor
 *  v   Second 16-bit factor
 */
static inline
int32_t mul(int16_t in, int16_t v)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    /* __asm__ rather than asm: the plain keyword is unavailable in strict ISO modes. */
    __asm__( "smulbb %[out], %[in], %[v] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v)
         : );
    return out;
#else
    return in * (int32_t)v;
#endif
}
/*
 * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 *  left  Non-zero selects the low 16 bits of inRL and vRL, zero selects the high 16 bits
 *  inRL  Packed pair of signed 16-bit values
 *  vRL   Packed pair of signed 16-bit values
 *  a     32-bit value the product is added to
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    /* __asm__ rather than asm: the plain keyword is unavailable in strict ISO modes. */
    if (left) {
        __asm__( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    } else {
        __asm__( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    }
    return out;
#else
    if (left) {
        return a + (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
    } else {
        return a + (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
    }
#endif
}
/*
 * Similar to mul, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 *  left  Non-zero selects the low 16 bits of inRL and vRL, zero selects the high 16 bits
 *  inRL  Packed pair of signed 16-bit values
 *  vRL   Packed pair of signed 16-bit values
 */
static inline
int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    /* __asm__ rather than asm: the plain keyword is unavailable in strict ISO modes. */
    if (left) {
        __asm__( "smulbb %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    } else {
        __asm__( "smultt %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    if (left) {
        return (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
    } else {
        return (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
    }
#endif
}
626 #endif // ANDROID_AUDIO_PRIMITIVES_H