audio_utils/include/audio_utils/primitives.h

   1 /*
   2  * Copyright (C) 2011 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
  18 #define ANDROID_AUDIO_PRIMITIVES_H
  19
  20 #include <stdint.h>
  21 #include <stdlib.h>
  22 #include <sys/cdefs.h>
  23
  24 __BEGIN_DECLS
  25
  26 /* The memcpy_* conversion routines are designed to work in-place on same dst as src
  27  * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
  28  * This allows the loops to go upwards for faster cache access (and may be more flexible
  29  * for future optimization later).
  30  */
  31
  32 /**
  33  * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
  34  * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
  35  * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
  36  * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
  37  * is dithered and the remaining fraction is converted to the output Q.15, with clamping
  38  * on the 4 integer guard bits.
  39  *
  40  * For interleaved stereo, c is the number of sample pairs,
  41  * and out is an array of interleaved pairs of 16-bit samples per channel.
  42  * For mono, c is the number of samples / 2, and out is an array of 16-bit samples.
  43  * The name "dither" is a misnomer; the current implementation does not actually dither
  44  * but uses truncation.  This may change.
  45  * The out and sums buffers must either be completely separate (non-overlapping), or
  46  * they must both start at the same address.  Partially overlapping buffers are not supported.
  47  */
  48 void ditherAndClamp(int32_t* out, const int32_t *sums, size_t c);
  49
  50 /* Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
  51  * Parameters:
  52  *  dst     Destination buffer
  53  *  src     Source buffer
  54  *  count   Number of samples to copy
  55  * The destination and source buffers must either be completely separate (non-overlapping), or
  56  * they must both start at the same address.  Partially overlapping buffers are not supported.
  57  */
  58 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
  59
  60 /* Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
  61  * Parameters:
  62  *  dst     Destination buffer
  63  *  src     Source buffer
  64  *  count   Number of samples to copy
  65  * The destination and source buffers must either be completely separate (non-overlapping), or
  66  * they must both start at the same address.  Partially overlapping buffers are not supported.
  67  * The conversion is done by truncation, without dithering, so it loses resolution.
  68  */
  69 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
  70
  71 /* Copy samples from float to unsigned 8-bit offset by 0x80.
  72  * Parameters:
  73  *  dst     Destination buffer
  74  *  src     Source buffer
  75  *  count   Number of samples to copy
  76  * The destination and source buffers must either be completely separate (non-overlapping), or
  77  * they must both start at the same address.  Partially overlapping buffers are not supported.
  78  * The conversion is done by truncation, without dithering, so it loses resolution.
  79  */
  80 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
  81
  82 /* Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
  83  * Parameters:
  84  *  dst     Destination buffer
  85  *  src     Source buffer
  86  *  count   Number of samples to copy
  87  * The destination and source buffers must either be completely separate (non-overlapping), or
  88  * they must both start at the same address.  Partially overlapping buffers are not supported.
  89  * The conversion is done by truncation, without dithering, so it loses resolution.
  90  */
  91 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
  92
  93 /* Shrink and copy samples from single-precision floating-point to signed 16-bit.
  94  * Each float should be in the range -1.0 to 1.0.  Values outside that range are clamped,
  95  * refer to clamp16_from_float().
  96  * Parameters:
  97  *  dst     Destination buffer
  98  *  src     Source buffer
  99  *  count   Number of samples to copy
 100  * The destination and source buffers must either be completely separate (non-overlapping), or
 101  * they must both start at the same address.  Partially overlapping buffers are not supported.
 102  * The conversion is done by truncation, without dithering, so it loses resolution.
 103  */
 104 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
 105
 106 /* Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
 107  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
 108  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].  Note the closed range
 109  * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
 110  * Parameters:
 111  *  dst     Destination buffer
 112  *  src     Source buffer
 113  *  count   Number of samples to copy
 114  * The destination and source buffers must either be completely separate (non-overlapping), or
 115  * they must both start at the same address.  Partially overlapping buffers are not supported.
 116  */
 117 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
 118
 119 /* Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
 120  * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
 121  * No rounding is needed as the representation is exact.
 122  * Parameters:
 123  *  dst     Destination buffer
 124  *  src     Source buffer
 125  *  count   Number of samples to copy
 126  * The destination and source buffers must be completely separate.
 127  */
 128 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
 129
 130 /* Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
 131  * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
 132  * No rounding is needed as the representation is exact.
 133  * Parameters:
 134  *  dst     Destination buffer
 135  *  src     Source buffer
 136  *  count   Number of samples to copy
 137  * The destination and source buffers must be completely separate.
 138  */
 139 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
 140
 141 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
 142  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
 143  * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
 144  * No rounding is needed as the representation is exact.
 145  * Parameters:
 146  *  dst     Destination buffer
 147  *  src     Source buffer
 148  *  count   Number of samples to copy
 149  * The destination and source buffers must be completely separate.
 150  */
 151 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
 152
 153 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
  154  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
 155  * The data is truncated without rounding.
 156  * Parameters:
 157  *  dst     Destination buffer
 158  *  src     Source buffer
 159  *  count   Number of samples to copy
 160  * The destination and source buffers must either be completely separate (non-overlapping), or
 161  * they must both start at the same address.  Partially overlapping buffers are not supported.
 162  */
 163 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
 164
 165 /* Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
 166  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 167  * The output data range is [0x800000, 0x7fff00] (not full).
 168  * Nevertheless there is no DC offset on the output, if the input has no DC offset.
 169  * Parameters:
 170  *  dst     Destination buffer
 171  *  src     Source buffer
 172  *  count   Number of samples to copy
 173  * The destination and source buffers must be completely separate.
 174  */
 175 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
 176
 177 /* Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
 178  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 179  * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
 180  * for details.
 181  * Parameters:
 182  *  dst     Destination buffer
 183  *  src     Source buffer
 184  *  count   Number of samples to copy
 185  * The destination and source buffers must either be completely separate (non-overlapping), or
 186  * they must both start at the same address.  Partially overlapping buffers are not supported.
 187  */
 188 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
 189
 190 /* Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
 191  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
  192  * The data is clamped to the range [0x800000, 0x7fffff].
 193  * Parameters:
 194  *  dst     Destination buffer
 195  *  src     Source buffer
 196  *  count   Number of samples to copy
 197  * The destination and source buffers must be completely separate.
 198  */
 199 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
 200
 201 /* Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
 202  * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
 203  * Parameters:
 204  *  dst     Destination buffer
 205  *  src     Source buffer
 206  *  count   Number of samples to copy
 207  * The destination and source buffers must be completely separate.
 208  */
 209 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
 210
 211 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
 212  * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
 213  * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
 214  * See clamp24_from_float() for details.
 215  * Parameters:
 216  *  dst     Destination buffer
 217  *  src     Source buffer
 218  *  count   Number of samples to copy
 219  * The destination and source buffers must either be completely separate (non-overlapping), or
 220  * they must both start at the same address.  Partially overlapping buffers are not supported.
 221  */
 222 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
 223
 224 /* Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
 225  * The output data range is [0xff800000, 0x007fffff].
 226  * Parameters:
 227  *  dst     Destination buffer
 228  *  src     Source buffer
 229  *  count   Number of samples to copy
 230  * The destination and source buffers must be completely separate.
 231  */
 232 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
 233
 234 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
 235  * The conversion will use the full available Q4.27 range, including guard bits.
 236  * Fractional lsb is rounded to nearest, ties away from zero.
 237  * See clampq4_27_from_float() for details.
 238  * Parameters:
 239  *  dst     Destination buffer
 240  *  src     Source buffer
 241  *  count   Number of samples to copy
 242  * The destination and source buffers must either be completely separate (non-overlapping), or
 243  * they must both start at the same address.  Partially overlapping buffers are not supported.
 244  */
 245 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
 246
 247 /* Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
 248  * The data is clamped, and truncated without rounding.
 249  * Parameters:
 250  *  dst     Destination buffer
 251  *  src     Source buffer
 252  *  count   Number of samples to copy
 253  * The destination and source buffers must either be completely separate (non-overlapping), or
 254  * they must both start at the same address.  Partially overlapping buffers are not supported.
 255  */
 256 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
 257
 258 /* Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
 259  * The nominal output float range is [-1.0, 1.0) for the fixed-point
 260  * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
 261  * No rounding is needed as the representation is exact for nominal values.
 262  * Rounding for overflow values is to nearest, ties to even.
 263  * Parameters:
 264  *  dst     Destination buffer
 265  *  src     Source buffer
 266  *  count   Number of samples to copy
 267  * The destination and source buffers must either be completely separate (non-overlapping), or
 268  * they must both start at the same address.  Partially overlapping buffers are not supported.
 269  */
 270 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
 271
 272 /* Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
 273  * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
 274  * Parameters:
 275  *  dst     Destination buffer
 276  *  src     Source buffer
 277  *  count   Number of samples to copy
 278  * The destination and source buffers must be completely separate.
 279  */
 280 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
 281
 282 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
 283  * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
 284  * ties away from zero. See clamp32_from_float() for details.
 285  * Parameters:
 286  *  dst     Destination buffer
 287  *  src     Source buffer
 288  *  count   Number of samples to copy
 289  * The destination and source buffers must either be completely separate (non-overlapping), or
 290  * they must both start at the same address.  Partially overlapping buffers are not supported.
 291  */
 292 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
 293
 294 /* Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
 295  * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
 296  * Rounding is done according to float_from_i32().
 297  * Parameters:
 298  *  dst     Destination buffer
 299  *  src     Source buffer
 300  *  count   Number of samples to copy
 301  * The destination and source buffers must either be completely separate (non-overlapping), or
 302  * they must both start at the same address.  Partially overlapping buffers are not supported.
 303  */
 304 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
 305
 306 /* Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
 307  * Parameters:
 308  *  dst     Destination buffer
 309  *  src     Source buffer
 310  *  count   Number of stereo frames to downmix
 311  * The destination and source buffers must be completely separate (non-overlapping).
 312  * The current implementation truncates the mean rather than dither, but this may change.
 313  */
 314 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
 315
 316 /* Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
 317  * duplicating.
 318  * Parameters:
 319  *  dst     Destination buffer
 320  *  src     Source buffer
 321  *  count   Number of mono samples to upmix
 322  * The destination and source buffers must be completely separate (non-overlapping).
 323  */
 324 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
 325
 326 /* Return the total number of non-zero 32-bit samples */
 327 size_t nonZeroMono32(const int32_t *samples, size_t count);
 328
 329 /* Return the total number of non-zero 16-bit samples */
 330 size_t nonZeroMono16(const int16_t *samples, size_t count);
 331
 332 /* Return the total number of non-zero stereo frames, where a frame is considered non-zero
 333  * if either of its constituent 32-bit samples is non-zero
 334  */
 335 size_t nonZeroStereo32(const int32_t *frames, size_t count);
 336
 337 /* Return the total number of non-zero stereo frames, where a frame is considered non-zero
 338  * if either of its constituent 16-bit samples is non-zero
 339  */
 340 size_t nonZeroStereo16(const int16_t *frames, size_t count);
 341
 342 /* Copy frames, selecting source samples based on a source channel mask to fit
 343  * the destination channel mask. Unmatched channels in the destination channel mask
 344  * are zero filled. Unmatched channels in the source channel mask are dropped.
 345  * Channels present in the channel mask are represented by set bits in the
 346  * uint32_t value and are matched without further interpretation.
 347  * Parameters:
 348  *  dst         Destination buffer
 349  *  dst_mask    Bit mask corresponding to destination channels present
 350  *  src         Source buffer
 351  *  src_mask    Bit mask corresponding to source channels present
 352  *  sample_size Size of each sample in bytes.  Must be 1, 2, 3, or 4.
 353  *  count       Number of frames to copy
 354  * The destination and source buffers must be completely separate (non-overlapping).
 355  * If the sample size is not in range, the function will abort.
 356  */
 357 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
 358         const void *src, uint32_t src_mask, size_t sample_size, size_t count);
 359
 360 /* Copy frames, selecting source samples based on an index array (idxary).
 361  * The idxary[] consists of dst_channels number of elements.
  362  * The ith element of idxary[] corresponds to the ith destination channel.
 363  * A non-negative value is the channel index in the source frame.
 364  * A negative index (-1) represents filling with 0.
 365  *
 366  * Example: Swapping L and R channels for stereo streams
 367  * idxary[0] = 1;
 368  * idxary[1] = 0;
 369  *
 370  * Example: Copying a mono source to the front center 5.1 channel
 371  * idxary[0] = -1;
 372  * idxary[1] = -1;
 373  * idxary[2] = 0;
 374  * idxary[3] = -1;
 375  * idxary[4] = -1;
 376  * idxary[5] = -1;
 377  *
 378  * This copy allows swizzling of channels or replication of channels.
 379  *
 380  * Parameters:
 381  *  dst           Destination buffer
 382  *  dst_channels  Number of destination channels per frame
 383  *  src           Source buffer
 384  *  src_channels  Number of source channels per frame
 385  *  idxary        Array of indices representing channels in the source frame
 386  *  sample_size   Size of each sample in bytes.  Must be 1, 2, 3, or 4.
 387  *  count         Number of frames to copy
 388  * The destination and source buffers must be completely separate (non-overlapping).
 389  * If the sample size is not in range, the function will abort.
 390  */
 391 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
 392         const void *src, uint32_t src_channels,
 393         const int8_t *idxary, size_t sample_size, size_t count);
 394
 395 /* Prepares an index array (idxary) from channel masks, which can be later
 396  * used by memcpy_by_index_array(). Returns the number of array elements required.
 397  * This may be greater than idxcount, so the return value should be checked
 398  * if idxary size is less than 32. Note that idxary is a caller allocated array
 399  * of at least as many channels as present in the dst_mask.
 400  * Channels present in the channel mask are represented by set bits in the
 401  * uint32_t value and are matched without further interpretation.
 402  *
 403  * This function is typically used for converting audio data with different
 404  * channel position masks.
 405  *
 406  * Parameters:
 407  *  idxary      Updated array of indices of channels in the src frame for the dst frame
 408  *  idxcount    Number of caller allocated elements in idxary
 409  *  dst_mask    Bit mask corresponding to destination channels present
 410  *  src_mask    Bit mask corresponding to source channels present
 411  */
 412 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
 413         uint32_t dst_mask, uint32_t src_mask);
 414
 415 /* Prepares an index array (idxary) from channel masks, which can be later
 416  * used by memcpy_by_index_array(). Returns the number of array elements required.
 417  *
 418  * For a source channel index mask, the source channels will map to the destination
 419  * channels as if counting the set bits in dst_mask in order from lsb to msb
 420  * (zero bits are ignored). The ith bit of the src_mask corresponds to the
 421  * ith SET bit of dst_mask and the ith destination channel.  Hence, a zero ith
 422  * bit of the src_mask indicates that the ith destination channel plays silence.
 423  *
 424  * Parameters:
 425  *  idxary      Updated array of indices of channels in the src frame for the dst frame
 426  *  idxcount    Number of caller allocated elements in idxary
 427  *  dst_mask    Bit mask corresponding to destination channels present
 428  *  src_mask    Bit mask corresponding to source channels present
 429  */
 430 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
 431         uint32_t dst_mask, uint32_t src_mask);
 432
 433 /* Prepares an index array (idxary) from channel mask bits, which can be later
 434  * used by memcpy_by_index_array(). Returns the number of array elements required.
 435  *
 436  * This initialization is for a destination channel index mask from a positional
 437  * source mask.
 438  *
  439  * For a destination channel index mask, the input channels will map
 440  * to the destination channels, with the ith SET bit in the source bits corresponding
 441  * to the ith bit in the destination bits. If there is a zero bit in the middle
 442  * of set destination bits (unlikely), the corresponding source channel will
 443  * be dropped.
 444  *
 445  * Parameters:
 446  *  idxary      Updated array of indices of channels in the src frame for the dst frame
 447  *  idxcount    Number of caller allocated elements in idxary
 448  *  dst_mask    Bit mask corresponding to destination channels present
 449  *  src_mask    Bit mask corresponding to source channels present
 450  */
 451 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
 452         uint32_t dst_mask, uint32_t src_mask);
 453
 454 /**
 455  * Clamp (aka hard limit or clip) a signed 32-bit sample to 16-bit range.
 456  */
 457 static inline int16_t clamp16(int32_t sample)
 458 {
 459     if ((sample>>15) ^ (sample>>31))
 460         sample = 0x7FFF ^ (sample>>31);
 461     return sample;
 462 }
 463
 464 /*
 465  * Convert a IEEE 754 single precision float [-1.0, 1.0) to int16_t [-32768, 32767]
 466  * with clamping.  Note the open bound at 1.0, values within 1/65536 of 1.0 map
 467  * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
 468  *
 469  * Values outside the range [-1.0, 1.0) are properly clamped to -32768 and 32767,
 470  * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
 471  * depending on the sign bit inside NaN (whose representation is not unique).
 472  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
 473  *
 474  * Rounding of 0.5 lsb is to even (default for IEEE 754).
 475  */
 476 static inline int16_t clamp16_from_float(float f)
 477 {
 478     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
 479      * floating point significand. The normal shift is 3<<22, but the -15 offset
 480      * is used to multiply by 32768.
 481      */
 482     static const float offset = (float)(3 << (22 - 15));
 483     /* zero = (0x10f << 22) =  0x43c00000 (not directly used) */
 484     static const int32_t limneg = (0x10f << 22) /*zero*/ - 32768; /* 0x43bf8000 */
 485     static const int32_t limpos = (0x10f << 22) /*zero*/ + 32767; /* 0x43c07fff */
 486
 487     union {
 488         float f;
 489         int32_t i;
 490     } u;
 491
 492     u.f = f + offset; /* recenter valid range */
 493     /* Now the valid range is represented as integers between [limneg, limpos].
 494      * Clamp using the fact that float representation (as an integer) is an ordered set.
 495      */
 496     if (u.i < limneg)
 497         u.i = -32768;
 498     else if (u.i > limpos)
 499         u.i = 32767;
 500     return u.i; /* Return lower 16 bits, the part of interest in the significand. */
 501 }
 502
 503 /*
 504  * Convert a IEEE 754 single precision float [-1.0, 1.0) to uint8_t [0, 0xff]
 505  * with clamping.  Note the open bound at 1.0, values within 1/128 of 1.0 map
 506  * to 255 instead of 256 (early clamping due to the smaller positive integer subrange).
 507  *
 508  * Values outside the range [-1.0, 1.0) are properly clamped to 0 and 255,
 509  * including -Inf and +Inf. NaN will generally be treated either as 0 or 255,
 510  * depending on the sign bit inside NaN (whose representation is not unique).
 511  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
 512  *
 513  * Rounding of 0.5 lsb is to even (default for IEEE 754).
 514  */
 515 static inline uint8_t clamp8_from_float(float f)
 516 {
 517     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
 518      * floating point significand. The normal shift is 3<<22, but the -7 offset
 519      * is used to multiply by 128.
 520      */
 521     static const float offset = (float)((3 << (22 - 7)) + 1 /* to cancel -1.0 */);
 522     /* zero = (0x11f << 22) =  0x47c00000 */
 523     static const int32_t limneg = (0x11f << 22) /*zero*/;
 524     static const int32_t limpos = (0x11f << 22) /*zero*/ + 255; /* 0x47c000ff */
 525
 526     union {
 527         float f;
 528         int32_t i;
 529     } u;
 530
 531     u.f = f + offset; /* recenter valid range */
 532     /* Now the valid range is represented as integers between [limneg, limpos].
 533      * Clamp using the fact that float representation (as an integer) is an ordered set.
 534      */
 535     if (u.i < limneg)
 536         return 0;
 537     if (u.i > limpos)
 538         return 255;
 539     return u.i; /* Return lower 8 bits, the part of interest in the significand. */
 540 }
 541
 542 /* Convert a single-precision floating point value to a Q0.23 integer value, stored in a
 543  * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 544  *
 545  * Rounds to nearest, ties away from 0.
 546  *
 547  * Values outside the range [-1.0, 1.0) are properly clamped to -8388608 and 8388607,
 548  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 549  * depending on hardware and future implementation of this function.
 550  */
 551 static inline int32_t clamp24_from_float(float f)
 552 {
 553     static const float scale = (float)(1 << 23);
 554     static const float limpos = 0x7fffff / scale;
 555     static const float limneg = -0x800000 / scale;
 556
 557     if (f <= limneg) {
 558         return -0x800000;
 559     } else if (f >= limpos) {
 560         return 0x7fffff;
 561     }
 562     f *= scale;
 563     /* integer conversion is through truncation (though int to float is not).
 564      * ensure that we round to nearest, ties away from 0.
 565      */
 566     return f > 0 ? f + 0.5 : f - 0.5;
 567 }
 568
 569 /* Convert a signed fixed-point 32-bit Q8.23 value to a Q0.23 integer value,
 570  * stored in a 32-bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 571  *
 572  * Values outside the range [-0x800000, 0x7fffff] are clamped to that range.
 573  */
 574 static inline int32_t clamp24_from_q8_23(int32_t ival)
 575 {
 576     static const int32_t limpos = 0x7fffff;
 577     static const int32_t limneg = -0x800000;
 578     if (ival < limneg) {
 579         return limneg;
 580     } else if (ival > limpos) {
 581         return limpos;
 582     } else {
 583         return ival;
 584     }
 585 }
 586
 587 /* Convert a single-precision floating point value to a Q4.27 integer value.
 588  * Rounds to nearest, ties away from 0.
 589  *
 590  * Values outside the range [-16.0, 16.0) are properly clamped to -2147483648 and 2147483647,
 591  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 592  * depending on hardware and future implementation of this function.
 593  */
 594 static inline int32_t clampq4_27_from_float(float f)
 595 {
 596     static const float scale = (float)(1UL << 27);
 597     static const float limpos = 16.;
 598     static const float limneg = -16.;
 599
 600     if (f <= limneg) {
 601         return -0x80000000; /* or 0x80000000 */
 602     } else if (f >= limpos) {
 603         return 0x7fffffff;
 604     }
 605     f *= scale;
 606     /* integer conversion is through truncation (though int to float is not).
 607      * ensure that we round to nearest, ties away from 0.
 608      */
 609     return f > 0 ? f + 0.5 : f - 0.5;
 610 }
 611
 612 /* Convert a single-precision floating point value to a Q0.31 integer value.
 613  * Rounds to nearest, ties away from 0.
 614  *
 615  * Values outside the range [-1.0, 1.0) are properly clamped to -2147483648 and 2147483647,
 616  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 617  * depending on hardware and future implementation of this function.
 618  */
 619 static inline int32_t clamp32_from_float(float f)
 620 {
 621     static const float scale = (float)(1UL << 31);
 622     static const float limpos = 1.;
 623     static const float limneg = -1.;
 624
 625     if (f <= limneg) {
 626         return -0x80000000; /* or 0x80000000 */
 627     } else if (f >= limpos) {
 628         return 0x7fffffff;
 629     }
 630     f *= scale;
 631     /* integer conversion is through truncation (though int to float is not).
 632      * ensure that we round to nearest, ties away from 0.
 633      */
 634     return f > 0 ? f + 0.5 : f - 0.5;
 635 }
 636
 637 /* Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
 638  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
 639  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].
 640  *
 641  * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 642  * In more detail: if the fixed-point integer exceeds 24 bit significand of single
 643  * precision floating point, the 0.5 lsb in the significand conversion will round
 644  * towards even, as per IEEE 754 default.
 645  */
 646 static inline float float_from_q4_27(int32_t ival)
 647 {
 648     /* The scale factor is the reciprocal of the fractional bits.
 649      *
 650      * Since the scale factor is a power of 2, the scaling is exact, and there
 651      * is no rounding due to the multiplication - the bit pattern is preserved.
 652      * However, there may be rounding due to the fixed-point to float conversion,
 653      * as described above.
 654      */
 655     static const float scale = 1. / (float)(1UL << 27);
 656
 657     return ival * scale;
 658 }
 659
 660 /* Convert an unsigned fixed-point 32-bit U4.28 value to single-precision floating-point.
 661  * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 662  * [0x00000000, 0x10000000].  The full float range is [0.0, 16.0].
 663  *
 664  * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 665  * In more detail: if the fixed-point integer exceeds 24 bit significand of single
 666  * precision floating point, the 0.5 lsb in the significand conversion will round
 667  * towards even, as per IEEE 754 default.
 668  */
 669 static inline float float_from_u4_28(uint32_t uval)
 670 {
 671     static const float scale = 1. / (float)(1UL << 28);
 672
 673     return uval * scale;
 674 }
 675
 676 /* Convert an unsigned fixed-point 16-bit U4.12 value to single-precision floating-point.
 677  * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 678  * [0x0000, 0x1000].  The full float range is [0.0, 16.0).
 679  */
 680 static inline float float_from_u4_12(uint16_t uval)
 681 {
 682     static const float scale = 1. / (float)(1UL << 12);
 683
 684     return uval * scale;
 685 }
 686
 687 /* Convert a single-precision floating point value to a U4.28 integer value.
 688  * Rounds to nearest, ties away from 0.
 689  *
 690  * Values outside the range [0, 16.0] are properly clamped to [0, 4294967295]
 691  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 692  * depending on hardware and future implementation of this function.
 693  */
 694 static inline uint32_t u4_28_from_float(float f)
 695 {
 696     static const float scale = (float)(1 << 28);
 697     static const float limpos = 0xffffffffUL / scale;
 698
 699     if (f <= 0.) {
 700         return 0;
 701     } else if (f >= limpos) {
 702         return 0xffffffff;
 703     }
 704     /* integer conversion is through truncation (though int to float is not).
 705      * ensure that we round to nearest, ties away from 0.
 706      */
 707     return f * scale + 0.5;
 708 }
 709
 710 /* Convert a single-precision floating point value to a U4.12 integer value.
 711  * Rounds to nearest, ties away from 0.
 712  *
 713  * Values outside the range [0, 16.0) are properly clamped to [0, 65535]
 714  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 715  * depending on hardware and future implementation of this function.
 716  */
 717 static inline uint16_t u4_12_from_float(float f)
 718 {
 719     static const float scale = (float)(1 << 12);
 720     static const float limpos = 0xffff / scale;
 721
 722     if (f <= 0.) {
 723         return 0;
 724     } else if (f >= limpos) {
 725         return 0xffff;
 726     }
 727     /* integer conversion is through truncation (though int to float is not).
 728      * ensure that we round to nearest, ties away from 0.
 729      */
 730     return f * scale + 0.5;
 731 }
 732
 733 /* Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
 734  * The output float range is [-1.0, 1.0) for the fixed-point range
 735  * [0x8000, 0x7fff].
 736  *
 737  * There is no rounding, the conversion and representation is exact.
 738  */
 739 static inline float float_from_i16(int16_t ival)
 740 {
 741     /* The scale factor is the reciprocal of the nominal 16 bit integer
 742      * half-sided range (32768).
 743      *
 744      * Since the scale factor is a power of 2, the scaling is exact, and there
 745      * is no rounding due to the multiplication - the bit pattern is preserved.
 746      */
 747     static const float scale = 1. / (float)(1UL << 15);
 748
 749     return ival * scale;
 750 }
 751
 752 /* Convert an unsigned fixed-point 8-bit U0.8 value to single-precision floating-point.
 753  * The nominal output float range is [-1.0, 1.0) if the fixed-point range is
 754  * [0x00, 0xff].
 755  */
 756 static inline float float_from_u8(uint8_t uval)
 757 {
 758     static const float scale = 1. / (float)(1UL << 7);
 759
 760     return ((int)uval - 128) * scale;
 761 }
 762
 763 /* Convert a packed 24bit Q0.23 value stored native-endian in a uint8_t ptr
 764  * to a signed fixed-point 32 bit integer Q0.31 value. The output Q0.31 range
 765  * is [0x80000000, 0x7fffff00] for the fixed-point range [0x800000, 0x7fffff].
 766  * Even though the output range is limited on the positive side, there is no
 767  * DC offset on the output, if the input has no DC offset.
 768  *
 769  * Avoid relying on the limited output range, as future implementations may go
 770  * to full range.
 771  */
 772 static inline int32_t i32_from_p24(const uint8_t *packed24)
 773 {
 774     /* convert to 32b */
 775     return (packed24[0] << 8) | (packed24[1] << 16) | (packed24[2] << 24);
 776 }
 777
 778 /* Convert a 32-bit Q0.31 value to single-precision floating-point.
 779  * The output float range is [-1.0, 1.0] for the fixed-point range
 780  * [0x80000000, 0x7fffffff].
 781  *
 782  * Rounding may occur in the least significant 8 bits for large fixed point
 783  * values due to storage into the 24-bit floating-point significand.
 784  * Rounding will be to nearest, ties to even.
 785  */
 786 static inline float float_from_i32(int32_t ival)
 787 {
 788     static const float scale = 1. / (float)(1UL << 31);
 789
 790     return ival * scale;
 791 }
 792
 793 /* Convert a packed 24bit Q0.23 value stored native endian in a uint8_t ptr
 794  * to single-precision floating-point. The output float range is [-1.0, 1.0)
 795  * for the fixed-point range [0x800000, 0x7fffff].
 796  *
 797  * There is no rounding, the conversion and representation is exact.
 798  */
 799 static inline float float_from_p24(const uint8_t *packed24)
 800 {
 801     return float_from_i32(i32_from_p24(packed24));
 802 }
 803
 804 /* Convert a 24-bit Q8.23 value to single-precision floating-point.
 805  * The nominal output float range is [-1.0, 1.0) for the fixed-point
 806  * range [0xff800000, 0x007fffff].  The maximum float range is [-256.0, 256.0).
 807  *
 808  * There is no rounding in the nominal range, the conversion and representation
 809  * is exact. For values outside the nominal range, rounding is to nearest, ties to even.
 810  */
 811 static inline float float_from_q8_23(int32_t ival)
 812 {
 813     static const float scale = 1. / (float)(1UL << 23);
 814
 815     return ival * scale;
 816 }
 817
 818 /**
 819  * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 820  */
 821 static inline
 822 int32_t mulAdd(int16_t in, int16_t v, int32_t a)
 823 {
 824 #if defined(__arm__) && !defined(__thumb__)
 825     int32_t out;
 826     asm( "smlabb %[out], %[in], %[v], %[a] \n"
 827          : [out]"=r"(out)
 828          : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
 829          : );
 830     return out;
 831 #else
 832     return a + in * (int32_t)v;
 833 #endif
 834 }
 835
 836 /**
 837  * Multiply 16-bit terms with 32-bit result: return in*v.
 838  */
 839 static inline
 840 int32_t mul(int16_t in, int16_t v)
 841 {
 842 #if defined(__arm__) && !defined(__thumb__)
 843     int32_t out;
 844     asm( "smulbb %[out], %[in], %[v] \n"
 845          : [out]"=r"(out)
 846          : [in]"%r"(in), [v]"r"(v)
 847          : );
 848     return out;
 849 #else
 850     return in * (int32_t)v;
 851 #endif
 852 }
 853
 854 /**
 855  * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 856  */
 857 static inline
 858 int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
 859 {
 860 #if defined(__arm__) && !defined(__thumb__)
 861     int32_t out;
 862     if (left) {
 863         asm( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
 864              : [out]"=r"(out)
 865              : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
 866              : );
 867     } else {
 868         asm( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
 869              : [out]"=r"(out)
 870              : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
 871              : );
 872     }
 873     return out;
 874 #else
 875     if (left) {
 876         return a + (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
 877     } else {
 878         return a + (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
 879     }
 880 #endif
 881 }
 882
 883 /**
 884  * Similar to mul, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 885  */
 886 static inline
 887 int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
 888 {
 889 #if defined(__arm__) && !defined(__thumb__)
 890     int32_t out;
 891     if (left) {
 892         asm( "smulbb %[out], %[inRL], %[vRL] \n"
 893              : [out]"=r"(out)
 894              : [inRL]"%r"(inRL), [vRL]"r"(vRL)
 895              : );
 896     } else {
 897         asm( "smultt %[out], %[inRL], %[vRL] \n"
 898              : [out]"=r"(out)
 899              : [inRL]"%r"(inRL), [vRL]"r"(vRL)
 900              : );
 901     }
 902     return out;
 903 #else
 904     if (left) {
 905         return (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
 906     } else {
 907         return (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
 908     }
 909 #endif
 910 }
 911
 912 __END_DECLS
 913
 914 #endif  // ANDROID_AUDIO_PRIMITIVES_H