audio_utils/include/audio_utils/primitives.h

   1 /*
   2  * Copyright (C) 2011 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
  18 #define ANDROID_AUDIO_PRIMITIVES_H
  19
  20 #include <stdint.h>
  21 #include <stdlib.h>
  22 #include <sys/cdefs.h>
  23
  24 __BEGIN_DECLS
  25
  26 /* The memcpy_* conversion routines are designed to work in-place on same dst as src
  27  * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
  28  * This allows the loops to go upwards for faster cache access (and may be more flexible
  29  * for future optimization later).
  30  */
  31
  32 /**
  33  * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
  34  * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
  35  * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
  36  * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
  37  * is dithered and the remaining fraction is converted to the output Q.15, with clamping
  38  * on the 4 integer guard bits.
  39  *
  40  * For interleaved stereo, c is the number of sample pairs,
  41  * and out is an array of interleaved pairs of 16-bit samples per channel.
  42  * For mono, c is the number of samples / 2, and out is an array of 16-bit samples.
  43  * The name "dither" is a misnomer; the current implementation does not actually dither
  44  * but uses truncation.  This may change.
  45  * The out and sums buffers must either be completely separate (non-overlapping), or
  46  * they must both start at the same address.  Partially overlapping buffers are not supported.
  47  */
  48 void ditherAndClamp(int32_t* out, const int32_t *sums, size_t c);
  49
  50 /* Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
  51  * Parameters:
  52  *  dst     Destination buffer
  53  *  src     Source buffer
  54  *  count   Number of samples to copy
  55  * The destination and source buffers must either be completely separate (non-overlapping), or
  56  * they must both start at the same address.  Partially overlapping buffers are not supported.
  57  */
  58 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
  59
  60 /* Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
  61  * Parameters:
  62  *  dst     Destination buffer
  63  *  src     Source buffer
  64  *  count   Number of samples to copy
  65  * The destination and source buffers must either be completely separate (non-overlapping), or
  66  * they must both start at the same address.  Partially overlapping buffers are not supported.
  67  * The conversion is done by truncation, without dithering, so it loses resolution.
  68  */
  69 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
  70
  71 /* Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
  72  * Parameters:
  73  *  dst     Destination buffer
  74  *  src     Source buffer
  75  *  count   Number of samples to copy
  76  * The destination and source buffers must either be completely separate (non-overlapping), or
  77  * they must both start at the same address.  Partially overlapping buffers are not supported.
  78  * The conversion is done by truncation, without dithering, so it loses resolution.
  79  */
  80 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
  81
  82 /* Shrink and copy samples from single-precision floating-point to signed 16-bit.
  83  * Each float should be in the range -1.0 to 1.0.  Values outside that range are clamped,
  84  * refer to clamp16_from_float().
  85  * Parameters:
  86  *  dst     Destination buffer
  87  *  src     Source buffer
  88  *  count   Number of samples to copy
  89  * The destination and source buffers must either be completely separate (non-overlapping), or
  90  * they must both start at the same address.  Partially overlapping buffers are not supported.
  91  * The conversion is done by truncation, without dithering, so it loses resolution.
  92  */
  93 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
  94
  95 /* Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
  96  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
  97  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].  Note the closed range
  98  * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
  99  * Parameters:
 100  *  dst     Destination buffer
 101  *  src     Source buffer
 102  *  count   Number of samples to copy
 103  * The destination and source buffers must either be completely separate (non-overlapping), or
 104  * they must both start at the same address.  Partially overlapping buffers are not supported.
 105  */
 106 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
 107
 108 /* Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
 109  * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
 110  * No rounding is needed as the representation is exact.
 111  * Parameters:
 112  *  dst     Destination buffer
 113  *  src     Source buffer
 114  *  count   Number of samples to copy
 115  * The destination and source buffers must be completely separate.
 116  */
 117 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
 118
 119 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
 120  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
 121  * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
 122  * No rounding is needed as the representation is exact.
 123  * Parameters:
 124  *  dst     Destination buffer
 125  *  src     Source buffer
 126  *  count   Number of samples to copy
 127  * The destination and source buffers must be completely separate.
 128  */
 129 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
 130
 131 /* Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
 132  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
 133  * The data is truncated without rounding.
 134  * Parameters:
 135  *  dst     Destination buffer
 136  *  src     Source buffer
 137  *  count   Number of samples to copy
 138  * The destination and source buffers must either be completely separate (non-overlapping), or
 139  * they must both start at the same address.  Partially overlapping buffers are not supported.
 140  */
 141 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
 142
 143 /* Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
 144  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 145  * The output data range is [0x800000, 0x7fff00] (not full).
 146  * Nevertheless there is no DC offset on the output, if the input has no DC offset.
 147  * Parameters:
 148  *  dst     Destination buffer
 149  *  src     Source buffer
 150  *  count   Number of samples to copy
 151  * The destination and source buffers must be completely separate.
 152  */
 153 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
 154
 155 /* Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
 156  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 157  * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
 158  * for details.
 159  * Parameters:
 160  *  dst     Destination buffer
 161  *  src     Source buffer
 162  *  count   Number of samples to copy
 163  * The destination and source buffers must either be completely separate (non-overlapping), or
 164  * they must both start at the same address.  Partially overlapping buffers are not supported.
 165  */
 166 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
 167
 168 /* Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
 169  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 170  * The data is clamped to the range [0x800000, 0x7fffff].
 171  * Parameters:
 172  *  dst     Destination buffer
 173  *  src     Source buffer
 174  *  count   Number of samples to copy
 175  * The destination and source buffers must be completely separate.
 176  */
 177 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
 178
 179 /* Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
 180  * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
 181  * Parameters:
 182  *  dst     Destination buffer
 183  *  src     Source buffer
 184  *  count   Number of samples to copy
 185  * The destination and source buffers must be completely separate.
 186  */
 187 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
 188
 189 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
 190  * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
 191  * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
 192  * See clamp24_from_float() for details.
 193  * Parameters:
 194  *  dst     Destination buffer
 195  *  src     Source buffer
 196  *  count   Number of samples to copy
 197  * The destination and source buffers must either be completely separate (non-overlapping), or
 198  * they must both start at the same address.  Partially overlapping buffers are not supported.
 199  */
 200 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
 201
 202 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
 203  * The conversion will use the full available Q4.27 range, including guard bits.
 204  * Fractional lsb is rounded to nearest, ties away from zero.
 205  * See clampq4_27_from_float() for details.
 206  * Parameters:
 207  *  dst     Destination buffer
 208  *  src     Source buffer
 209  *  count   Number of samples to copy
 210  * The destination and source buffers must either be completely separate (non-overlapping), or
 211  * they must both start at the same address.  Partially overlapping buffers are not supported.
 212  */
 213 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
 214
 215 /* Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
 216  * The data is clamped, and truncated without rounding.
 217  * Parameters:
 218  *  dst     Destination buffer
 219  *  src     Source buffer
 220  *  count   Number of samples to copy
 221  * The destination and source buffers must either be completely separate (non-overlapping), or
 222  * they must both start at the same address.  Partially overlapping buffers are not supported.
 223  */
 224 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
 225
 226 /* Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
 227  * The nominal output float range is [-1.0, 1.0) for the fixed-point
 228  * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
 229  * No rounding is needed as the representation is exact for nominal values.
 230  * Rounding for overflow values is to nearest, ties to even.
 231  * Parameters:
 232  *  dst     Destination buffer
 233  *  src     Source buffer
 234  *  count   Number of samples to copy
 235  * The destination and source buffers must either be completely separate (non-overlapping), or
 236  * they must both start at the same address.  Partially overlapping buffers are not supported.
 237  */
 238 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
 239
 240 /* Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
 241  * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
 242  * Parameters:
 243  *  dst     Destination buffer
 244  *  src     Source buffer
 245  *  count   Number of samples to copy
 246  * The destination and source buffers must be completely separate.
 247  */
 248 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
 249
 250 /* Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
 251  * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
 252  * ties away from zero. See clamp32_from_float() for details.
 253  * Parameters:
 254  *  dst     Destination buffer
 255  *  src     Source buffer
 256  *  count   Number of samples to copy
 257  * The destination and source buffers must either be completely separate (non-overlapping), or
 258  * they must both start at the same address.  Partially overlapping buffers are not supported.
 259  */
 260 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
 261
 262 /* Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
 263  * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
 264  * Rounding is done according to float_from_i32().
 265  * Parameters:
 266  *  dst     Destination buffer
 267  *  src     Source buffer
 268  *  count   Number of samples to copy
 269  * The destination and source buffers must either be completely separate (non-overlapping), or
 270  * they must both start at the same address.  Partially overlapping buffers are not supported.
 271  */
 272 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
 273
 274 /* Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
 275  * Parameters:
 276  *  dst     Destination buffer
 277  *  src     Source buffer
 278  *  count   Number of stereo frames to downmix
 279  * The destination and source buffers must be completely separate (non-overlapping).
 280  * The current implementation truncates the mean rather than dither, but this may change.
 281  */
 282 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
 283
 284 /* Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
 285  * duplicating.
 286  * Parameters:
 287  *  dst     Destination buffer
 288  *  src     Source buffer
 289  *  count   Number of mono samples to upmix
 290  * The destination and source buffers must be completely separate (non-overlapping).
 291  */
 292 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
 293
 294 /* Return the total number of non-zero 32-bit samples */
 295 size_t nonZeroMono32(const int32_t *samples, size_t count);
 296
 297 /* Return the total number of non-zero 16-bit samples */
 298 size_t nonZeroMono16(const int16_t *samples, size_t count);
 299
 300 /* Return the total number of non-zero stereo frames, where a frame is considered non-zero
 301  * if either of its constituent 32-bit samples is non-zero
 302  */
 303 size_t nonZeroStereo32(const int32_t *frames, size_t count);
 304
 305 /* Return the total number of non-zero stereo frames, where a frame is considered non-zero
 306  * if either of its constituent 16-bit samples is non-zero
 307  */
 308 size_t nonZeroStereo16(const int16_t *frames, size_t count);
 309
 310 /* Copy frames, selecting source samples based on a source channel mask to fit
 311  * the destination channel mask. Unmatched channels in the destination channel mask
 312  * are zero filled. Unmatched channels in the source channel mask are dropped.
 313  * Channels present in the channel mask are represented by set bits in the
 314  * uint32_t value and are matched without further interpretation.
 315  * Parameters:
 316  *  dst         Destination buffer
 317  *  dst_mask    Bit mask corresponding to destination channels present
 318  *  src         Source buffer
 319  *  src_mask    Bit mask corresponding to source channels present
 320  *  sample_size Size of each sample in bytes.  Must be 1, 2, 3, or 4.
 321  *  count       Number of frames to copy
 322  * The destination and source buffers must be completely separate (non-overlapping).
 323  * If the sample size is not in range, the function will abort.
 324  */
 325 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
 326         const void *src, uint32_t src_mask, size_t sample_size, size_t count);
 327
 328 /* Copy frames, selecting source samples based on an index array (idxary).
 329  * The idxary[] consists of dst_channels number of elements.
 330  * The ith element of idxary[] corresponds to the ith destination channel.
 331  * A non-negative value is the channel index in the source frame.
 332  * A negative index (-1) represents filling with 0.
 333  *
 334  * Example: Swapping L and R channels for stereo streams
 335  * idxary[0] = 1;
 336  * idxary[1] = 0;
 337  *
 338  * Example: Copying a mono source to the front center 5.1 channel
 339  * idxary[0] = -1;
 340  * idxary[1] = -1;
 341  * idxary[2] = 0;
 342  * idxary[3] = -1;
 343  * idxary[4] = -1;
 344  * idxary[5] = -1;
 345  *
 346  * This copy allows swizzling of channels or replication of channels.
 347  *
 348  * Parameters:
 349  *  dst           Destination buffer
 350  *  dst_channels  Number of destination channels per frame
 351  *  src           Source buffer
 352  *  src_channels  Number of source channels per frame
 353  *  idxary        Array of indices representing channels in the source frame
 354  *  sample_size   Size of each sample in bytes.  Must be 1, 2, 3, or 4.
 355  *  count         Number of frames to copy
 356  * The destination and source buffers must be completely separate (non-overlapping).
 357  * If the sample size is not in range, the function will abort.
 358  */
 359 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
 360         const void *src, uint32_t src_channels,
 361         const int8_t *idxary, size_t sample_size, size_t count);
 362
 363 /* Prepares an index array (idxary) from channel masks, which can be later
 364  * used by memcpy_by_index_array(). Returns the number of array elements required.
 365  * This may be greater than idxcount, so the return value should be checked
 366  * if idxary size is less than 32. Note that idxary is a caller allocated array
 367  * of at least as many channels as present in the dst_mask.
 368  * Channels present in the channel mask are represented by set bits in the
 369  * uint32_t value and are matched without further interpretation.
 370  *
 371  * Parameters:
 372  *  idxary      Updated array of indices of channels in the src frame for the dst frame
 373  *  idxcount    Number of caller allocated elements in idxary
 374  *  dst_mask    Bit mask corresponding to destination channels present
 375  *  src_mask    Bit mask corresponding to source channels present
 376  */
 377 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
 378         uint32_t dst_mask, uint32_t src_mask);
 379
 380 /**
 381  * Clamp (aka hard limit or clip) a signed 32-bit sample to 16-bit range.
 382  */
 383 static inline int16_t clamp16(int32_t sample)
 384 {
 385     if ((sample>>15) ^ (sample>>31))
 386         sample = 0x7FFF ^ (sample>>31);
 387     return sample;
 388 }
 389
 390 /*
 391  * Convert a IEEE 754 single precision float [-1.0, 1.0) to int16_t [-32768, 32767]
 392  * with clamping.  Note the open bound at 1.0, values within 1/65536 of 1.0 map
 393  * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
 394  *
 395  * Values outside the range [-1.0, 1.0) are properly clamped to -32768 and 32767,
 396  * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
 397  * depending on the sign bit inside NaN (whose representation is not unique).
 398  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
 399  *
 400  * Rounding of 0.5 lsb is to even (default for IEEE 754).
 401  */
 402 static inline int16_t clamp16_from_float(float f)
 403 {
 404     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
 405      * floating point significand. The normal shift is 3<<22, but the -15 offset
 406      * is used to multiply by 32768.
 407      */
 408     static const float offset = (float)(3 << (22 - 15));
 409     /* zero = (0x10f << 22) =  0x43c00000 (not directly used) */
 410     static const int32_t limneg = (0x10f << 22) /*zero*/ - 32768; /* 0x43bf8000 */
 411     static const int32_t limpos = (0x10f << 22) /*zero*/ + 32767; /* 0x43c07fff */
 412
 413     union {
 414         float f;
 415         int32_t i;
 416     } u;
 417
 418     u.f = f + offset; /* recenter valid range */
 419     /* Now the valid range is represented as integers between [limneg, limpos].
 420      * Clamp using the fact that float representation (as an integer) is an ordered set.
 421      */
 422     if (u.i < limneg)
 423         u.i = -32768;
 424     else if (u.i > limpos)
 425         u.i = 32767;
 426     return u.i; /* Return lower 16 bits, the part of interest in the significand. */
 427 }
 428
 429 /* Convert a single-precision floating point value to a Q0.23 integer value, stored in a
 430  * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 431  *
 432  * Rounds to nearest, ties away from 0.
 433  *
 434  * Values outside the range [-1.0, 1.0) are properly clamped to -8388608 and 8388607,
 435  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 436  * depending on hardware and future implementation of this function.
 437  */
 438 static inline int32_t clamp24_from_float(float f)
 439 {
 440     static const float scale = (float)(1 << 23);
 441     static const float limpos = 0x7fffff / scale;
 442     static const float limneg = -0x800000 / scale;
 443
 444     if (f <= limneg) {
 445         return -0x800000;
 446     } else if (f >= limpos) {
 447         return 0x7fffff;
 448     }
 449     f *= scale;
 450     /* integer conversion is through truncation (though int to float is not).
 451      * ensure that we round to nearest, ties away from 0.
 452      */
 453     return f > 0 ? f + 0.5 : f - 0.5;
 454 }
 455
 456 /* Convert a signed fixed-point 32-bit Q8.23 value to a Q0.23 integer value,
 457  * stored in a 32-bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 458  *
 459  * Values outside the range [-0x800000, 0x7fffff] are clamped to that range.
 460  */
 461 static inline int32_t clamp24_from_q8_23(int32_t ival)
 462 {
 463     static const int32_t limpos = 0x7fffff;
 464     static const int32_t limneg = -0x800000;
 465     if (ival < limneg) {
 466         return limneg;
 467     } else if (ival > limpos) {
 468         return limpos;
 469     } else {
 470         return ival;
 471     }
 472 }
 473
 474 /* Convert a single-precision floating point value to a Q4.27 integer value.
 475  * Rounds to nearest, ties away from 0.
 476  *
 477  * Values outside the range [-16.0, 16.0) are properly clamped to -2147483648 and 2147483647,
 478  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 479  * depending on hardware and future implementation of this function.
 480  */
 481 static inline int32_t clampq4_27_from_float(float f)
 482 {
 483     static const float scale = (float)(1UL << 27);
 484     static const float limpos = 16.;
 485     static const float limneg = -16.;
 486
 487     if (f <= limneg) {
 488         return -0x80000000; /* or 0x80000000 */
 489     } else if (f >= limpos) {
 490         return 0x7fffffff;
 491     }
 492     f *= scale;
 493     /* integer conversion is through truncation (though int to float is not).
 494      * ensure that we round to nearest, ties away from 0.
 495      */
 496     return f > 0 ? f + 0.5 : f - 0.5;
 497 }
 498
 499 /* Convert a single-precision floating point value to a Q0.31 integer value.
 500  * Rounds to nearest, ties away from 0.
 501  *
 502  * Values outside the range [-1.0, 1.0) are properly clamped to -2147483648 and 2147483647,
 503  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 504  * depending on hardware and future implementation of this function.
 505  */
 506 static inline int32_t clamp32_from_float(float f)
 507 {
 508     static const float scale = (float)(1UL << 31);
 509     static const float limpos = 1.;
 510     static const float limneg = -1.;
 511
 512     if (f <= limneg) {
 513         return -0x80000000; /* or 0x80000000 */
 514     } else if (f >= limpos) {
 515         return 0x7fffffff;
 516     }
 517     f *= scale;
 518     /* integer conversion is through truncation (though int to float is not).
 519      * ensure that we round to nearest, ties away from 0.
 520      */
 521     return f > 0 ? f + 0.5 : f - 0.5;
 522 }
 523
 524 /* Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
 525  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
 526  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].
 527  *
 528  * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 529  * In more detail: if the fixed-point integer exceeds 24 bit significand of single
 530  * precision floating point, the 0.5 lsb in the significand conversion will round
 531  * towards even, as per IEEE 754 default.
 532  */
 533 static inline float float_from_q4_27(int32_t ival)
 534 {
 535     /* The scale factor is the reciprocal of the fractional bits.
 536      *
 537      * Since the scale factor is a power of 2, the scaling is exact, and there
 538      * is no rounding due to the multiplication - the bit pattern is preserved.
 539      * However, there may be rounding due to the fixed-point to float conversion,
 540      * as described above.
 541      */
 542     static const float scale = 1. / (float)(1UL << 27);
 543
 544     return ival * scale;
 545 }
 546
 547 /* Convert an unsigned fixed-point 32-bit U4.28 value to single-precision floating-point.
 548  * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 549  * [0x00000000, 0x10000000].  The full float range is [0.0, 16.0].
 550  *
 551  * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 552  * In more detail: if the fixed-point integer exceeds 24 bit significand of single
 553  * precision floating point, the 0.5 lsb in the significand conversion will round
 554  * towards even, as per IEEE 754 default.
 555  */
 556 static inline float float_from_u4_28(uint32_t uval)
 557 {
 558     static const float scale = 1. / (float)(1UL << 28);
 559
 560     return uval * scale;
 561 }
 562
 563 /* Convert an unsigned fixed-point 16-bit U4.12 value to single-precision floating-point.
 564  * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 565  * [0x0000, 0x1000].  The full float range is [0.0, 16.0).
 566  */
 567 static inline float float_from_u4_12(uint16_t uval)
 568 {
 569     static const float scale = 1. / (float)(1UL << 12);
 570
 571     return uval * scale;
 572 }
 573
 574 /* Convert a single-precision floating point value to a U4.28 integer value.
 575  * Rounds to nearest, ties away from 0.
 576  *
 577  * Values outside the range [0, 16.0] are properly clamped to [0, 4294967295]
 578  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 579  * depending on hardware and future implementation of this function.
 580  */
 581 static inline uint32_t u4_28_from_float(float f)
 582 {
 583     static const float scale = (float)(1 << 28);
 584     static const float limpos = 0xffffffffUL / scale;
 585
 586     if (f <= 0.) {
 587         return 0;
 588     } else if (f >= limpos) {
 589         return 0xffffffff;
 590     }
 591     /* integer conversion is through truncation (though int to float is not).
 592      * ensure that we round to nearest, ties away from 0.
 593      */
 594     return f * scale + 0.5;
 595 }
 596
 597 /* Convert a single-precision floating point value to a U4.12 integer value.
 598  * Rounds to nearest, ties away from 0.
 599  *
 600  * Values outside the range [0, 16.0) are properly clamped to [0, 65535]
 601  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 602  * depending on hardware and future implementation of this function.
 603  */
 604 static inline uint16_t u4_12_from_float(float f)
 605 {
 606     static const float scale = (float)(1 << 12);
 607     static const float limpos = 0xffff / scale;
 608
 609     if (f <= 0.) {
 610         return 0;
 611     } else if (f >= limpos) {
 612         return 0xffff;
 613     }
 614     /* integer conversion is through truncation (though int to float is not).
 615      * ensure that we round to nearest, ties away from 0.
 616      */
 617     return f * scale + 0.5;
 618 }
 619
 620 /* Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
 621  * The output float range is [-1.0, 1.0) for the fixed-point range
 622  * [0x8000, 0x7fff].
 623  *
 624  * There is no rounding, the conversion and representation is exact.
 625  */
 626 static inline float float_from_i16(int16_t ival)
 627 {
 628     /* The scale factor is the reciprocal of the nominal 16 bit integer
 629      * half-sided range (32768).
 630      *
 631      * Since the scale factor is a power of 2, the scaling is exact, and there
 632      * is no rounding due to the multiplication - the bit pattern is preserved.
 633      */
 634     static const float scale = 1. / (float)(1UL << 15);
 635
 636     return ival * scale;
 637 }
 638
 639 /* Convert a packed 24bit Q0.23 value stored native-endian in a uint8_t ptr
 640  * to a signed fixed-point 32 bit integer Q0.31 value. The output Q0.31 range
 641  * is [0x80000000, 0x7fffff00] for the fixed-point range [0x800000, 0x7fffff].
 642  * Even though the output range is limited on the positive side, there is no
 643  * DC offset on the output, if the input has no DC offset.
 644  *
 645  * Avoid relying on the limited output range, as future implementations may go
 646  * to full range.
 647  */
 648 static inline int32_t i32_from_p24(const uint8_t *packed24)
 649 {
 650     /* convert to 32b */
 651     return (packed24[0] << 8) | (packed24[1] << 16) | (packed24[2] << 24);
 652 }
 653
 654 /* Convert a 32-bit Q0.31 value to single-precision floating-point.
 655  * The output float range is [-1.0, 1.0] for the fixed-point range
 656  * [0x80000000, 0x7fffffff].
 657  *
 658  * Rounding may occur in the least significant 8 bits for large fixed point
 659  * values due to storage into the 24-bit floating-point significand.
 660  * Rounding will be to nearest, ties to even.
 661  */
 662 static inline float float_from_i32(int32_t ival)
 663 {
 664     static const float scale = 1. / (float)(1UL << 31);
 665
 666     return ival * scale;
 667 }
 668
 669 /* Convert a packed 24bit Q0.23 value stored native endian in a uint8_t ptr
 670  * to single-precision floating-point. The output float range is [-1.0, 1.0)
 671  * for the fixed-point range [0x800000, 0x7fffff].
 672  *
 673  * There is no rounding, the conversion and representation is exact.
 674  */
 675 static inline float float_from_p24(const uint8_t *packed24)
 676 {
 677     return float_from_i32(i32_from_p24(packed24));
 678 }
 679
 680 /* Convert a 24-bit Q8.23 value to single-precision floating-point.
 681  * The nominal output float range is [-1.0, 1.0) for the fixed-point
 682  * range [0xff800000, 0x007fffff].  The maximum float range is [-256.0, 256.0).
 683  *
 684  * There is no rounding in the nominal range, the conversion and representation
 685  * is exact. For values outside the nominal range, rounding is to nearest, ties to even.
 686  */
 687 static inline float float_from_q8_23(int32_t ival)
 688 {
 689     static const float scale = 1. / (float)(1UL << 23);
 690
 691     return ival * scale;
 692 }
 693
 694 /**
 695  * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 696  */
 697 static inline
 698 int32_t mulAdd(int16_t in, int16_t v, int32_t a)
 699 {
 700 #if defined(__arm__) && !defined(__thumb__)
 701     int32_t out;
 702     asm( "smlabb %[out], %[in], %[v], %[a] \n"
 703          : [out]"=r"(out)
 704          : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
 705          : );
 706     return out;
 707 #else
 708     return a + in * (int32_t)v;
 709 #endif
 710 }
 711
 712 /**
 713  * Multiply 16-bit terms with 32-bit result: return in*v.
 714  */
 715 static inline
 716 int32_t mul(int16_t in, int16_t v)
 717 {
 718 #if defined(__arm__) && !defined(__thumb__)
 719     int32_t out;
 720     asm( "smulbb %[out], %[in], %[v] \n"
 721          : [out]"=r"(out)
 722          : [in]"%r"(in), [v]"r"(v)
 723          : );
 724     return out;
 725 #else
 726     return in * (int32_t)v;
 727 #endif
 728 }
 729
 730 /**
 731  * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 732  */
 733 static inline
 734 int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
 735 {
 736 #if defined(__arm__) && !defined(__thumb__)
 737     int32_t out;
 738     if (left) {
 739         asm( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
 740              : [out]"=r"(out)
 741              : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
 742              : );
 743     } else {
 744         asm( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
 745              : [out]"=r"(out)
 746              : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
 747              : );
 748     }
 749     return out;
 750 #else
 751     if (left) {
 752         return a + (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
 753     } else {
 754         return a + (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
 755     }
 756 #endif
 757 }
 758
 759 /**
 760  * Similar to mul, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 761  */
 762 static inline
 763 int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
 764 {
 765 #if defined(__arm__) && !defined(__thumb__)
 766     int32_t out;
 767     if (left) {
 768         asm( "smulbb %[out], %[inRL], %[vRL] \n"
 769              : [out]"=r"(out)
 770              : [inRL]"%r"(inRL), [vRL]"r"(vRL)
 771              : );
 772     } else {
 773         asm( "smultt %[out], %[inRL], %[vRL] \n"
 774              : [out]"=r"(out)
 775              : [inRL]"%r"(inRL), [vRL]"r"(vRL)
 776              : );
 777     }
 778     return out;
 779 #else
 780     if (left) {
 781         return (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
 782     } else {
 783         return (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
 784     }
 785 #endif
 786 }
 787
 788 __END_DECLS
 789
 790 #endif  // ANDROID_AUDIO_PRIMITIVES_H