audio_utils/include/audio_utils/primitives.h

   1 /*
   2  * Copyright (C) 2011 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
  18 #define ANDROID_AUDIO_PRIMITIVES_H
  19
  20 #include <stdint.h>
  21 #include <stdlib.h>
  22 #include <sys/cdefs.h>
  23
  24 /** \cond */
  25 __BEGIN_DECLS
  26 /** \endcond */
  27
  28 /**
  29  * \file primitives.h
  30  * The memcpy_* conversion routines are designed to work in-place on same dst as src
  31  * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
  32  * This allows the loops to go upwards for faster cache access (and may be more flexible
  33  * for future optimization later).
  34  */
  35
  36 /**
  37  * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
  38  * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
  39  * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
  40  * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
  41  * is dithered and the remaining fraction is converted to the output Q.15, with clamping
  42  * on the 4 integer guard bits.
  43  *
  44  * For interleaved stereo, c is the number of sample pairs,
  45  * and out is an array of interleaved pairs of 16-bit samples per channel.
  46  * For mono, c is the number of samples / 2, and out is an array of 16-bit samples.
  47  * The name "dither" is a misnomer; the current implementation does not actually dither
  48  * but uses truncation.  This may change.
  49  * The out and sums buffers must either be completely separate (non-overlapping), or
  50  * they must both start at the same address.  Partially overlapping buffers are not supported.
  51  */
  52 void ditherAndClamp(int32_t* out, const int32_t *sums, size_t c);
  53
  54 /**
  55  * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
  56  *
  57  *  \param dst     Destination buffer
  58  *  \param src     Source buffer
  59  *  \param count   Number of samples to copy
  60  *
  61  * The destination and source buffers must either be completely separate (non-overlapping), or
  62  * they must both start at the same address.  Partially overlapping buffers are not supported.
  63  */
  64 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
  65
  66 /**
  67  * Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
  68  *
  69  *  \param dst     Destination buffer
  70  *  \param src     Source buffer
  71  *  \param count   Number of samples to copy
  72  *
  73  * The destination and source buffers must either be completely separate (non-overlapping), or
  74  * they must both start at the same address.  Partially overlapping buffers are not supported.
  75  * The conversion is done by truncation, without dithering, so it loses resolution.
  76  */
  77 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
  78
  79 /**
  80  * Copy samples from float to unsigned 8-bit offset by 0x80.
  81  *
  82  *  \param dst     Destination buffer
  83  *  \param src     Source buffer
  84  *  \param count   Number of samples to copy
  85  *
  86  * The destination and source buffers must either be completely separate (non-overlapping), or
  87  * they must both start at the same address.  Partially overlapping buffers are not supported.
  88  * The conversion is done by truncation, without dithering, so it loses resolution.
  89  */
  90 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
  91
  92 /**
  93  * Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
  94  *
  95  *  \param dst     Destination buffer
  96  *  \param src     Source buffer
  97  *  \param count   Number of samples to copy
  98  *
  99  * The destination and source buffers must either be completely separate (non-overlapping), or
 100  * they must both start at the same address.  Partially overlapping buffers are not supported.
 101  * The conversion is done by truncation, without dithering, so it loses resolution.
 102  */
 103 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
 104
 105 /**
 106  * Shrink and copy samples from single-precision floating-point to signed 16-bit.
 107  * Each float should be in the range -1.0 to 1.0.  Values outside that range are clamped,
 108  * refer to clamp16_from_float().
 109  *
 110  *  \param dst     Destination buffer
 111  *  \param src     Source buffer
 112  *  \param count   Number of samples to copy
 113  *
 114  * The destination and source buffers must either be completely separate (non-overlapping), or
 115  * they must both start at the same address.  Partially overlapping buffers are not supported.
 116  * The conversion is done by truncation, without dithering, so it loses resolution.
 117  */
 118 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
 119
 120 /**
 121  * Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
 122  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
 123  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].  Note the closed range
 124  * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
 125  *
 126  *  \param dst     Destination buffer
 127  *  \param src     Source buffer
 128  *  \param count   Number of samples to copy
 129  *
 130  * The destination and source buffers must either be completely separate (non-overlapping), or
 131  * they must both start at the same address.  Partially overlapping buffers are not supported.
 132  */
 133 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
 134
 135 /**
 136  * Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
 137  * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
 138  * No rounding is needed as the representation is exact.
 139  *
 140  *  \param dst     Destination buffer
 141  *  \param src     Source buffer
 142  *  \param count   Number of samples to copy
 143  *
 144  * The destination and source buffers must be completely separate.
 145  */
 146 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
 147
 148 /**
 149  * Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
 150  * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
 151  * No rounding is needed as the representation is exact.
 152  *
 153  *  \param dst     Destination buffer
 154  *  \param src     Source buffer
 155  *  \param count   Number of samples to copy
 156  *
 157  * The destination and source buffers must be completely separate.
 158  */
 159 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
 160
 161 /**
 162  * Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
 163  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
 164  * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
 165  * No rounding is needed as the representation is exact.
 166  *
 167  *  \param dst     Destination buffer
 168  *  \param src     Source buffer
 169  *  \param count   Number of samples to copy
 170  *
 171  * The destination and source buffers must be completely separate.
 172  */
 173 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
 174
 175 /**
 176  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
  177  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
 178  * The data is truncated without rounding.
 179  *
 180  *  \param dst     Destination buffer
 181  *  \param src     Source buffer
 182  *  \param count   Number of samples to copy
 183  *
 184  * The destination and source buffers must either be completely separate (non-overlapping), or
 185  * they must both start at the same address.  Partially overlapping buffers are not supported.
 186  */
 187 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
 188
 189 /**
 190  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed-point 32-bit Q0.31.
 191  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
 192  * The output data range is [0x80000000, 0x7fffff00] at intervals of 0x100.
 193  *
 194  *  \param dst     Destination buffer
 195  *  \param src     Source buffer
 196  *  \param count   Number of samples to copy
 197  *
 198  * The destination and source buffers must be completely separate.
 199  */
 200 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count);
 201
 202 /**
 203  * Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
 204  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 205  * The output data range is [0x800000, 0x7fff00] (not full).
 206  * Nevertheless there is no DC offset on the output, if the input has no DC offset.
 207  *
 208  *  \param dst     Destination buffer
 209  *  \param src     Source buffer
 210  *  \param count   Number of samples to copy
 211  *
 212  * The destination and source buffers must be completely separate.
 213  */
 214 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
 215
 216 /**
 217  * Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
 218  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 219  * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
 220  * for details.
 221  *
 222  *  \param dst     Destination buffer
 223  *  \param src     Source buffer
 224  *  \param count   Number of samples to copy
 225  *
 226  * The destination and source buffers must either be completely separate (non-overlapping), or
 227  * they must both start at the same address.  Partially overlapping buffers are not supported.
 228  */
 229 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
 230
 231 /**
 232  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
 233  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
  234  * The data is clamped to the range [0x800000, 0x7fffff].
 235  *
 236  *  \param dst     Destination buffer
 237  *  \param src     Source buffer
 238  *  \param count   Number of samples to copy
 239  *
 240  * The destination and source buffers must be completely separate.
 241  */
 242 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
 243
 244 /**
 245  * Shrink and copy samples from signed 32-bit fixed-point Q0.31
 246  * to signed fixed-point packed 24 bit Q0.23.
 247  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
 248  *
 249  *  \param dst     Destination buffer
 250  *  \param src     Source buffer
 251  *  \param count   Number of samples to copy
 252  *
 253  * The destination and source buffers must either be completely separate (non-overlapping), or
 254  * they must both start at the same address.  Partially overlapping buffers are not supported.
 255  * The conversion is done by truncation, without dithering, so it loses resolution.
 256  */
 257 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count);
 258
 259 /**
 260  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
 261  * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
 262  *
 263  *  \param dst     Destination buffer
 264  *  \param src     Source buffer
 265  *  \param count   Number of samples to copy
 266  *
 267  * The destination and source buffers must be completely separate.
 268  */
 269 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
 270
 271 /**
 272  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
 273  * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
 274  * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
 275  * See clamp24_from_float() for details.
 276  *
 277  *  \param dst     Destination buffer
 278  *  \param src     Source buffer
 279  *  \param count   Number of samples to copy
 280  *
 281  * The destination and source buffers must either be completely separate (non-overlapping), or
 282  * they must both start at the same address.  Partially overlapping buffers are not supported.
 283  */
 284 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
 285
 286 /**
 287  * Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
 288  * The output data range is [0xff800000, 0x007fffff].
 289  *
 290  *  \param dst     Destination buffer
 291  *  \param src     Source buffer
 292  *  \param count   Number of samples to copy
 293  *
 294  * The destination and source buffers must be completely separate.
 295  */
 296 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
 297
 298 /**
 299  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
 300  * The conversion will use the full available Q4.27 range, including guard bits.
 301  * Fractional lsb is rounded to nearest, ties away from zero.
 302  * See clampq4_27_from_float() for details.
 303  *
 304  *  \param dst     Destination buffer
 305  *  \param src     Source buffer
 306  *  \param count   Number of samples to copy
 307  *
 308  * The destination and source buffers must either be completely separate (non-overlapping), or
 309  * they must both start at the same address.  Partially overlapping buffers are not supported.
 310  */
 311 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
 312
 313 /**
 314  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
 315  * The data is clamped, and truncated without rounding.
 316  *
 317  *  \param dst     Destination buffer
 318  *  \param src     Source buffer
 319  *  \param count   Number of samples to copy
 320  *
 321  * The destination and source buffers must either be completely separate (non-overlapping), or
 322  * they must both start at the same address.  Partially overlapping buffers are not supported.
 323  */
 324 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
 325
 326 /**
 327  * Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
 328  * The nominal output float range is [-1.0, 1.0) for the fixed-point
 329  * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
 330  * No rounding is needed as the representation is exact for nominal values.
 331  * Rounding for overflow values is to nearest, ties to even.
 332  *
 333  *  \param dst     Destination buffer
 334  *  \param src     Source buffer
 335  *  \param count   Number of samples to copy
 336  *
 337  * The destination and source buffers must either be completely separate (non-overlapping), or
 338  * they must both start at the same address.  Partially overlapping buffers are not supported.
 339  */
 340 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
 341
 342 /**
 343  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
 344  * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
 345  *
 346  *  \param dst     Destination buffer
 347  *  \param src     Source buffer
 348  *  \param count   Number of samples to copy
 349  *
 350  * The destination and source buffers must be completely separate.
 351  */
 352 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
 353
 354 /**
 355  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
 356  * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
 357  * ties away from zero. See clamp32_from_float() for details.
 358  *
 359  *  \param dst     Destination buffer
 360  *  \param src     Source buffer
 361  *  \param count   Number of samples to copy
 362  *
 363  * The destination and source buffers must either be completely separate (non-overlapping), or
 364  * they must both start at the same address.  Partially overlapping buffers are not supported.
 365  */
 366 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
 367
 368 /**
 369  * Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
 370  * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
 371  * Rounding is done according to float_from_i32().
 372  *
 373  *  \param dst     Destination buffer
 374  *  \param src     Source buffer
 375  *  \param count   Number of samples to copy
 376  *
 377  * The destination and source buffers must either be completely separate (non-overlapping), or
 378  * they must both start at the same address.  Partially overlapping buffers are not supported.
 379  */
 380 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
 381
 382 /**
 383  * Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
 384  *
 385  *  \param dst     Destination buffer
 386  *  \param src     Source buffer
 387  *  \param count   Number of stereo frames to downmix
 388  *
 389  * The destination and source buffers must be completely separate (non-overlapping).
 390  * The current implementation truncates the mean rather than dither, but this may change.
 391  */
 392 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
 393
 394 /**
 395  * Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
 396  * duplicating.
 397  *
 398  *  \param dst     Destination buffer
 399  *  \param src     Source buffer
 400  *  \param count   Number of mono samples to upmix
 401  *
 402  * The destination and source buffers must be completely separate (non-overlapping).
 403  */
 404 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
 405
 406 /**
 407  * Downmix pairs of interleaved stereo input float samples to mono output float samples
 408  * by averaging the stereo pair together.
 409  *
 410  *  \param dst     Destination buffer
 411  *  \param src     Source buffer
 412  *  \param count   Number of stereo frames to downmix
 413  *
 414  * The destination and source buffers must be completely separate (non-overlapping),
 415  * or they must both start at the same address.
 416  */
 417 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t count);
 418
 419 /**
 420  * Upmix mono input float samples to pairs of interleaved stereo output float samples by
 421  * duplicating.
 422  *
 423  *  \param dst     Destination buffer
 424  *  \param src     Source buffer
 425  *  \param count   Number of mono samples to upmix
 426  *
 427  * The destination and source buffers must be completely separate (non-overlapping).
 428  */
 429 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t count);
 430
 431 /**
 432  * \return the total number of non-zero 32-bit samples.
 433  */
 434 size_t nonZeroMono32(const int32_t *samples, size_t count);
 435
 436 /**
 437  * \return the total number of non-zero 16-bit samples.
 438  */
 439 size_t nonZeroMono16(const int16_t *samples, size_t count);
 440
 441 /**
 442  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
 443  * if either of its constituent 32-bit samples is non-zero.
 444  */
 445 size_t nonZeroStereo32(const int32_t *frames, size_t count);
 446
 447 /**
 448  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
 449  * if either of its constituent 16-bit samples is non-zero.
 450  */
 451 size_t nonZeroStereo16(const int16_t *frames, size_t count);
 452
 453 /**
 454  * Copy frames, selecting source samples based on a source channel mask to fit
 455  * the destination channel mask. Unmatched channels in the destination channel mask
 456  * are zero filled. Unmatched channels in the source channel mask are dropped.
 457  * Channels present in the channel mask are represented by set bits in the
 458  * uint32_t value and are matched without further interpretation.
 459  *
 460  *  \param dst         Destination buffer
 461  *  \param dst_mask    Bit mask corresponding to destination channels present
 462  *  \param src         Source buffer
 463  *  \param src_mask    Bit mask corresponding to source channels present
 464  *  \param sample_size Size of each sample in bytes.  Must be 1, 2, 3, or 4.
 465  *  \param count       Number of frames to copy
 466  *
 467  * The destination and source buffers must be completely separate (non-overlapping).
 468  * If the sample size is not in range, the function will abort.
 469  */
 470 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
 471         const void *src, uint32_t src_mask, size_t sample_size, size_t count);
 472
 473 /**
 474  * Copy frames, selecting source samples based on an index array (idxary).
 475  * The idxary[] consists of dst_channels number of elements.
  476  * The ith element of idxary[] corresponds to the ith destination channel.
 477  * A non-negative value is the channel index in the source frame.
 478  * A negative index (-1) represents filling with 0.
 479  *
 480  * Example: Swapping L and R channels for stereo streams
 481  * <PRE>
 482  * idxary[0] = 1;
 483  * idxary[1] = 0;
 484  * </PRE>
 485  *
 486  * Example: Copying a mono source to the front center 5.1 channel
 487  * <PRE>
 488  * idxary[0] = -1;
 489  * idxary[1] = -1;
 490  * idxary[2] = 0;
 491  * idxary[3] = -1;
 492  * idxary[4] = -1;
 493  * idxary[5] = -1;
 494  * </PRE>
 495  *
 496  * This copy allows swizzling of channels or replication of channels.
 497  *
 498  *  \param dst           Destination buffer
 499  *  \param dst_channels  Number of destination channels per frame
 500  *  \param src           Source buffer
 501  *  \param src_channels  Number of source channels per frame
 502  *  \param idxary        Array of indices representing channels in the source frame
 503  *  \param sample_size   Size of each sample in bytes.  Must be 1, 2, 3, or 4.
 504  *  \param count         Number of frames to copy
 505  *
 506  * The destination and source buffers must be completely separate (non-overlapping).
 507  * If the sample size is not in range, the function will abort.
 508  */
 509 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
 510         const void *src, uint32_t src_channels,
 511         const int8_t *idxary, size_t sample_size, size_t count);
 512
 513 /**
 514  * Prepares an index array (idxary) from channel masks, which can be later
 515  * used by memcpy_by_index_array().
 516  *
 517  * \return the number of array elements required.
 518  * This may be greater than idxcount, so the return value should be checked
 519  * if idxary size is less than 32.
 520  *
 521  * Note that idxary is a caller allocated array
 522  * of at least as many channels as present in the dst_mask.
 523  * Channels present in the channel mask are represented by set bits in the
 524  * uint32_t value and are matched without further interpretation.
 525  *
 526  * This function is typically used for converting audio data with different
 527  * channel position masks.
 528  *
 529  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
 530  *  \param idxcount    Number of caller allocated elements in idxary
 531  *  \param dst_mask    Bit mask corresponding to destination channels present
 532  *  \param src_mask    Bit mask corresponding to source channels present
 533  */
 534 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
 535         uint32_t dst_mask, uint32_t src_mask);
 536
 537 /**
 538  * Prepares an index array (idxary) from channel masks, which can be later
 539  * used by memcpy_by_index_array().
 540  *
 541  * \return the number of array elements required.
 542  *
 543  * For a source channel index mask, the source channels will map to the destination
 544  * channels as if counting the set bits in dst_mask in order from lsb to msb
 545  * (zero bits are ignored). The ith bit of the src_mask corresponds to the
 546  * ith SET bit of dst_mask and the ith destination channel.  Hence, a zero ith
 547  * bit of the src_mask indicates that the ith destination channel plays silence.
 548  *
 549  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
 550  *  \param idxcount    Number of caller allocated elements in idxary
 551  *  \param dst_mask    Bit mask corresponding to destination channels present
 552  *  \param src_mask    Bit mask corresponding to source channels present
 553  */
 554 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
 555         uint32_t dst_mask, uint32_t src_mask);
 556
 557 /**
 558  * Prepares an index array (idxary) from channel mask bits, which can be later
 559  * used by memcpy_by_index_array().
 560  *
 561  * \return the number of array elements required.
 562  *
 563  * This initialization is for a destination channel index mask from a positional
 564  * source mask.
 565  *
  566  * For a destination channel index mask, the input channels will map
 567  * to the destination channels, with the ith SET bit in the source bits corresponding
 568  * to the ith bit in the destination bits. If there is a zero bit in the middle
 569  * of set destination bits (unlikely), the corresponding source channel will
 570  * be dropped.
 571  *
 572  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
 573  *  \param idxcount    Number of caller allocated elements in idxary
 574  *  \param dst_mask    Bit mask corresponding to destination channels present
 575  *  \param src_mask    Bit mask corresponding to source channels present
 576  */
 577 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
 578         uint32_t dst_mask, uint32_t src_mask);
 579
 580 /**
 581  * Clamp (aka hard limit or clip) a signed 32-bit sample to 16-bit range.
 582  */
 583 static inline int16_t clamp16(int32_t sample)
 584 {
 585     if ((sample>>15) ^ (sample>>31))
 586         sample = 0x7FFF ^ (sample>>31);
 587     return sample;
 588 }
 589
 590 /**
 591  * Convert a IEEE 754 single precision float [-1.0, 1.0) to int16_t [-32768, 32767]
 592  * with clamping.  Note the open bound at 1.0, values within 1/65536 of 1.0 map
 593  * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
 594  *
 595  * Values outside the range [-1.0, 1.0) are properly clamped to -32768 and 32767,
 596  * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
 597  * depending on the sign bit inside NaN (whose representation is not unique).
 598  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
 599  *
 600  * Rounding of 0.5 lsb is to even (default for IEEE 754).
 601  */
 602 static inline int16_t clamp16_from_float(float f)
 603 {
 604     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
 605      * floating point significand. The normal shift is 3<<22, but the -15 offset
 606      * is used to multiply by 32768.
 607      */
 608     static const float offset = (float)(3 << (22 - 15));
 609     /* zero = (0x10f << 22) =  0x43c00000 (not directly used) */
 610     static const int32_t limneg = (0x10f << 22) /*zero*/ - 32768; /* 0x43bf8000 */
 611     static const int32_t limpos = (0x10f << 22) /*zero*/ + 32767; /* 0x43c07fff */
 612
 613     union {
 614         float f;
 615         int32_t i;
 616     } u;
 617
 618     u.f = f + offset; /* recenter valid range */
 619     /* Now the valid range is represented as integers between [limneg, limpos].
 620      * Clamp using the fact that float representation (as an integer) is an ordered set.
 621      */
 622     if (u.i < limneg)
 623         u.i = -32768;
 624     else if (u.i > limpos)
 625         u.i = 32767;
 626     return u.i; /* Return lower 16 bits, the part of interest in the significand. */
 627 }
 628
 629 /**
 630  * Convert a IEEE 754 single precision float [-1.0, 1.0) to uint8_t [0, 0xff]
 631  * with clamping.  Note the open bound at 1.0, values within 1/128 of 1.0 map
 632  * to 255 instead of 256 (early clamping due to the smaller positive integer subrange).
 633  *
 634  * Values outside the range [-1.0, 1.0) are properly clamped to 0 and 255,
 635  * including -Inf and +Inf. NaN will generally be treated either as 0 or 255,
 636  * depending on the sign bit inside NaN (whose representation is not unique).
 637  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
 638  *
 639  * Rounding of 0.5 lsb is to even (default for IEEE 754).
 640  */
 641 static inline uint8_t clamp8_from_float(float f)
 642 {
 643     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
 644      * floating point significand. The normal shift is 3<<22, but the -7 offset
 645      * is used to multiply by 128.
 646      */
 647     static const float offset = (float)((3 << (22 - 7)) + 1 /* to cancel -1.0 */);
 648     /* zero = (0x11f << 22) =  0x47c00000 */
 649     static const int32_t limneg = (0x11f << 22) /*zero*/;
 650     static const int32_t limpos = (0x11f << 22) /*zero*/ + 255; /* 0x47c000ff */
 651
 652     union {
 653         float f;
 654         int32_t i;
 655     } u;
 656
 657     u.f = f + offset; /* recenter valid range */
 658     /* Now the valid range is represented as integers between [limneg, limpos].
 659      * Clamp using the fact that float representation (as an integer) is an ordered set.
 660      */
 661     if (u.i < limneg)
 662         return 0;
 663     if (u.i > limpos)
 664         return 255;
 665     return u.i; /* Return lower 8 bits, the part of interest in the significand. */
 666 }
 667
 668 /**
 669  * Convert a single-precision floating point value to a Q0.23 integer value, stored in a
 670  * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 671  *
 672  * Rounds to nearest, ties away from 0.
 673  *
 674  * Values outside the range [-1.0, 1.0) are properly clamped to -8388608 and 8388607,
 675  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 676  * depending on hardware and future implementation of this function.
 677  */
 678 static inline int32_t clamp24_from_float(float f)
 679 {
 680     static const float scale = (float)(1 << 23);
 681     static const float limpos = 0x7fffff / scale;
 682     static const float limneg = -0x800000 / scale;
 683
 684     if (f <= limneg) {
 685         return -0x800000;
 686     } else if (f >= limpos) {
 687         return 0x7fffff;
 688     }
 689     f *= scale;
 690     /* integer conversion is through truncation (though int to float is not).
 691      * ensure that we round to nearest, ties away from 0.
 692      */
 693     return f > 0 ? f + 0.5 : f - 0.5;
 694 }
 695
 696 /**
 697  * Convert a signed fixed-point 32-bit Q8.23 value to a Q0.23 integer value,
 698  * stored in a 32-bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
 699  *
 700  * Values outside the range [-0x800000, 0x7fffff] are clamped to that range.
 701  */
 702 static inline int32_t clamp24_from_q8_23(int32_t ival)
 703 {
 704     static const int32_t limpos = 0x7fffff;
 705     static const int32_t limneg = -0x800000;
 706     if (ival < limneg) {
 707         return limneg;
 708     } else if (ival > limpos) {
 709         return limpos;
 710     } else {
 711         return ival;
 712     }
 713 }
 714
 715 /**
 716  * Convert a single-precision floating point value to a Q4.27 integer value.
 717  * Rounds to nearest, ties away from 0.
 718  *
 719  * Values outside the range [-16.0, 16.0) are properly clamped to -2147483648 and 2147483647,
 720  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 721  * depending on hardware and future implementation of this function.
 722  */
 723 static inline int32_t clampq4_27_from_float(float f)
 724 {
 725     static const float scale = (float)(1UL << 27);
 726     static const float limpos = 16.;
 727     static const float limneg = -16.;
 728
 729     if (f <= limneg) {
 730         return -0x80000000; /* or 0x80000000 */
 731     } else if (f >= limpos) {
 732         return 0x7fffffff;
 733     }
 734     f *= scale;
 735     /* integer conversion is through truncation (though int to float is not).
 736      * ensure that we round to nearest, ties away from 0.
 737      */
 738     return f > 0 ? f + 0.5 : f - 0.5;
 739 }
 740
 741 /**
 742  * Convert a single-precision floating point value to a Q0.31 integer value.
 743  * Rounds to nearest, ties away from 0.
 744  *
 745  * Values outside the range [-1.0, 1.0) are properly clamped to -2147483648 and 2147483647,
 746  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 747  * depending on hardware and future implementation of this function.
 748  */
 749 static inline int32_t clamp32_from_float(float f)
 750 {
 751     static const float scale = (float)(1UL << 31);
 752     static const float limpos = 1.;
 753     static const float limneg = -1.;
 754
 755     if (f <= limneg) {
 756         return -0x80000000; /* or 0x80000000 */
 757     } else if (f >= limpos) {
 758         return 0x7fffffff;
 759     }
 760     f *= scale;
 761     /* integer conversion is through truncation (though int to float is not).
 762      * ensure that we round to nearest, ties away from 0.
 763      */
 764     return f > 0 ? f + 0.5 : f - 0.5;
 765 }
 766
 767 /**
 768  * Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
 769  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
 770  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].
 771  *
 772  * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 773  * In more detail: if the fixed-point integer exceeds 24 bit significand of single
 774  * precision floating point, the 0.5 lsb in the significand conversion will round
 775  * towards even, as per IEEE 754 default.
 776  */
 777 static inline float float_from_q4_27(int32_t ival)
 778 {
 779     /* The scale factor is the reciprocal of the fractional bits.
 780      *
 781      * Since the scale factor is a power of 2, the scaling is exact, and there
 782      * is no rounding due to the multiplication - the bit pattern is preserved.
 783      * However, there may be rounding due to the fixed-point to float conversion,
 784      * as described above.
 785      */
 786     static const float scale = 1. / (float)(1UL << 27);
 787
 788     return ival * scale;
 789 }
 790
 791 /**
 792  * Convert an unsigned fixed-point 32-bit U4.28 value to single-precision floating-point.
 793  * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 794  * [0x00000000, 0x10000000].  The full float range is [0.0, 16.0].
 795  *
 796  * Note the closed range at 1.0 and 16.0 is due to rounding on conversion to float.
 797  * In more detail: if the fixed-point integer exceeds 24 bit significand of single
 798  * precision floating point, the 0.5 lsb in the significand conversion will round
 799  * towards even, as per IEEE 754 default.
 800  */
 801 static inline float float_from_u4_28(uint32_t uval)
 802 {
 803     static const float scale = 1. / (float)(1UL << 28);
 804
 805     return uval * scale;
 806 }
 807
 808 /**
 809  * Convert an unsigned fixed-point 16-bit U4.12 value to single-precision floating-point.
 810  * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 811  * [0x0000, 0x1000].  The full float range is [0.0, 16.0).
 812  */
 813 static inline float float_from_u4_12(uint16_t uval)
 814 {
 815     static const float scale = 1. / (float)(1UL << 12);
 816
 817     return uval * scale;
 818 }
 819
 820 /**
 821  * Convert a single-precision floating point value to a U4.28 integer value.
 822  * Rounds to nearest, ties away from 0.
 823  *
 824  * Values outside the range [0, 16.0] are properly clamped to [0, 4294967295]
 825  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 826  * depending on hardware and future implementation of this function.
 827  */
 828 static inline uint32_t u4_28_from_float(float f)
 829 {
 830     static const float scale = (float)(1 << 28);
 831     static const float limpos = 0xffffffffUL / scale;
 832
 833     if (f <= 0.) {
 834         return 0;
 835     } else if (f >= limpos) {
 836         return 0xffffffff;
 837     }
 838     /* integer conversion is through truncation (though int to float is not).
 839      * ensure that we round to nearest, ties away from 0.
 840      */
 841     return f * scale + 0.5;
 842 }
 843
 844 /**
 845  * Convert a single-precision floating point value to a U4.12 integer value.
 846  * Rounds to nearest, ties away from 0.
 847  *
 848  * Values outside the range [0, 16.0) are properly clamped to [0, 65535]
 849  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
 850  * depending on hardware and future implementation of this function.
 851  */
 852 static inline uint16_t u4_12_from_float(float f)
 853 {
 854     static const float scale = (float)(1 << 12);
 855     static const float limpos = 0xffff / scale;
 856
 857     if (f <= 0.) {
 858         return 0;
 859     } else if (f >= limpos) {
 860         return 0xffff;
 861     }
 862     /* integer conversion is through truncation (though int to float is not).
 863      * ensure that we round to nearest, ties away from 0.
 864      */
 865     return f * scale + 0.5;
 866 }
 867
 868 /**
 869  * Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
 870  * The output float range is [-1.0, 1.0) for the fixed-point range
 871  * [0x8000, 0x7fff].
 872  *
 873  * There is no rounding, the conversion and representation is exact.
 874  */
 875 static inline float float_from_i16(int16_t ival)
 876 {
 877     /* The scale factor is the reciprocal of the nominal 16 bit integer
 878      * half-sided range (32768).
 879      *
 880      * Since the scale factor is a power of 2, the scaling is exact, and there
 881      * is no rounding due to the multiplication - the bit pattern is preserved.
 882      */
 883     static const float scale = 1. / (float)(1UL << 15);
 884
 885     return ival * scale;
 886 }
 887
 888 /**
 889  * Convert an unsigned fixed-point 8-bit U0.8 value to single-precision floating-point.
 890  * The nominal output float range is [-1.0, 1.0) if the fixed-point range is
 891  * [0x00, 0xff].
 892  */
 893 static inline float float_from_u8(uint8_t uval)
 894 {
 895     static const float scale = 1. / (float)(1UL << 7);
 896
 897     return ((int)uval - 128) * scale;
 898 }
 899
 900 /**
 901  * Convert a packed 24bit Q0.23 value stored native-endian in a uint8_t ptr
 902  * to a signed fixed-point 32 bit integer Q0.31 value. The output Q0.31 range
 903  * is [0x80000000, 0x7fffff00] for the fixed-point range [0x800000, 0x7fffff].
 904  * Even though the output range is limited on the positive side, there is no
 905  * DC offset on the output, if the input has no DC offset.
 906  *
 907  * Avoid relying on the limited output range, as future implementations may go
 908  * to full range.
 909  */
 910 static inline int32_t i32_from_p24(const uint8_t *packed24)
 911 {
 912     /* convert to 32b */
 913     return (packed24[0] << 8) | (packed24[1] << 16) | (packed24[2] << 24);
 914 }
 915
 916 /**
 917  * Convert a 32-bit Q0.31 value to single-precision floating-point.
 918  * The output float range is [-1.0, 1.0] for the fixed-point range
 919  * [0x80000000, 0x7fffffff].
 920  *
 921  * Rounding may occur in the least significant 8 bits for large fixed point
 922  * values due to storage into the 24-bit floating-point significand.
 923  * Rounding will be to nearest, ties to even.
 924  */
 925 static inline float float_from_i32(int32_t ival)
 926 {
 927     static const float scale = 1. / (float)(1UL << 31);
 928
 929     return ival * scale;
 930 }
 931
 932 /**
 933  * Convert a packed 24bit Q0.23 value stored native endian in a uint8_t ptr
 934  * to single-precision floating-point. The output float range is [-1.0, 1.0)
 935  * for the fixed-point range [0x800000, 0x7fffff].
 936  *
 937  * There is no rounding, the conversion and representation is exact.
 938  */
 939 static inline float float_from_p24(const uint8_t *packed24)
 940 {
 941     return float_from_i32(i32_from_p24(packed24));
 942 }
 943
 944 /**
 945  * Convert a 24-bit Q8.23 value to single-precision floating-point.
 946  * The nominal output float range is [-1.0, 1.0) for the fixed-point
 947  * range [0xff800000, 0x007fffff].  The maximum float range is [-256.0, 256.0).
 948  *
 949  * There is no rounding in the nominal range, the conversion and representation
 950  * is exact. For values outside the nominal range, rounding is to nearest, ties to even.
 951  */
 952 static inline float float_from_q8_23(int32_t ival)
 953 {
 954     static const float scale = 1. / (float)(1UL << 23);
 955
 956     return ival * scale;
 957 }
 958
 959 /**
 960  * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 961  */
 962 static inline
 963 int32_t mulAdd(int16_t in, int16_t v, int32_t a)
 964 {
 965 #if defined(__arm__) && !defined(__thumb__)
 966     int32_t out;
 967     asm( "smlabb %[out], %[in], %[v], %[a] \n"
 968          : [out]"=r"(out)
 969          : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
 970          : );
 971     return out;
 972 #else
 973     return a + in * (int32_t)v;
 974 #endif
 975 }
 976
 977 /**
 978  * Multiply 16-bit terms with 32-bit result: return in*v.
 979  */
 980 static inline
 981 int32_t mul(int16_t in, int16_t v)
 982 {
 983 #if defined(__arm__) && !defined(__thumb__)
 984     int32_t out;
 985     asm( "smulbb %[out], %[in], %[v] \n"
 986          : [out]"=r"(out)
 987          : [in]"%r"(in), [v]"r"(v)
 988          : );
 989     return out;
 990 #else
 991     return in * (int32_t)v;
 992 #endif
 993 }
 994
 995 /**
 996  * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 997  */
 998 static inline
 999 int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
1000 {
1001 #if defined(__arm__) && !defined(__thumb__)
1002     int32_t out;
1003     if (left) {
1004         asm( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
1005              : [out]"=r"(out)
1006              : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
1007              : );
1008     } else {
1009         asm( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
1010              : [out]"=r"(out)
1011              : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
1012              : );
1013     }
1014     return out;
1015 #else
1016     if (left) {
1017         return a + (int16_t)(inRL&0xFFFF) * (int16_t)(vRL&0xFFFF);
1018     } else {
1019         return a + (int16_t)(inRL>>16) * (int16_t)(vRL>>16);
1020     }
1021 #endif
1022 }
1023
/**
 * Stereo-pair variant of mul: multiplies one signed 16-bit half of inRL by the
 * matching half of vRL and returns the 32-bit product. A nonzero left selects the
 * low 16 bits of both words; zero selects the high 16 bits.
 */
static inline
int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
{
#if !defined(__arm__) || defined(__thumb__)
    /* Portable path: shift the wanted half down, then reinterpret it as signed. */
    const unsigned shift = left ? 0u : 16u;
    const int16_t in_half = (int16_t)((inRL >> shift) & 0xFFFF);
    const int16_t v_half = (int16_t)((vRL >> shift) & 0xFFFF);

    return in_half * v_half;
#else
    /* ARM (non-Thumb) build: SMULBB uses the bottom halves, SMULTT the top halves. */
    int32_t product;
    if (!left) {
        asm( "smultt %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(product)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smulbb %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(product)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    }
    return product;
#endif
}
1052
1053 /** \cond */
1054 __END_DECLS
1055 /** \endcond */
1056
1057 #endif  // ANDROID_AUDIO_PRIMITIVES_H