OSDN Git Service

sbc: SBC encoder scale factors calculation optimized with __builtin_clz
author Siarhei Siamashka <siarhei.siamashka@nokia.com>
Thu, 29 Jan 2009 00:17:36 +0000 (02:17 +0200)
committer Marcel Holtmann <marcel@holtmann.org>
Mon, 30 Jul 2012 02:48:28 +0000 (19:48 -0700)
The count-leading-zeros operation is often implemented as a dedicated
instruction on various architectures (at least this is true for ARM
and x86). Using the gcc intrinsic __builtin_clz makes it possible to
eliminate the innermost loop of the scale factor calculation and
improve performance. In addition, the scale factor calculation can be
optimized even further using SIMD instructions.

sbc/sbc.c
sbc/sbc_primitives.c
sbc/sbc_primitives.h

index 365ee1f..8a2d782 100644 (file)
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -77,7 +77,7 @@ struct sbc_frame {
        uint8_t joint;
 
        /* only the lower 4 bits of every element are to be used */
-       uint8_t scale_factor[2][8];
+       uint32_t scale_factor[2][8];
 
        /* raw integer subband samples in the frame */
        int32_t SBC_ALIGNED sb_sample_f[16][2][8];
@@ -745,8 +745,6 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal(
        uint32_t levels[2][8];  /* levels are derived from that */
        uint32_t sb_sample_delta[2][8];
 
-       u_int32_t scalefactor[2][8];    /* derived from frame->scale_factor */
-
        data[0] = SBC_SYNCWORD;
 
        data[1] = (frame->frequency & 0x03) << 6;
@@ -785,19 +783,6 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal(
        crc_header[1] = data[2];
        crc_pos = 16;
 
-       for (ch = 0; ch < frame_channels; ch++) {
-               for (sb = 0; sb < frame_subbands; sb++) {
-                       frame->scale_factor[ch][sb] = 0;
-                       scalefactor[ch][sb] = 2 << SCALE_OUT_BITS;
-                       for (blk = 0; blk < frame->blocks; blk++) {
-                               while (scalefactor[ch][sb] < fabs(frame->sb_sample_f[blk][ch][sb])) {
-                                       frame->scale_factor[ch][sb]++;
-                                       scalefactor[ch][sb] *= 2;
-                               }
-                       }
-               }
-       }
-
        if (frame->mode == JOINT_STEREO) {
                /* like frame->sb_sample but joint stereo */
                int32_t sb_sample_j[16][2];
@@ -1115,6 +1100,10 @@ int sbc_encode(sbc_t *sbc, void *input, int input_len, void *output,
 
        samples = sbc_analyze_audio(&priv->enc_state, &priv->frame);
 
+       priv->enc_state.sbc_calc_scalefactors(
+               priv->frame.sb_sample_f, priv->frame.scale_factor,
+               priv->frame.blocks, priv->frame.channels, priv->frame.subbands);
+
        framelen = sbc_pack_frame(output, &priv->frame, output_len);
 
        if (written)
index 338feb9..303f3fe 100644 (file)
--- a/sbc/sbc_primitives.c
+++ b/sbc/sbc_primitives.c
@@ -401,6 +401,44 @@ static int sbc_enc_process_input_8s_be(int position,
                        position, pcm, X, nsamples, 1, 1);
 }
 
+/* Supplementary function to count the number of leading zeros */
+
+static inline int sbc_clz(uint32_t x)
+{
+#ifdef __GNUC__
+       return __builtin_clz(x);
+#else
+       /* TODO: this should be replaced with something better if good
+        * performance is wanted when using compilers other than gcc */
+       int cnt = 0;
+       while (x) {
+               cnt++;
+               x >>= 1;
+       }
+       return 32 - cnt;
+#endif
+}
+
+static void sbc_calc_scalefactors(
+       int32_t sb_sample_f[16][2][8],
+       uint32_t scale_factor[2][8],
+       int blocks, int channels, int subbands)
+{
+       int ch, sb, blk;
+       for (ch = 0; ch < channels; ch++) {
+               for (sb = 0; sb < subbands; sb++) {
+                       uint32_t x = 1 << SCALE_OUT_BITS;
+                       for (blk = 0; blk < blocks; blk++) {
+                               int32_t tmp = fabs(sb_sample_f[blk][ch][sb]);
+                               if (tmp != 0)
+                                       x |= tmp - 1;
+                       }
+                       scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) -
+                               sbc_clz(x);
+               }
+       }
+}
+
 /*
  * Detect CPU features and setup function pointers
  */
@@ -416,6 +454,9 @@ void sbc_init_primitives(struct sbc_encoder_state *state)
        state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le;
        state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be;
 
+       /* Default implementation for scale factors calculation */
+       state->sbc_calc_scalefactors = sbc_calc_scalefactors;
+
        /* X86/AMD64 optimizations */
 #ifdef SBC_BUILD_WITH_MMX_SUPPORT
        sbc_init_primitives_mmx(state);
index 5b7c9ac..2708c82 100644 (file)
--- a/sbc/sbc_primitives.h
+++ b/sbc/sbc_primitives.h
@@ -58,6 +58,10 @@ struct sbc_encoder_state {
        int (*sbc_enc_process_input_8s_be)(int position,
                        const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
                        int nsamples, int nchannels);
+       /* Scale factors calculation */
+       void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8],
+                       uint32_t scale_factor[2][8],
+                       int blocks, int channels, int subbands);
 };
 
 /*