}
}
+/**
+ * Plain C version of the ac3_max_msb_abs_int16 DSP function.
+ *
+ * The absolute values of all elements are OR'ed together, so the result has
+ * the same most significant set bit as the largest absolute value in src.
+ *
+ * @param src input array
+ * @param len number of values in the array
+ * @return bitwise OR of abs(src[k]) for every k in [0, len)
+ */
+static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
+{
+    int acc = 0;
+    int k   = 0;
+
+    while (k < len) {
+        acc |= abs(src[k]);
+        k++;
+    }
+
+    return acc;
+}
+
av_cold void ff_ac3dsp_init(AC3DSPContext *c)
{
c->ac3_exponent_min = ac3_exponent_min_c;
+ c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
if (HAVE_MMX)
ff_ac3dsp_init_x86(c);
* @param nb_coefs number of frequency coefficients.
*/
void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+
+ /**
+ * Calculate the maximum MSB of the absolute value of each element in an
+ * array of int16_t.
+ * @param src input array
+ * constraints: align 16. values must be in range [-32767,32767]
+ * @param len number of values in the array
+ * constraints: multiple of 16 greater than 0
+ * @return a value with the same MSB as max(abs(src[]))
+ */
+ int (*ac3_max_msb_abs_int16)(const int16_t *src, int len);
} AC3DSPContext;
void ff_ac3dsp_init (AC3DSPContext *c);
- * @param n number of values in the array
- * @return log2(max(abs(tab[])))
+ * @param len number of values in the array
+ * @return log2(max(abs(src[])))
*/
-static int log2_tab(int16_t *tab, int n)
+static int log2_tab(AC3EncodeContext *s, int16_t *src, int len)
{
- int i, v;
-
- v = 0;
- for (i = 0; i < n; i++)
- v |= abs(tab[i]);
-
+ int v = s->ac3dsp.ac3_max_msb_abs_int16(src, len); /* DSP hook: yields a value with the same MSB as
+      * max(abs(src[])), so av_log2() below gives the same answer as the
+      * removed open-coded loop. Per the AC3DSPContext documentation the
+      * hook expects src aligned to 16 and len a non-zero multiple of 16. */
return av_log2(v);
}
*/
static int normalize_samples(AC3EncodeContext *s)
{
- int v = 14 - log2_tab(s->windowed_samples, AC3_WINDOW_SIZE);
+ int v = 14 - log2_tab(s, s->windowed_samples, AC3_WINDOW_SIZE); /* 14 minus the bit index of the largest
+      * magnitude in the window; s is passed so log2_tab() can reach the
+      * ac3dsp function table. v is then handed to lshift_tab() as the
+      * shift count (presumably a left shift - confirm against lshift_tab). */
lshift_tab(s->windowed_samples, AC3_WINDOW_SIZE, v);
return v - 9;
}
%endif
%undef PMINUB
%undef LOOP_ALIGN
+
+;-----------------------------------------------------------------------------
+; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
+;
+; This function uses 2 different methods to calculate a valid result.
+; 1) logical 'or' of abs of each element
+; This is used for ssse3 because of the pabsw instruction.
+; It is also used for mmx because of the lack of min/max instructions.
+; 2) calculate min/max for the array, then or(abs(min),abs(max))
+; This is used for mmxext and sse2 because they have pminsw/pmaxsw.
+;-----------------------------------------------------------------------------
+
+; AC3_MAX_MSB_ABS_INT16 <suffix>, <method>
+;   %1  function name suffix (mmx, mmxext, sse2 or ssse3)
+;   %2  min_max or or_abs, selecting one of the two methods described above
+;
+; Every loop iteration consumes 2*mmsize bytes, i.e. mmsize int16 values,
+; which is why len (an element count) is decremented by mmsize.
+; The result is returned in the low 16 bits of eax.
+%macro AC3_MAX_MSB_ABS_INT16 2
+cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
+    pxor        m2, m2              ; min_max: running minimum / or_abs: OR accumulator
+    pxor        m3, m3              ; min_max: running maximum
+.loop:
+%ifidn %2, min_max
+    mova        m0, [srcq]
+    mova        m1, [srcq+mmsize]
+    pminsw      m2, m0
+    pminsw      m2, m1
+    pmaxsw      m3, m0
+    pmaxsw      m3, m1
+%else ; or_abs
+%ifidn %1, mmx
+    mova        m0, [srcq]
+    mova        m1, [srcq+mmsize]
+    ABS2        m0, m1, m3, m4      ; m3/m4 are handed to ABS2 as its extra operands
+%else ; ssse3
+    ; using memory args is faster for ssse3
+    pabsw       m0, [srcq]
+    pabsw       m1, [srcq+mmsize]
+%endif
+    por         m2, m0
+    por         m2, m1
+%endif
+    add         srcq, mmsize*2
+    sub         lend, mmsize
+    ja .loop                        ; continue while the remaining count is above zero
+%ifidn %2, min_max
+    ABS2        m2, m3, m0, m1      ; abs(min) and abs(max) ...
+    por         m2, m3              ; ... combined into a single vector
+%endif
+%ifidn mmsize, 16
+    mova        m0, m2
+    punpckhqdq  m0, m0              ; fold the high 64 bits onto the low 64 bits
+    por         m2, m0
+%endif
+%ifidn %1, mmx
+    ; pshufw is not part of the base MMX instruction set (it arrived with
+    ; SSE / the AMD MMX extensions), so the plain mmx version must not use
+    ; it. Fold the four words together with 64-bit logical shifts instead.
+    mova        m0, m2
+    psrlq       m0, 32              ; words 2,3 -> words 0,1
+    por         m2, m0
+    mova        m0, m2
+    psrlq       m0, 16              ; word 1 -> word 0
+    por         m2, m0
+%else
+    PSHUFLW     m0, m2, 0xe         ; words 2,3 -> words 0,1
+    por         m2, m0
+    PSHUFLW     m0, m2, 0x1         ; word 1 -> word 0
+    por         m2, m0
+%endif
+    movd        eax, m2
+    and         eax, 0xFFFF         ; only the low word carries the result
+    RET
+%endmacro
+
+INIT_MMX ; MMX register setup for the next two instantiations (presumably mmsize = 8 - confirm in x86inc)
+%define ABS2 ABS2_MMX
+%define PSHUFLW pshufw ; word shuffle used by the final horizontal OR
+AC3_MAX_MSB_ABS_INT16 mmx, or_abs ; emits ac3_max_msb_abs_int16_mmx using the OR-of-abs method
+%define ABS2 ABS2_MMX2
+AC3_MAX_MSB_ABS_INT16 mmxext, min_max ; emits the _mmxext version using pminsw/pmaxsw
+INIT_XMM ; XMM register setup for the remaining instantiations (presumably mmsize = 16 - confirm in x86inc)
+%define PSHUFLW pshuflw
+AC3_MAX_MSB_ABS_INT16 sse2, min_max ; ABS2 is still ABS2_MMX2 from above at this point
+%define ABS2 ABS2_SSSE3 ; NOTE(review): the ssse3/or_abs expansion uses pabsw directly and never references ABS2
+AC3_MAX_MSB_ABS_INT16 ssse3, or_abs ; emits the _ssse3 version using pabsw
extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
extern void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
+extern int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len); /* per-instruction-set versions of
+      * ac3_max_msb_abs_int16; ff_ac3dsp_init_x86() picks one at run time
+      * from the CPU flags */
+extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
+extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len);
+
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
{
int mm_flags = av_get_cpu_flags();
#if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
+ c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx; /* first candidate; each later check that
+      * passes overwrites the pointer, so the last matching version wins */
}
if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
+ c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
}
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
+ c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
+ }
+ if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) { /* only ac3_max_msb_abs_int16 is replaced here;
+      * ac3_exponent_min keeps whatever the checks above installed */
+ c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
}
#endif
}