c->update_bap_counts = ac3_update_bap_counts_c;
c->compute_mantissa_size = ac3_compute_mantissa_size_c;
c->extract_exponents = ac3_extract_exponents_c;
+ c->sum_square_butterfly_int32 = ac3_sum_square_butterfly_int32_c;
+ c->sum_square_butterfly_float = ac3_sum_square_butterfly_float_c;
c->downmix = ac3_downmix_c;
+ c->apply_window_int16 = apply_window_int16_c;
if (ARCH_ARM)
ff_ac3dsp_init_arm(c, bit_exact);
void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
+ void (*sum_square_butterfly_int32)(int64_t sum[4], const int32_t *coef0,
+ const int32_t *coef1, int len);
+
+ void (*sum_square_butterfly_float)(float sum[4], const float *coef0,
+ const float *coef1, int len);
+
void (*downmix)(float **samples, float (*matrix)[2], int out_ch,
int in_ch, int len);
+
+ /**
+ * Apply symmetric window in 16-bit fixed-point.
+ * @param output destination array
+ * constraints: 16-byte aligned
+ * @param input source array
+ * constraints: 16-byte aligned
+ * @param window window array
+ * constraints: 16-byte aligned, at least len/2 elements
+ * @param len full window length
+ * constraints: multiple of 16 (the NEON version consumes 16 samples per loop iteration), greater than zero
+ */
+ void (*apply_window_int16)(int16_t *output, const int16_t *input,
+ const int16_t *window, unsigned int len);
} AC3DSPContext;
void ff_ac3dsp_init (AC3DSPContext *c, int bit_exact);
void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift);
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len);
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
+ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
+ const int16_t *window, unsigned n); /* NEON assembly routine: multiplies 8 samples from the front and 8 from the back of src by the window per loop iteration (n decreases by 16 each pass) */
+void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
+ const int32_t *coef0,
+ const int32_t *coef1,
+ int len); /* NEON assembly routine: stores the 64-bit sums of coef0^2, coef1^2, (coef0+coef1)^2 and (coef0-coef1)^2 into sum[0..3]; two coefficients per loop iteration */
+void ff_ac3_sum_square_butterfly_float_neon(float sum[4],
+ const float *coef0,
+ const float *coef1,
+ int len); /* NEON assembly routine: float counterpart, same four sums stored into sum[0..3]; two coefficients per loop iteration */
void ff_ac3_bit_alloc_calc_bap_armv6(int16_t *mask, int16_t *psd,
int start, int end,
c->ac3_rshift_int32 = ff_ac3_rshift_int32_neon;
c->float_to_fixed24 = ff_float_to_fixed24_neon;
c->extract_exponents = ff_ac3_extract_exponents_neon;
+ c->apply_window_int16 = ff_apply_window_int16_neon;
+ c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
+ c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
}
}
bx lr
endfunc
+ function ff_apply_window_int16_neon, export=1 /* apply a symmetric int16 window; r0 = dst, r1 = src, r2 = window, r3 = n (C prototype argument order under the ARM procedure call standard) */
+ push {r4,lr} /* r4 and lr are used as scratch pointers below */
+ add r4, r1, r3, lsl #1 /* r4 = src + 2*n bytes */
+ add lr, r0, r3, lsl #1 /* lr = dst + 2*n bytes */
+ sub r4, r4, #16 /* r4 -> last 8 int16 samples of src */
+ sub lr, lr, #16 /* lr -> last 8 int16 slots of dst */
+ mov r12, #-16 /* post-index step that walks r4 and lr backwards by 16 bytes */
+ 1:
+ vld1.16 {q0}, [r1,:128]! /* q0 = next 8 samples from the front of src */
+ vld1.16 {q2}, [r2,:128]! /* q2 = next 8 window coefficients */
+ vld1.16 {q1}, [r4,:128], r12 /* q1 = 8 samples from the back of src, then step r4 back */
+ vrev64.16 q3, q2 /* q3 = window with the 16-bit lanes reversed inside each 64-bit half */
+ vqrdmulh.s16 q0, q0, q2 /* front samples * window (saturating rounding doubling multiply, high half) */
+ vqrdmulh.s16 d2, d2, d7 /* low half of the back block * reversed high half of the window */
+ vqrdmulh.s16 d3, d3, d6 /* high half of the back block * reversed low half of the window */
+ vst1.16 {q0}, [r0,:128]! /* store the front result and advance dst */
+ vst1.16 {q1}, [lr,:128], r12 /* store the back result and step lr back */
+ subs r3, r3, #16 /* 16 samples handled per iteration (8 front + 8 back) */
+ bgt 1b /* loop while samples remain */
+
+ pop {r4,pc} /* restore r4 and return */
+ endfunc
++
+function ff_ac3_sum_square_butterfly_int32_neon, export=1 /* r0 = sum, r1 = coef0, r2 = coef1, r3 = len (C prototype argument order under the ARM procedure call standard) */
+ vmov.i64 q0, #0 /* q0: two 64-bit accumulators for coef0^2 */
+ vmov.i64 q1, #0 /* q1: two 64-bit accumulators for coef1^2 */
+ vmov.i64 q2, #0 /* q2: two 64-bit accumulators for (coef0+coef1)^2 */
+ vmov.i64 q3, #0 /* q3: two 64-bit accumulators for (coef0-coef1)^2 */
+1:
+ vld1.32 {d16}, [r1]! /* d16 = next two int32 values from coef0 */
+ vld1.32 {d17}, [r2]! /* d17 = next two int32 values from coef1 */
+ vadd.s32 d18, d16, d17 /* d18 = coef0 + coef1 (32-bit lanes) */
+ vsub.s32 d19, d16, d17 /* d19 = coef0 - coef1 (32-bit lanes) */
+ vmlal.s32 q0, d16, d16 /* q0 += coef0 * coef0, widened to 64 bits */
+ vmlal.s32 q1, d17, d17 /* q1 += coef1 * coef1, widened to 64 bits */
+ vmlal.s32 q2, d18, d18 /* q2 += (coef0+coef1)^2, widened to 64 bits */
+ vmlal.s32 q3, d19, d19 /* q3 += (coef0-coef1)^2, widened to 64 bits */
+ subs r3, r3, #2 /* two coefficients consumed per iteration */
+ bgt 1b /* loop while coefficients remain */
+ vadd.s64 d0, d0, d1 /* fold the two lanes of q0: becomes sum[0] */
+ vadd.s64 d1, d2, d3 /* fold the two lanes of q1: becomes sum[1] */
+ vadd.s64 d2, d4, d5 /* fold the two lanes of q2: becomes sum[2] */
+ vadd.s64 d3, d6, d7 /* fold the two lanes of q3: becomes sum[3] */
+ vst1.64 {q0-q1}, [r0] /* write the four 64-bit totals to sum[0..3] */
+ bx lr /* return */
+endfunc
+
+function ff_ac3_sum_square_butterfly_float_neon, export=1 /* r0 = sum, r1 = coef0, r2 = coef1, r3 = len (C prototype argument order under the ARM procedure call standard) */
+ vmov.f32 q0, #0.0 /* d0 accumulates coef0^2, d1 accumulates coef1^2 (two float lanes each) */
+ vmov.f32 q1, #0.0 /* d2 accumulates (coef0+coef1)^2, d3 accumulates (coef0-coef1)^2 */
+1:
+ vld1.32 {d16}, [r1]! /* d16 = next two floats from coef0 */
+ vld1.32 {d17}, [r2]! /* d17 = next two floats from coef1 */
+ vadd.f32 d18, d16, d17 /* d18 = coef0 + coef1 */
+ vsub.f32 d19, d16, d17 /* d19 = coef0 - coef1 */
+ vmla.f32 d0, d16, d16 /* d0 += coef0 * coef0 */
+ vmla.f32 d1, d17, d17 /* d1 += coef1 * coef1 */
+ vmla.f32 d2, d18, d18 /* d2 += (coef0+coef1)^2 */
+ vmla.f32 d3, d19, d19 /* d3 += (coef0-coef1)^2 */
+ subs r3, r3, #2 /* two coefficients consumed per iteration */
+ bgt 1b /* loop while coefficients remain */
+ vpadd.f32 d0, d0, d1 /* pairwise add: d0 = { total coef0^2, total coef1^2 } */
+ vpadd.f32 d1, d2, d3 /* pairwise add: d1 = { total (coef0+coef1)^2, total (coef0-coef1)^2 } */
+ vst1.32 {q0}, [r0] /* write the four float totals to sum[0..3] */
+ bx lr /* return */
+endfunc
void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
+ void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input,
+ const int16_t *window, unsigned int len);
+ void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input,
+ const int16_t *window, unsigned int len);
+ void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input,
+ const int16_t *window, unsigned int len);
+ void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input,
+ const int16_t *window, unsigned int len);
+ void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input,
+ const int16_t *window, unsigned int len);
+ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
+ const int16_t *window, unsigned int len);
+
+#if ARCH_X86_32 && defined(__INTEL_COMPILER) /* 32-bit x86 build with the Intel compiler */
+# undef HAVE_7REGS /* drop the configured value ... */
+# define HAVE_7REGS 0 /* ... and force it off, so the code guarded by HAVE_7REGS below is compiled out */
+#endif /* NOTE(review): the reason ICC needs this is not visible here, presumably register pressure in the inline asm; confirm */
+
#if HAVE_SSE_INLINE && HAVE_7REGS
#define IF1(x) x