From 80721cc1ff1f1c8c460c136184ed6416a73b4bfd Mon Sep 17 00:00:00 2001 From: Rostislav Pehlivanov Date: Thu, 23 Jun 2016 18:06:56 +0100 Subject: [PATCH] diracdsp: add dequantization SIMD Currently unused, to be used in the following commits. Signed-off-by: Rostislav Pehlivanov --- libavcodec/diracdsp.c | 24 ++++++++++++++++++++++++ libavcodec/diracdsp.h | 4 ++++ libavcodec/x86/diracdsp.asm | 37 +++++++++++++++++++++++++++++++++++++ libavcodec/x86/diracdsp_init.c | 6 ++++++ 4 files changed, 71 insertions(+) diff --git a/libavcodec/diracdsp.c b/libavcodec/diracdsp.c index ab8d1497f7..cd1209e209 100644 --- a/libavcodec/diracdsp.c +++ b/libavcodec/diracdsp.c @@ -189,6 +189,27 @@ static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride, } } +#define DEQUANT_SUBBAND(PX) \ +static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst, ptrdiff_t stride, \ + const int qf, const int qs, int tot_v, int tot_h) \ +{ \ + int i, y; \ + for (y = 0; y < tot_v; y++) { \ + PX c, sign, *src_r = (PX *)src, *dst_r = (PX *)dst; \ + for (i = 0; i < tot_h; i++) { \ + c = *src_r++; \ + sign = FFSIGN(c)*(!!c); \ + c = (FFABS(c)*qf + qs) >> 2; \ + *dst_r++ = c*sign; \ + } \ + src += tot_h << (sizeof(PX) >> 1); \ + dst += stride; \ + } \ +} + +DEQUANT_SUBBAND(int16_t) +DEQUANT_SUBBAND(int32_t) + #define PIXFUNC(PFX, WIDTH) \ c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \ c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \ @@ -214,6 +235,9 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c) c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c; c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c; + c->dequant_subband[0] = c->dequant_subband[2] = dequant_subband_int16_t_c; + c->dequant_subband[1] = c->dequant_subband[3] = dequant_subband_int32_t_c; + PIXFUNC(put, 8); PIXFUNC(put, 16); PIXFUNC(put, 32); diff --git a/libavcodec/diracdsp.h b/libavcodec/diracdsp.h index 
25a872d846..224828d880 100644 --- a/libavcodec/diracdsp.h +++ b/libavcodec/diracdsp.h @@ -22,6 +22,7 @@ #define AVCODEC_DIRACDSP_H #include <stdint.h> +#include <stddef.h> typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h); typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h); @@ -46,6 +47,9 @@ typedef struct { void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/); void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen); + /* 0-1: int16_t and int32_t asm/c, 2-3: int16 and int32_t, C only */ + void (*dequant_subband[4])(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); + dirac_weight_func weight_dirac_pixels_tab[3]; dirac_biweight_func biweight_dirac_pixels_tab[3]; } DiracDSPContext; diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm index a042413c3a..8e9f0fbf02 100644 --- a/libavcodec/x86/diracdsp.asm +++ b/libavcodec/x86/diracdsp.asm @@ -263,3 +263,40 @@ ADD_RECT sse2 HPEL_FILTER sse2 ADD_OBMC 32, sse2 ADD_OBMC 16, sse2 + +INIT_XMM sse4 + +; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h) +cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h + movd m2, qfd + movd m3, qsd + SPLATD m2 + SPLATD m3 + mov r4, tot_hq + mov r3, dstq + + .loop_v: + mov tot_hq, r4 + mov dstq, r3 + + .loop_h: + movu m0, [srcq] + + pabsd m1, m0 + pmulld m1, m2 + paddd m1, m3 + psrld m1, 2 + psignd m1, m0 + + movu [dstq], m1 + + add srcq, mmsize + add dstq, mmsize + sub tot_hd, 4 + jg .loop_h + + add r3, strideq + dec tot_vd + jg .loop_v + + RET diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c index 5fae79891b..26b885d530 100644 
--- a/libavcodec/x86/diracdsp_init.c +++ b/libavcodec/x86/diracdsp_init.c @@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); +void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); + #if HAVE_YASM #define HPEL_FILTER(MMSIZE, EXT) \ @@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c) c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; } + + if (EXTERNAL_SSE4(mm_flags)) { + c->dequant_subband[1] = ff_dequant_subband_32_sse4; + } } -- 2.11.0