2 * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
3 * Copyright (C) 2009 David Conrad
4 * Copyright (C) 2011 Jordi Ortiz
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
32 #include "bytestream.h"
35 #include "dirac_arith.h"
36 #include "mpeg12data.h"
37 #include "dirac_dwt.h"
40 #include "videodsp.h" // for ff_emulated_edge_mc_8
43 * The spec limits the number of wavelet decompositions to 4 for both
44 * level 1 (VC-2) and 128 (long-gop default).
45 * 5 decompositions is the maximum before >16-bit buffers are needed.
46 * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
47 * the others to 4 decompositions (or 3 for the fidelity filter).
49 * We use this instead of MAX_DECOMPOSITIONS to save some memory.
51 #define MAX_DWT_LEVELS 5
54 * The spec limits this to 3 for frame coding, but in practice can be as high as 6
56 #define MAX_REFERENCE_FRAMES 8
57 #define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
58 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
59 #define MAX_QUANT 68 /* max quant for VC-2 */
60 #define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
63 * DiracBlock->ref flags, if set then the block does MC from the given ref
65 #define DIRAC_REF_MASK_REF1 1
66 #define DIRAC_REF_MASK_REF2 2
67 #define DIRAC_REF_MASK_GLOBAL 4
70 * Value of Picture.reference when Picture is not a reference picture, but
71 * is held for delayed output.
73 #define DELAYED_PIC_REF 4
75 #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */
/**
 * Round size up to the next multiple of (1 << depth).
 *
 * Both arguments are fully parenthesized so that expressions such as
 * "a ? b : c" or "x | y" can be passed safely. Each argument is evaluated
 * more than once, so do not pass expressions with side effects.
 */
#define CALC_PADDING(size, depth)                                   \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))
80 #define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
84 int interpolated[3]; /* 1 if hpel[] is valid */
86 uint8_t *hpel_base[3][4];
93 } u; /* anonymous unions aren't in C99 :( */
/* One wavelet subband of a plane.
 * NOTE(review): most members are not visible in this listing. */
97 typedef struct SubBand {
/* set by init_planes() to &band[level-1][orientation] of the same plane */
105 struct SubBand *parent;
/* start of this band's coded bytes inside the packet; set by
 * decode_component() from the main bit reader's current byte position */
109 const uint8_t *coeff_data;
/* Per-component (Y, U, V) state.
 * NOTE(review): most members are not visible in this listing. */
112 typedef struct Plane {
/* base of the IDWT allocation; alloc_sequence_buffers() points idwt_buf
 * top_padding*w elements past it */
121 IDWTELEM *idwt_buf_base;
127 /* block separation (block n+1 starts after this many pixels in block n) */
130 /* amount of overspill on each edge (half of the overlap between blocks) */
/* indexed as band[level][orientation] */
134 SubBand band[MAX_DWT_LEVELS][4];
/* Decoder private context (stored in AVCodecContext.priv_data).
 * NOTE(review): several members and nested struct headers are not visible
 * in this listing. */
137 typedef struct DiracContext {
138 AVCodecContext *avctx;
140 DiracDSPContext diracdsp;
142 dirac_source_params source;
143 int seen_sequence_header;
144 int frame_number; /* number of the next frame to display */
149 int zero_res; /* zero residue flag */
150 int is_arith; /* whether coeffs use arith or golomb coding */
151 int low_delay; /* use the low delay syntax */
152 int globalmc_flag; /* use global motion compensation */
153 int num_refs; /* number of reference pictures */
155 /* wavelet decoding */
156 unsigned wavelet_depth; /* depth of the IDWT */
/* validated to be at most 6 when parsed; indexes default_qmat */
157 unsigned wavelet_idx;
160 * schroedinger older than 1.0.8 doesn't store
161 * quant delta if only one codebook exists in a band
163 unsigned old_delta_quant;
164 unsigned codeblock_mode;
/* indexed by b->level + (b->orientation != subband_ll) in
 * decode_subband_internal(), hence MAX_DWT_LEVELS+1 entries */
169 } codeblock[MAX_DWT_LEVELS+1];
172 unsigned num_x; /* number of horizontal slices */
173 unsigned num_y; /* number of vertical slices */
174 AVRational bytes; /* average bytes per slice */
175 uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
179 int pan_tilt[2]; /* pan/tilt vector */
180 int zrs[2][2]; /* zoom/rotate/shear matrix */
181 int perspective[2]; /* perspective vector */
183 unsigned perspective_exp;
186 /* motion compensation */
187 uint8_t mv_precision; /* motion vector precision read in [DIRAC_STD] 11.2.5; rejected when > 3 */
188 int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
189 unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
191 int blwidth; /* number of blocks (horizontally) */
192 int blheight; /* number of blocks (vertically) */
193 int sbwidth; /* number of superblocks (horizontally) */
194 int sbheight; /* number of superblocks (vertically) */
197 DiracBlock *blmotion;
199 uint8_t *edge_emu_buffer[4];
200 uint8_t *edge_emu_buffer_base;
202 uint16_t *mctmp; /* buffer holding the MC data multiplied by OBMC weights */
206 DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
208 void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
209 void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
210 void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
211 dirac_weight_func weight_func;
212 dirac_biweight_func biweight_func;
214 DiracFrame *current_picture;
215 DiracFrame *ref_pics[2];
/* NULL-terminated lists (see remove_frame()), hence the +1 */
217 DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
218 DiracFrame *delay_frames[MAX_DELAY+1];
/* backing storage; each avframe is allocated in dirac_decode_init() and
 * freed in dirac_decode_end() */
219 DiracFrame all_frames[MAX_FRAMES];
223 * Dirac Specification ->
224 * Parse code values. 9.6.1 Table 9.1
/* Parse code values of the Dirac data units.
 * NOTE(review): only the first enumerator is visible in this listing. */
226 enum dirac_parse_code {
227 pc_seq_header = 0x00,
/* Default low-delay quantisation matrices, read as
 * default_qmat[wavelet_idx][level][orientation] by
 * dirac_unpack_idwt_params(). There is one row per wavelet index (0..6)
 * and four levels per row, which is why a custom matrix is mandatory for
 * a wavelet depth above 4. */
240 static const uint8_t default_qmat[][4][4] = {
241 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
242 { { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
243 { { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
244 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
245 { { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
246 { { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
247 { { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
/* Quantisation factor per quantiser index; callers clamp the index to
 * MAX_QUANT before the lookup (codeblock(), lowdelay_subband()).
 * NOTE(review): the tail of the initializer is not visible in this listing,
 * so confirm that all MAX_QUANT+1 entries are populated. */
250 static const int qscale_tab[MAX_QUANT+1] = {
251 4, 5, 6, 7, 8, 10, 11, 13,
252 16, 19, 23, 27, 32, 38, 45, 54,
253 64, 76, 91, 108, 128, 152, 181, 215,
254 256, 304, 362, 431, 512, 609, 724, 861,
255 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
256 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
257 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
/* Quantisation offset per quantiser index, used by lowdelay_subband() and
 * by one of the two qoffset branches in codeblock() (presumably the intra
 * one -- the selecting condition is not visible).
 * NOTE(review): the tail of the initializer is not visible in this listing. */
261 static const int qoffset_intra_tab[MAX_QUANT+1] = {
262 1, 2, 3, 4, 4, 5, 6, 7,
263 8, 10, 12, 14, 16, 19, 23, 27,
264 32, 38, 46, 54, 64, 76, 91, 108,
265 128, 152, 181, 216, 256, 305, 362, 431,
266 512, 609, 724, 861, 1024, 1218, 1448, 1722,
267 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
268 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
/* Quantisation offset per quantiser index, used by the other qoffset
 * branch in codeblock() (presumably for pictures with references -- the
 * selecting condition is not visible).
 * NOTE(review): the tail of the initializer is not visible in this listing. */
272 static const int qoffset_inter_tab[MAX_QUANT+1] = {
273 1, 2, 2, 3, 3, 4, 4, 5,
274 6, 7, 9, 10, 12, 14, 17, 20,
275 24, 29, 34, 41, 48, 57, 68, 81,
276 96, 114, 136, 162, 192, 228, 272, 323,
277 384, 457, 543, 646, 768, 913, 1086, 1292,
278 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
279 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
/**
 * Division by 3 using a multiply-and-shift sequence
 * (magic number division by 3 from schroedinger).
 *
 * For small non-negative x the result is x/3 rounded to the nearest
 * integer (0,1 -> 0; 2,3,4 -> 1; 5,6,7 -> 2; ...).
 *
 * @param x value to divide; (x+1)*21845 + 10922 must fit in an int
 * @return  approximately x / 3
 */
static inline int divide3(int x)
{
    return ((x+1)*21845 + 10922) >> 16;
}
/* Search the NULL-terminated framelist for the frame whose
 * display_picture_number equals picnum and close the gap by shifting the
 * following entries (terminator included) down by one slot.
 * NOTE(review): this listing skips source lines -- the assignment of
 * remove_idx, the guard in front of the shift loop and the return
 * statement are not visible; confirm against the full file. */
289 static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
291 DiracFrame *remove_pic = NULL;
292 int i, remove_idx = -1;
/* no break: the loop runs to the terminator, so the last match is kept */
294 for (i = 0; framelist[i]; i++)
295 if (framelist[i]->avframe->display_picture_number == picnum) {
296 remove_pic = framelist[i];
/* framelist[i+1] is read while framelist[i] is non-NULL, so the list's
 * NULL terminator is copied down too */
301 for (i = remove_idx; framelist[i]; i++)
302 framelist[i] = framelist[i+1];
/* Store frame in one of the first maxframes slots of framelist.
 * NOTE(review): the slot test inside the loop and both return statements
 * are not visible in this listing; presumably the first empty slot is
 * used and a non-zero value signals a full list -- confirm. */
307 static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
310 for (i = 0; i < maxframes; i++)
312 framelist[i] = frame;
318 static int alloc_sequence_buffers(DiracContext *s)
320 int sbwidth = DIVRNDUP(s->source.width, 4);
321 int sbheight = DIVRNDUP(s->source.height, 4);
322 int i, w, h, top_padding;
324 /* todo: think more about this / use or set Plane here */
325 for (i = 0; i < 3; i++) {
326 int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
327 int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
328 w = s->source.width >> (i ? s->chroma_x_shift : 0);
329 h = s->source.height >> (i ? s->chroma_y_shift : 0);
331 /* we allocate the max we support here since num decompositions can
332 * change from frame to frame. Stride is aligned to 16 for SIMD, and
333 * 1<<MAX_DWT_LEVELS top padding to avoid if(y>0) in arith decoding
334 * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
336 top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
337 w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
338 h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
340 s->plane[i].idwt_buf_base = av_mallocz((w+max_xblen)*h * sizeof(IDWTELEM));
341 s->plane[i].idwt_tmp = av_malloc((w+16) * sizeof(IDWTELEM));
342 s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
343 if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
344 return AVERROR(ENOMEM);
347 /* fixme: allocate using real stride here */
348 s->sbsplit = av_malloc_array(sbwidth, sbheight);
349 s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
351 if (!s->sbsplit || !s->blmotion)
352 return AVERROR(ENOMEM);
/* (Re)allocate the stride-dependent motion compensation scratch buffers
 * (edge_emu_buffer_base, mctmp, mcscratch) for the given stride.
 * Returns AVERROR(ENOMEM) when an allocation fails.
 * NOTE(review): this listing skips source lines (e.g. the body of the
 * early-out test and the final return); confirm against the full file. */
356 static int alloc_buffers(DiracContext *s, int stride)
358 int w = s->source.width;
359 int h = s->source.height;
361 av_assert0(stride >= w);
/* buffers already sized for at least this stride */
364 if (s->buffer_stride >= stride)
/* cleared while the buffers are being replaced; set again only after all
 * allocations below succeeded */
366 s->buffer_stride = 0;
368 av_freep(&s->edge_emu_buffer_base);
369 memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
371 av_freep(&s->mcscratch);
373 s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
375 s->mctmp = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
376 s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
378 if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
379 return AVERROR(ENOMEM);
381 s->buffer_stride = stride;
/* Release everything tied to the current sequence: unreference all
 * frames, free their half-pel planes, empty the reference and delay lists
 * and free the IDWT, block and MC buffers.
 * NOTE(review): this listing skips source lines (declarations, closing
 * braces, possibly more frees); confirm against the full file. */
385 static void free_sequence_buffers(DiracContext *s)
389 for (i = 0; i < MAX_FRAMES; i++) {
/* NOTE(review): avframe is dereferenced without a NULL check; it is NULL
 * if dirac_decode_init() failed part-way -- confirm this cannot be
 * reached in that state */
390 if (s->all_frames[i].avframe->data[0]) {
391 av_frame_unref(s->all_frames[i].avframe);
392 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
/* k starts at 1: entry 0 of hpel_base is not freed here (presumably it
 * is not a separate allocation -- confirm) */
395 for (j = 0; j < 3; j++)
396 for (k = 1; k < 4; k++)
397 av_freep(&s->all_frames[i].hpel_base[j][k]);
400 memset(s->ref_frames, 0, sizeof(s->ref_frames));
401 memset(s->delay_frames, 0, sizeof(s->delay_frames));
403 for (i = 0; i < 3; i++) {
404 av_freep(&s->plane[i].idwt_buf_base);
405 av_freep(&s->plane[i].idwt_tmp);
/* forces alloc_buffers() to reallocate on its next call */
408 s->buffer_stride = 0;
409 av_freep(&s->sbsplit);
410 av_freep(&s->blmotion);
411 av_freep(&s->edge_emu_buffer_base);
414 av_freep(&s->mcscratch);
/* Decoder init callback: rejects CODEC_FLAG_EMU_EDGE, initialises the DSP
 * contexts and allocates one AVFrame per entry of all_frames.
 * NOTE(review): this listing skips source lines (declarations, the loop
 * around the cleanup call, the final return); confirm against the full
 * file. */
417 static av_cold int dirac_decode_init(AVCodecContext *avctx)
419 DiracContext *s = avctx->priv_data;
423 s->frame_number = -1;
425 if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
426 av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported!\n");
427 return AVERROR_PATCHWELCOME;
430 ff_dsputil_init(&s->dsp, avctx);
431 ff_diracdsp_init(&s->diracdsp);
433 for (i = 0; i < MAX_FRAMES; i++) {
434 s->all_frames[i].avframe = av_frame_alloc();
435 if (!s->all_frames[i].avframe) {
/* allocation failed: free the frames allocated before index i */
437 av_frame_free(&s->all_frames[--i].avframe);
438 return AVERROR(ENOMEM);
445 static void dirac_decode_flush(AVCodecContext *avctx)
447 DiracContext *s = avctx->priv_data;
448 free_sequence_buffers(s);
449 s->seen_sequence_header = 0;
450 s->frame_number = -1;
453 static av_cold int dirac_decode_end(AVCodecContext *avctx)
455 DiracContext *s = avctx->priv_data;
458 dirac_decode_flush(avctx);
459 for (i = 0; i < MAX_FRAMES; i++)
460 av_frame_free(&s->all_frames[i].avframe);
/* Sign context selector: CTX_SIGN_ZERO offset by the sign (-1, 0, +1) of x */
465 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
/* Decode one arithmetic-coded coefficient at (x, y) of subband b. The
 * magnitude context depends on the parent band's co-located coefficient
 * and on already decoded neighbours; the sign context depends on a
 * neighbour chosen by the band orientation.
 * NOTE(review): this listing skips source lines (local declarations, the
 * conditions guarding several statements and the final store); confirm
 * against the full file. */
467 static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
468 SubBand *b, IDWTELEM *buf, int x, int y)
472 int pred_ctx = CTX_ZPZN_F1;
474 /* Check if the parent subband has a 0 in the corresponding position */
/* the parent band has half the resolution, hence the >>1 on both coordinates */
476 pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
478 if (b->orientation == subband_hl)
479 sign_pred = buf[-b->stride];
481 /* Determine if the pixel has only zeros in its neighbourhood */
/* buf[-b->stride] is read even in the first row; alloc_sequence_buffers()
 * adds top padding to the IDWT buffer for this purpose */
483 pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
484 if (b->orientation == subband_lh)
487 pred_ctx += !buf[-b->stride];
490 coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
/* dequantise: scale by qfactor, add the offset, round the 2 fractional bits */
492 coeff = (coeff * qfactor + qoffset + 2) >> 2;
493 sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
/* branch-free negate: sign==1 gives -coeff, sign==0 leaves coeff unchanged */
494 coeff = (coeff ^ -sign) + sign;
/* Decode one Golomb-coded coefficient from gb: unsigned magnitude,
 * dequantisation, then a sign bit.
 * NOTE(review): this listing skips source lines (declarations, the guard
 * around the dequantise/sign step and the return); confirm against the
 * full file. */
499 static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
503 coeff = svq3_get_ue_golomb(gb);
/* dequantise: scale by qfactor, add the offset, round the 2 fractional bits */
505 coeff = (coeff * qfactor + qoffset + 2) >> 2;
506 sign = get_bits1(gb);
/* branch-free negate: sign==1 gives -coeff, sign==0 leaves coeff unchanged */
507 coeff = (coeff ^ -sign) + sign;
513 * Decode the coeffs in the rectangle defined by left, right, top, bottom
514 * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
/* Decode the coefficients of one codeblock, i.e. the rectangle
 * [left,right) x [top,bottom) of subband b, from either the arithmetic
 * decoder c or the bit reader gb.
 * NOTE(review): this listing skips source lines (the is_arith / num_refs
 * style conditions selecting between the paired statements below, error
 * returns, declarations); confirm against the full file. */
516 static inline void codeblock(DiracContext *s, SubBand *b,
517 GetBitContext *gb, DiracArith *c,
518 int left, int right, int top, int bottom,
519 int blockcnt_one, int is_arith)
521 int x, y, zero_block;
522 int qoffset, qfactor;
525 /* check for any coded coefficients in this codeblock */
/* arithmetic-coded and raw-bit variants of the same flag */
528 zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
530 zero_block = get_bits1(gb);
/* a per-codeblock quantiser delta is coded only in codeblock_mode, and is
 * skipped for single-codeblock bands written by old schroedinger versions */
536 if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
537 int quant = b->quant;
539 quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
541 quant += dirac_get_se_golomb(gb);
543 av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
/* clamp so the table lookups below stay within MAX_QUANT+1 entries */
549 b->quant = FFMIN(b->quant, MAX_QUANT);
551 qfactor = qscale_tab[b->quant];
552 /* TODO: context pointer? */
554 qoffset = qoffset_intra_tab[b->quant];
556 qoffset = qoffset_inter_tab[b->quant];
558 buf = b->ibuf + top * b->stride;
559 for (y = top; y < bottom; y++) {
560 for (x = left; x < right; x++) {
561 /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
563 coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
565 buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
572 * Dirac Specification ->
573 * 13.3 intra_dc_prediction(band)
/* Undo DC prediction in place on subband b: every coefficient has a
 * prediction formed from already reconstructed neighbours added to it.
 * NOTE(review): this listing skips source lines (the first-row update and
 * the per-row pointer advance); confirm against the full file. */
575 static inline void intra_dc_prediction(SubBand *b)
577 IDWTELEM *buf = b->ibuf;
/* first row */
580 for (x = 1; x < b->width; x++)
584 for (y = 1; y < b->height; y++) {
/* first column: predicted from the sample above */
585 buf[0] += buf[-b->stride];
587 for (x = 1; x < b->width; x++) {
/* elsewhere: rounded mean of the left, top and top-left neighbours */
588 int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
589 buf[x] += divide3(pred);
596 * Dirac Specification ->
597 * 13.4.2 Non-skipped subbands. subband_coeffs()
/* Decode all codeblocks of subband b, then apply DC prediction when b is
 * the LL band of a picture without references.
 * NOTE(review): this listing skips source lines (declarations, an early
 * exit, the is_arith test, the left/top bookkeeping); confirm against the
 * full file. */
599 static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
601 int cb_x, cb_y, left, right, top, bottom;
/* codeblock[] entry 0 is used by the LL band; other orientations use level+1 */
604 int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
605 int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
/* true only when the band consists of exactly one codeblock (1x1) */
606 int blockcnt_one = (cb_width + cb_height) == 2;
611 init_get_bits8(&gb, b->coeff_data, b->length);
614 ff_dirac_init_arith_decoder(&c, &gb, b->length);
617 for (cb_y = 0; cb_y < cb_height; cb_y++) {
/* 1LL widens the product to 64 bits before the division */
618 bottom = (b->height * (cb_y+1LL)) / cb_height;
620 for (cb_x = 0; cb_x < cb_width; cb_x++) {
621 right = (b->width * (cb_x+1LL)) / cb_width;
622 codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
628 if (b->orientation == subband_ll && s->num_refs == 0)
629 intra_dc_prediction(b);
632 static int decode_subband_arith(AVCodecContext *avctx, void *b)
634 DiracContext *s = avctx->priv_data;
635 decode_subband_internal(s, b, 1);
/* avctx->execute() worker for Golomb-coded subbands. decode_component()
 * passes an array of SubBand pointers, so arg addresses a SubBand* that is
 * dereferenced before decoding.
 * NOTE(review): the declaration of b and the return statement are not
 * visible in this listing. */
639 static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
641 DiracContext *s = avctx->priv_data;
643 decode_subband_internal(s, *b, 0);
648 * Dirac Specification ->
649 * [DIRAC_STD] 13.4.1 core_transform_data()
/* Parse the subband headers of component comp from s->gb, record where
 * each band's coefficient bytes live, and hand the bands to
 * avctx->execute() for decoding.
 * NOTE(review): this listing skips source lines (the condition around the
 * quant read and the is_arith tests around the execute() calls); confirm
 * against the full file. */
651 static void decode_component(DiracContext *s, int comp)
653 AVCodecContext *avctx = s->avctx;
654 SubBand *bands[3*MAX_DWT_LEVELS+1];
655 enum dirac_subband orientation;
656 int level, num_bands = 0;
658 /* Unpack all subbands at all levels. */
659 for (level = 0; level < s->wavelet_depth; level++) {
/* level 0 has four bands (0..3); higher levels only orientations 1..3 */
660 for (orientation = !!level; orientation < 4; orientation++) {
661 SubBand *b = &s->plane[comp].band[level][orientation];
662 bands[num_bands++] = b;
664 align_get_bits(&s->gb);
665 /* [DIRAC_STD] 13.4.2 subband() */
666 b->length = svq3_get_ue_golomb(&s->gb);
668 b->quant = svq3_get_ue_golomb(&s->gb);
669 align_get_bits(&s->gb);
670 b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
/* clamp the coded length to the bytes actually left in the packet */
671 b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
672 skip_bits_long(&s->gb, b->length*8);
675 /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
/* the bands of one level are adjacent in band[level][], so they are passed
 * as an array of 4-!!level SubBand elements */
677 avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
678 NULL, 4-!!level, sizeof(SubBand));
680 /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
682 avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
685 /* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
686 /* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
/* Decode the part of subband b1 (and of b2, when given) that belongs to
 * slice (slice_x, slice_y), reading Golomb-coded coefficients from gb and
 * stopping once bits_end is reached. With b2 set, coefficients of the two
 * bands are read alternately.
 * NOTE(review): this listing skips source lines (declarations, the return
 * statements after the overread checks, the b2 test and the per-row
 * pointer advance); confirm against the full file. */
687 static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
688 int slice_x, int slice_y, int bits_end,
689 SubBand *b1, SubBand *b2)
/* the slice's rectangle inside the band: an even split of the band
 * dimensions over num_x by num_y slices */
691 int left = b1->width * slice_x / s->lowdelay.num_x;
692 int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
693 int top = b1->height * slice_y / s->lowdelay.num_y;
694 int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
/* the intra offset table is always used here */
696 int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
697 int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
699 IDWTELEM *buf1 = b1->ibuf + top * b1->stride;
700 IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
702 /* we have to constantly check for overread since the spec explicitly
703 requires this, with the meaning that all remaining coeffs are set to 0 */
704 if (get_bits_count(gb) >= bits_end)
707 for (y = top; y < bottom; y++) {
708 for (x = left; x < right; x++) {
709 buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
710 if (get_bits_count(gb) >= bits_end)
713 buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
714 if (get_bits_count(gb) >= bits_end)
724 struct lowdelay_slice {
733 * Dirac Specification ->
734 * 13.5.2 Slices. slice(sx,sy)
736 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
738 DiracContext *s = avctx->priv_data;
739 struct lowdelay_slice *slice = arg;
740 GetBitContext *gb = &slice->gb;
741 enum dirac_subband orientation;
742 int level, quant, chroma_bits, chroma_end;
744 int quant_base = get_bits(gb, 7); /*[DIRAC_STD] qindex */
745 int length_bits = av_log2(8 * slice->bytes)+1;
746 int luma_bits = get_bits_long(gb, length_bits);
747 int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
749 /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
750 for (level = 0; level < s->wavelet_depth; level++)
751 for (orientation = !!level; orientation < 4; orientation++) {
752 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
753 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
754 &s->plane[0].band[level][orientation], NULL);
757 /* consume any unused bits from luma */
758 skip_bits_long(gb, get_bits_count(gb) - luma_end);
760 chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
761 chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
762 /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
763 for (level = 0; level < s->wavelet_depth; level++)
764 for (orientation = !!level; orientation < 4; orientation++) {
765 quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
766 lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
767 &s->plane[1].band[level][orientation],
768 &s->plane[2].band[level][orientation]);
775 * Dirac Specification ->
776 * 13.5.1 low_delay_transform_data()
/* Split the remaining packet data into num_x * num_y slices, decode them
 * through avctx->execute(), then apply DC prediction to the LL band of
 * each plane.
 * NOTE(review): this listing skips source lines (declarations of buf and
 * slice_num, the per-slice advance of buf/bufsize/slice_num and the
 * release of slices); confirm against the full file. */
778 static void decode_lowdelay(DiracContext *s)
780 AVCodecContext *avctx = s->avctx;
781 int slice_x, slice_y, bytes, bufsize;
783 struct lowdelay_slice *slices;
/* NOTE(review): num_x and num_y come from the bitstream; their product is
 * not checked for overflow here and no NULL check of the result is visible
 * in this listing -- confirm */
786 slices = av_mallocz(s->lowdelay.num_x * s->lowdelay.num_y * sizeof(struct lowdelay_slice));
788 align_get_bits(&s->gb);
789 /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
790 buf = s->gb.buffer + get_bits_count(&s->gb)/8;
791 bufsize = get_bits_left(&s->gb);
/* both loops stop early once the packet data is used up */
793 for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
794 for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
/* size of this slice: difference of consecutive truncated multiples of
 * the average bytes-per-slice fraction */
795 bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
796 - slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
798 slices[slice_num].bytes = bytes;
799 slices[slice_num].slice_x = slice_x;
800 slices[slice_num].slice_y = slice_y;
801 init_get_bits(&slices[slice_num].gb, buf, bufsize);
808 avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
809 sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
810 intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
811 intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
812 intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
/* Derive the per-plane geometry for the current picture: plane and padded
 * IDWT dimensions, the location, stride and parent of every subband inside
 * the IDWT buffer, and the block size, separation and overspill.
 * NOTE(review): this listing skips source lines (the per-level halving of
 * w/h, band width/height/level assignments and the conditions around the
 * ibuf offset, the parent link and the chroma block sizes); confirm
 * against the full file. */
816 static void init_planes(DiracContext *s)
818 int i, w, h, level, orientation;
820 for (i = 0; i < 3; i++) {
821 Plane *p = &s->plane[i];
/* chroma planes (i != 0) are subsampled by the chroma shifts */
823 p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
824 p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
/* pad to a multiple of 1 << wavelet_depth */
825 p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
826 p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
827 p->idwt_stride = FFALIGN(p->idwt_width, 8);
829 for (level = s->wavelet_depth-1; level >= 0; level--) {
/* level 0 has four bands (0..3); higher levels only orientations 1..3 */
832 for (orientation = !!level; orientation < 4; orientation++) {
833 SubBand *b = &p->band[level][orientation];
/* every band lives inside the plane's single IDWT buffer; its stride grows
 * as the level index decreases */
835 b->ibuf = p->idwt_buf;
837 b->stride = p->idwt_stride << (s->wavelet_depth - level);
840 b->orientation = orientation;
845 b->ibuf += b->stride>>1;
848 b->parent = &p->band[level-1][orientation];
/* chroma block geometry is the luma geometry scaled by the chroma shifts */
853 p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
854 p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
855 p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
856 p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
/* half of the overlap between neighbouring blocks */
859 p->xoffset = (p->xblen - p->xbsep)/2;
860 p->yoffset = (p->yblen - p->ybsep)/2;
865 * Unpack the motion compensation parameters
866 * Dirac Specification ->
867 * 11.2 Picture prediction data. picture_prediction()
/* Read the picture prediction parameters from s->gb: luma block size and
 * separation, motion vector precision, optional global motion parameters
 * per reference, the prediction mode and the reference weights. Values
 * that fail validation are logged.
 * NOTE(review): this listing skips source lines (declarations, the tests
 * on idx and on the per-section presence flags, the error returns and the
 * default weights); confirm against the full file. */
869 static int dirac_unpack_prediction_parameters(DiracContext *s)
/* preset block lengths / separations, looked up with idx-1 */
871 static const uint8_t default_blen[] = { 4, 12, 16, 24 };
872 static const uint8_t default_bsep[] = { 4, 8, 12, 16 };
874 GetBitContext *gb = &s->gb;
878 /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
879 /* Luma and Chroma are equal. 11.2.3 */
880 idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
883 av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
/* explicitly coded block parameters */
888 s->plane[0].xblen = svq3_get_ue_golomb(gb);
889 s->plane[0].yblen = svq3_get_ue_golomb(gb);
890 s->plane[0].xbsep = svq3_get_ue_golomb(gb);
891 s->plane[0].ybsep = svq3_get_ue_golomb(gb);
893 /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
894 s->plane[0].xblen = default_blen[idx-1];
895 s->plane[0].yblen = default_blen[idx-1];
896 s->plane[0].xbsep = default_bsep[idx-1];
897 s->plane[0].ybsep = default_bsep[idx-1];
899 /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
900 Calculated in function dirac_unpack_block_motion_data */
/* sanity checks: separation must be non-zero, at least half the block
 * length and no larger than it; block length is capped at MAX_BLOCKSIZE */
902 if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
903 av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
906 if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
907 av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
910 if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
911 av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
915 /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
916 Read motion vector precision */
917 s->mv_precision = svq3_get_ue_golomb(gb);
918 if (s->mv_precision > 3) {
919 av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
923 /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
924 Read the global motion compensation parameters */
925 s->globalmc_flag = get_bits1(gb);
926 if (s->globalmc_flag) {
/* start from all-zero parameters; only the coded parts are filled in below */
927 memset(s->globalmc, 0, sizeof(s->globalmc));
928 /* [DIRAC_STD] pan_tilt(gparams) */
929 for (ref = 0; ref < s->num_refs; ref++) {
931 s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
932 s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
934 /* [DIRAC_STD] zoom_rotate_shear(gparams)
935 zoom/rotation/shear parameters */
/* NOTE(review): no range check on zrs_exp is visible; global_mv() uses it
 * as a shift count -- confirm */
937 s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
938 s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
939 s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
940 s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
941 s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
/* identity matrix when no zoom/rotate/shear data is coded */
943 s->globalmc[ref].zrs[0][0] = 1;
944 s->globalmc[ref].zrs[1][1] = 1;
946 /* [DIRAC_STD] perspective(gparams) */
/* NOTE(review): no range check on perspective_exp is visible either */
948 s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
949 s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
950 s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
955 /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
956 Picture prediction mode, not currently used. */
957 if (svq3_get_ue_golomb(gb)) {
958 av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
962 /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
963 just data read, weight calculation will be done later on. */
/* default denominator; replaced below when custom weights are coded */
964 s->weight_log2denom = 1;
969 s->weight_log2denom = svq3_get_ue_golomb(gb);
970 s->weight[0] = dirac_get_se_golomb(gb);
971 if (s->num_refs == 2)
972 s->weight[1] = dirac_get_se_golomb(gb);
978 * Dirac Specification ->
979 * 11.3 Wavelet transform data. wavelet_transform()
/* Read the wavelet transform parameters from s->gb: zero-residue flag,
 * wavelet index and depth, then either the codeblock layout (core syntax)
 * or the slice layout and quantisation matrix (low-delay syntax).
 * NOTE(review): this listing skips source lines (declarations, parts of
 * the CHECKEDREAD macro, the zero_res early return and several else
 * branches); confirm against the full file. */
981 static int dirac_unpack_idwt_params(DiracContext *s)
983 GetBitContext *gb = &s->gb;
/* CHECKEDREAD: read an unsigned Golomb value into tmp, log errmsg when
 * cond holds (cond may refer to tmp); the remaining lines of the macro are
 * not visible here */
987 #define CHECKEDREAD(dst, cond, errmsg) \
988 tmp = svq3_get_ue_golomb(gb); \
990 av_log(s->avctx, AV_LOG_ERROR, errmsg); \
/* the zero-residue flag is only coded for pictures with references */
997 s->zero_res = s->num_refs ? get_bits1(gb) : 0;
1001 /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
1002 CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
1004 CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
1006 if (!s->low_delay) {
1007 /* Codeblock parameters (core syntax only) */
1008 if (get_bits1(gb)) {
1009 for (i = 0; i <= s->wavelet_depth; i++) {
/* binary '-' binds tighter than '>>', so the bound is
 * dimension >> (wavelet_depth - i) */
1010 CHECKEDREAD(s->codeblock[i].width , tmp < 1 || tmp > (s->avctx->width >>s->wavelet_depth-i), "codeblock width invalid\n")
1011 CHECKEDREAD(s->codeblock[i].height, tmp < 1 || tmp > (s->avctx->height>>s->wavelet_depth-i), "codeblock height invalid\n")
1014 CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
/* no explicit layout coded: a single codeblock per band */
1016 for (i = 0; i <= s->wavelet_depth; i++)
1017 s->codeblock[i].width = s->codeblock[i].height = 1;
1019 /* Slice parameters + quantization matrix*/
1020 /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
/* NOTE(review): num_x / num_y are used as divisors in lowdelay_subband();
 * no zero check is visible in this listing -- confirm */
1021 s->lowdelay.num_x = svq3_get_ue_golomb(gb);
1022 s->lowdelay.num_y = svq3_get_ue_golomb(gb);
1023 s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
1024 s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
/* den is a divisor in decode_lowdelay() */
1026 if (s->lowdelay.bytes.den <= 0) {
1027 av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
1028 return AVERROR_INVALIDDATA;
1031 /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
1032 if (get_bits1(gb)) {
1033 av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
1034 /* custom quantization matrix */
/* one value for the level-0 LL band, then three per level */
1035 s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
1036 for (level = 0; level < s->wavelet_depth; level++) {
1037 s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
1038 s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
1039 s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
/* default_qmat only provides four levels per wavelet */
1042 if (s->wavelet_depth > 4) {
1043 av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
1044 return AVERROR_INVALIDDATA;
1046 /* default quantization matrix */
1047 for (level = 0; level < s->wavelet_depth; level++)
1048 for (i = 0; i < 4; i++) {
1049 s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
1050 /* haar with no shift differs for different depths */
1051 if (s->wavelet_idx == 3)
1052 s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
/* Predict the split mode of the superblock at (x, y) from already decoded
 * neighbours; sbsplit points at the current entry and stride is the row
 * length of the split array.
 * NOTE(review): the border tests on x and y and the left-neighbour return
 * are not visible in this listing. */
1059 static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
/* maps the sum of three neighbouring split values (0..6) to a prediction */
1061 static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
1068 return sbsplit[-stride];
1070 return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
/* Predict the refmask bit of the block's ref flags from the left, top and
 * top-left neighbours.
 * NOTE(review): the declaration of pred and the border tests on x and y
 * are not visible in this listing. */
1073 static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
1080 return block[-1].ref & refmask;
1082 return block[-stride].ref & refmask;
1084 /* return the majority */
/* summing three copies of a single-bit mask and halving keeps the bit
 * only when at least two neighbours have it set */
1085 pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
1086 return (pred >> 1) & refmask;
/* Predict the DC values of a block from those neighbours (left, top,
 * top-left) that have neither reference bit set: the values are summed
 * and then averaged over the number of contributing neighbours.
 * NOTE(review): the declarations, the counting of n and the test that
 * precedes the halving branch are not visible in this listing. */
1089 static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
1093 memset(block->u.dc, 0, sizeof(block->u.dc));
/* ref & 3 tests DIRAC_REF_MASK_REF1 | DIRAC_REF_MASK_REF2 */
1095 if (x && !(block[-1].ref & 3)) {
1096 for (i = 0; i < 3; i++)
1097 block->u.dc[i] += block[-1].u.dc[i];
1101 if (y && !(block[-stride].ref & 3)) {
1102 for (i = 0; i < 3; i++)
1103 block->u.dc[i] += block[-stride].u.dc[i];
1107 if (x && y && !(block[-1-stride].ref & 3)) {
1108 for (i = 0; i < 3; i++)
1109 block->u.dc[i] += block[-1-stride].u.dc[i];
/* rounded halving of the sum */
1114 for (i = 0; i < 3; i++)
1115 block->u.dc[i] = (block->u.dc[i]+1)>>1;
1116 } else if (n == 3) {
1117 for (i = 0; i < 3; i++)
1118 block->u.dc[i] = divide3(block->u.dc[i]);
/* Predict the motion vector for reference ref from the left, top and
 * top-left neighbours that use the same reference without global motion.
 * The four assignment pairs below handle 0, 1, 2 and 3 usable neighbours:
 * zero vector, copy, rounded mean, component-wise median.
 * NOTE(review): the declarations of pred and n and the switch/case lines
 * are not visible in this listing. */
1122 static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
/* ref 0 -> DIRAC_REF_MASK_REF1, ref 1 -> DIRAC_REF_MASK_REF2 */
1125 int refmask = ref+1;
1126 int mask = refmask | DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
1129 if (x && (block[-1].ref & mask) == refmask)
1130 pred[n++] = block[-1].u.mv[ref];
1132 if (y && (block[-stride].ref & mask) == refmask)
1133 pred[n++] = block[-stride].u.mv[ref];
1135 if (x && y && (block[-stride-1].ref & mask) == refmask)
1136 pred[n++] = block[-stride-1].u.mv[ref];
1140 block->u.mv[ref][0] = 0;
1141 block->u.mv[ref][1] = 0;
1144 block->u.mv[ref][0] = pred[0][0];
1145 block->u.mv[ref][1] = pred[0][1];
1148 block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
1149 block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
1152 block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
1153 block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
1158 static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
1160 int ez = s->globalmc[ref].zrs_exp;
1161 int ep = s->globalmc[ref].perspective_exp;
1162 int (*A)[2] = s->globalmc[ref].zrs;
1163 int *b = s->globalmc[ref].pan_tilt;
1164 int *c = s->globalmc[ref].perspective;
1166 int m = (1<<ep) - (c[0]*x + c[1]*y);
1167 int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<<ez) * b[0]);
1168 int my = m * ((A[1][0] * x + A[1][1]*y) + (1<<ez) * b[1]);
1170 block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
1171 block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
/* Decode the prediction parameters of one block at (x, y): the reference
 * flags, then either the DC values or the motion vectors. Every value is
 * coded as a residual against a prediction from neighbouring blocks.
 * arith[0] carries the mode bits, arith[1..3] the DC residuals and
 * arith[4..7] the motion vector residuals (x/y per reference).
 * NOTE(review): the test separating the DC path from the motion vector
 * path, its return and some braces are not visible in this listing. */
1174 static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
1175 int stride, int x, int y)
/* predicted flag XOR coded bit gives the actual flag */
1179 block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
1180 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
1182 if (s->num_refs == 2) {
1183 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
1184 block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
1188 pred_block_dc(block, stride, x, y);
1189 for (i = 0; i < 3; i++)
1190 block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
1194 if (s->globalmc_flag) {
1195 block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
1196 block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
1199 for (i = 0; i < s->num_refs; i++)
/* i+1 is the flag bit of reference i */
1200 if (block->ref & (i+1)) {
1201 if (block->ref & DIRAC_REF_MASK_GLOBAL) {
1202 global_mv(s, block, x, y, i);
1204 pred_mv(block, stride, x, y, i);
1205 block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1206 block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
1212 * Copies the current block to the other blocks covered by the current superblock split mode
1214 static void propagate_block_data(DiracBlock *block, int stride, int size)
1217 DiracBlock *dst = block;
1219 for (x = 1; x < size; x++)
1222 for (y = 1; y < size; y++) {
1224 for (x = 0; x < size; x++)
1230 * Dirac Specification ->
1231 * 12. Block motion data syntax
/*
 * Read the block motion data of an inter picture: first the split mode of
 * every superblock, then the parameters of every prediction block.
 *
 * Results are stored in s->sbsplit (one byte per superblock) and
 * s->blmotion (one DiracBlock per block, s->blwidth per row).
 *
 * NOTE(review): this listing omits short lines (local declarations, braces,
 * any validity check on the decoded split value, the return statements).
 */
1233 static int dirac_unpack_block_motion_data(DiracContext *s)
1235 GetBitContext *gb = &s->gb;
1236 uint8_t *sbsplit = s->sbsplit;
1238 DiracArith arith[8];
/* A superblock covers 4x4 blocks. The superblock count is the luma size
   divided by four block separations, rounded up. */
1242 /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
1243 s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
1244 s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
1245 s->blwidth = 4 * s->sbwidth;
1246 s->blheight = 4 * s->sbheight;
1248 /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
1249 decode superblock split modes */
1250 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
1251 for (y = 0; y < s->sbheight; y++) {
1252 for (x = 0; x < s->sbwidth; x++) {
1253 unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
/* the coded value is a residual: add the neighbour prediction modulo 3 */
1256 sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
/* advance to the next row of superblocks */
1258 sbsplit += s->sbwidth;
/* One length-prefixed arithmetic-coded segment per kind of value:
   arith[0] block modes, arith[4+2*i] / arith[5+2*i] the two vector
   components of reference i, arith[1..3] the three DC components.
   The segments are initialised in bitstream order, which differs from the
   array order. */
1261 /* setup arith decoding */
1262 ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
1263 for (i = 0; i < s->num_refs; i++) {
1264 ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
1265 ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
1267 for (i = 0; i < 3; i++)
1268 ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
1270 for (y = 0; y < s->sbheight; y++)
1271 for (x = 0; x < s->sbwidth; x++) {
/* split 0/1/2 gives 1x1, 2x2 or 4x4 coded blocks per superblock, each
   standing for a step x step square of the 4x4 block grid (step 4/2/1) */
1272 int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
1273 int step = 4 >> s->sbsplit[y * s->sbwidth + x];
1275 for (q = 0; q < blkcnt; q++)
1276 for (p = 0; p < blkcnt; p++) {
1277 int bx = 4 * x + p*step;
1278 int by = 4 * y + q*step;
1279 DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
/* decode into the top-left block of the square, then copy it over the rest
   so that later neighbour predictions see a fully populated grid */
1280 decode_block_params(s, arith, block, s->blwidth, bx, by);
1281 propagate_block_data(block, s->blwidth, step);
/**
 * One-dimensional overlapped-block weight of position i in a block.
 *
 * Positions within 2*offset of either end of the block get a reduced weight
 * that depends only on the distance to that end; all other positions get the
 * full weight 8.
 *
 * The reduced weight used to be produced by a function-local ROLLOFF macro
 * that silently captured offset, was not parenthesised as a whole and stayed
 * defined for the rest of the file. It is now plain code with the same
 * results.
 *
 * @param i      position inside the block, 0 .. blen-1
 * @param blen   block length
 * @param offset block offset; the reduced zone at each end is 2*offset wide
 * @return the weight, between 1 and 8 for i within 0 .. blen-1
 */
static int weight(int i, int blen, int offset)
{
    int dist; /* distance of i from the block end it is close to */

    if (i < 2*offset)
        dist = i;
    else if (i > blen-1 - 2*offset)
        dist = blen-1 - i;
    else
        return 8;

    /* a two-position zone uses the fixed pair 3, 5 instead of the formula */
    if (offset == 1)
        return dist ? 5 : 3;

    return 1 + (6*dist + offset - 1) / (2*offset - 1);
}
1300 static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
1301 int left, int right, int wy)
1304 for (x = 0; left && x < p->xblen >> 1; x++)
1305 obmc_weight[x] = wy*8;
1306 for (; x < p->xblen >> right; x++)
1307 obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
1308 for (; x < p->xblen; x++)
1309 obmc_weight[x] = wy*8;
1310 for (; x < stride; x++)
1314 static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
1315 int left, int right, int top, int bottom)
1318 for (y = 0; top && y < p->yblen >> 1; y++) {
1319 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1320 obmc_weight += stride;
1322 for (; y < p->yblen >> bottom; y++) {
1323 int wy = weight(y, p->yblen, p->yoffset);
1324 init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
1325 obmc_weight += stride;
1327 for (; y < p->yblen; y++) {
1328 init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
1329 obmc_weight += stride;
1333 static void init_obmc_weights(DiracContext *s, Plane *p, int by)
1336 int bottom = by == s->blheight-1;
1338 /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
1339 if (top || bottom || by == 1) {
1340 init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
1341 init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
1342 init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
1346 static const uint8_t epel_weights[4][4][4] = {
1366 * For block x,y, determine which of the hpel planes to do bilinear
1367 * interpolation from and set src[] to the location in each hpel plane
1370 * @return the index of the put_dirac_pixels_tab function to use
1371 * 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
/*
 * NOTE(review): this listing omits short lines of this function (several
 * if conditions, the nplanes assignments, the x/y adjustments, braces).
 * The comments below describe what the visible lines do; "presumably"
 * marks a guard that is not visible.
 *
 * x, y are the destination coordinates of the block inside plane `plane`;
 * src[0..3] receive source pointers and src[4] the weight row for the
 * eighth-pel case.
 */
1373 static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
1374 int x, int y, int ref, int plane)
1376 Plane *p = &s->plane[plane];
/* the half-pel planes of the reference picture for this component */
1377 uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
1378 int motion_x = block->u.mv[ref][0];
1379 int motion_y = block->u.mv[ref][1];
1380 int mx, my, i, epel, nplanes = 0;
/* scale the vector by the chroma subsampling shifts
   (presumably only for chroma planes - the guard is not visible) */
1383 motion_x >>= s->chroma_x_shift;
1384 motion_y >>= s->chroma_y_shift;
/* split the vector: the low mv_precision bits are the fractional part,
   the remaining bits the whole-sample displacement */
1387 mx = motion_x & ~(-1U << s->mv_precision);
1388 my = motion_y & ~(-1U << s->mv_precision);
1389 motion_x >>= s->mv_precision;
1390 motion_y >>= s->mv_precision;
1391 /* normalize subpel coordinates to epel */
1392 /* TODO: template this function? */
1393 mx <<= 3 - s->mv_precision;
1394 my <<= 3 - s->mv_precision;
/* single-plane case: (my>>1)+(mx>>2) selects one of the four half-pel planes
   (presumably reached only when mx and my are both 0 or 4) */
1403 src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
/* otherwise start from the same position in all four half-pel planes */
1407 for (i = 0; i < 4; i++)
1408 src[i] = ref_hpel[i] + y*p->stride + x;
1410 /* if we're interpolating in the right/bottom halves, adjust the planes as needed
1411 we increment x/y because the edge changes for half of the pixels */
/* move planes 0 and 1 down one row (presumably when my is in the bottom half) */
1418 src[0] += p->stride;
1419 src[1] += p->stride;
1427 /* check if we really only need 2 planes since either mx or my is
1428 a hpel position. (epel weights of 0 handle this there) */
1430 /* mx == 0: average [0] and [2]
1431 mx == 4: average [1] and [3] */
1432 src[!mx] = src[2 + !!mx];
/* my is a half-pel position: keep the horizontally adjacent pair of that row */
1434 } else if (!(my&3)) {
1435 src[0] = src[(my>>1) ];
1436 src[1] = src[(my>>1)+1];
1440 /* adjust the ordering if needed so the weights work */
/* swap left/right planes, then top/bottom planes
   (each pair of swaps presumably has its own guard, not visible here) */
1442 FFSWAP(const uint8_t *, src[0], src[1]);
1443 FFSWAP(const uint8_t *, src[2], src[3]);
1446 FFSWAP(const uint8_t *, src[0], src[2]);
1447 FFSWAP(const uint8_t *, src[1], src[3]);
/* the fifth entry carries the four blend weights for this fractional position */
1449 src[4] = epel_weights[my&3][mx&3];
/* If the block reaches beyond the picture plus EDGE_WIDTH/2 of padding,
   copy every used plane through ff_emulated_edge_mc() into a scratch buffer
   and read from there instead. */
1453 /* fixme: v/h _edge_pos */
1454 if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
1455 y + p->yblen > p->height+EDGE_WIDTH/2 ||
1457 for (i = 0; i < nplanes; i++) {
1458 ff_emulated_edge_mc(s->edge_emu_buffer[i], p->stride,
1460 p->xblen, p->yblen, x, y,
1461 p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
1462 src[i] = s->edge_emu_buffer[i];
/* table index: half the plane count, plus one for eighth-pel positions */
1465 return (nplanes>>1) + epel;
/*
 * Accumulate a constant (DC) prediction into the motion compensation
 * buffer: every dst sample of an xblen x yblen block is increased by
 * dc * obmc_weight.
 *
 * dst         - accumulation buffer, 16-bit samples
 * dc          - DC value of the block for this component
 * stride      - row pitch of dst
 * obmc_weight - weight table, rows MAX_BLOCKSIZE entries apart
 *
 * The inner loop handles two columns per iteration, so it assumes xblen is
 * even - TODO confirm against the block size constraints.
 *
 * NOTE(review): this listing omits short lines (local declarations, braces,
 * the statement advancing dst to the next row and possibly an adjustment of
 * dc before the loop) - confirm against the full file.
 */
1468 static void add_dc(uint16_t *dst, int dc, int stride,
1469 uint8_t *obmc_weight, int xblen, int yblen)
1474 for (y = 0; y < yblen; y++) {
1475 for (x = 0; x < xblen; x += 2) {
1476 dst[x ] += dc * obmc_weight[x ];
1477 dst[x+1] += dc * obmc_weight[x+1];
/* next row of the weight table */
1480 obmc_weight += MAX_BLOCKSIZE;
/*
 * Motion compensate one block and accumulate it, multiplied by its OBMC
 * weights, into mctmp.
 *
 * mctmp       - accumulation buffer position of this block
 * obmc_weight - weight table to apply
 * plane       - component index
 * dstx, dsty  - position of the block in the plane, handed to mc_subpel()
 *
 * NOTE(review): this listing omits short lines (the case labels, break /
 * return statements, one guard). "presumably" marks what is not visible.
 */
1484 static void block_mc(DiracContext *s, DiracBlock *block,
1485 uint16_t *mctmp, uint8_t *obmc_weight,
1486 int plane, int dstx, int dsty)
1488 Plane *p = &s->plane[plane];
1489 const uint8_t *src[5];
/* the two low bits of ref tell which references the block uses */
1492 switch (block->ref&3) {
/* presumably the no-reference case: add the DC value directly. add_dc()
   accumulates into mctmp itself. */
1494 add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
/* presumably the single-reference cases: (block->ref&3)-1 is the reference index */
1498 idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
1499 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* a single prediction is scaled by the sum of both reference weights
   (presumably only when weight_func is set - select_dsp_funcs() may leave it NULL) */
1501 s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
1502 s->weight[0] + s->weight[1], p->yblen);
/* presumably the two-reference case: the first prediction goes to mcscratch */
1505 idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
1506 s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
1507 idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
/* with explicit weights the second prediction is stored next to the first
   and both are blended; otherwise it is averaged into mcscratch */
1508 if (s->biweight_func) {
1509 /* fixme: +32 is a quick hack */
1510 s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
1511 s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
1512 s->weight[0], s->weight[1], p->yblen);
1514 s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
/* accumulate the prediction in mcscratch, weighted by obmc_weight */
1517 s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
1520 static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
1522 Plane *p = &s->plane[plane];
1523 int x, dstx = p->xbsep - p->xoffset;
1525 block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
1528 for (x = 1; x < s->blwidth-1; x++) {
1529 block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
1533 block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
/*
 * Copy the DSP function pointers for the current block size into the
 * context, so the per-block code can call them without further lookup.
 *
 * NOTE(review): the lines choosing idx are not visible in this listing, and
 * width, height and yblen are not referenced by the visible lines - confirm
 * how idx is derived against the full file.
 */
1536 static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
1544 memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
1545 memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
1546 s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
/* Weighting functions are installed only when the reference weights differ
   from the defaults tested here. The else branch leaves both pointers NULL;
   block_mc() tests biweight_func before using it. */
1547 if (s->weight_log2denom > 1 || s->weight[0] != 1 || s->weight[1] != 1) {
1548 s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
1549 s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
1551 s->weight_func = NULL;
1552 s->biweight_func = NULL;
1556 static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
1558 /* chroma allocates an edge of 8 when subsampled
1559 which for 4:2:2 means an h edge of 16 and v edge of 8
1560 just use 8 for everything for the moment */
1561 int i, edge = EDGE_WIDTH/2;
1563 ref->hpel[plane][0] = ref->avframe->data[plane];
1564 s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
1566 /* no need for hpel if we only have fpel vectors */
1567 if (!s->mv_precision)
1570 for (i = 1; i < 4; i++) {
1571 if (!ref->hpel_base[plane][i])
1572 ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
1573 /* we need to be 16-byte aligned even for chroma */
1574 ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
1577 if (!ref->interpolated[plane]) {
1578 s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
1579 ref->hpel[plane][3], ref->hpel[plane][0],
1580 ref->avframe->linesize[plane], width, height);
1581 s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1582 s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1583 s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
1585 ref->interpolated[plane] = 1;
1589 * Dirac Specification ->
1590 * 13.0 Transform data syntax. transform_data()
/*
 * Reconstruct the three planes of s->current_picture: decode the wavelet
 * coefficients, run the inverse wavelet transform in slices and write the
 * result to the frame - directly for intra pictures, added to the motion
 * compensated prediction for inter pictures.
 *
 * NOTE(review): this listing omits short lines (the DWT context
 * declaration, some conditions, error returns, the dsty updates, braces).
 * "presumably" marks what is not visible.
 */
1592 static int dirac_decode_frame_internal(DiracContext *s)
1595 int y, i, comp, dsty;
/* clear the coefficient buffers of all planes (presumably for low-delay
   pictures only, as the spec reference suggests - the guard is not visible) */
1598 /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
1599 for (comp = 0; comp < 3; comp++) {
1600 Plane *p = &s->plane[comp];
1601 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1607 for (comp = 0; comp < 3; comp++) {
1608 Plane *p = &s->plane[comp];
1609 uint8_t *frame = s->current_picture->avframe->data[comp];
/* carve the edge emulation scratch area into four buffers, one per
   half-pel plane, FFALIGN(p->width, 16) bytes apart */
1611 /* FIXME: small resolutions */
1612 for (i = 0; i < 4; i++)
1613 s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
/* core syntax: coefficients are decoded one component at a time */
1615 if (!s->zero_res && !s->low_delay)
1617 memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
1618 decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
/* set up the inverse transform (a nonzero result presumably aborts decoding) */
1620 if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
1621 s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
/* intra: transform 16 rows at a time and store them clamped */
1624 if (!s->num_refs) { /* intra */
1625 for (y = 0; y < p->height; y += 16) {
1626 ff_spatial_idwt_slice2(&d, y+16); /* decode */
1627 s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
1628 p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
1630 } else { /* inter */
/* number of mctmp samples covered by one row of blocks */
1631 int rowheight = p->ybsep*p->stride;
1633 select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
1635 for (i = 0; i < s->num_refs; i++)
1636 interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
/* mctmp holds 16-bit samples, so this clears the first 2*yoffset rows */
1638 memset(s->mctmp, 0, 4*p->yoffset*p->stride);
/* one iteration per row of blocks; dsty is the top of the row in the plane
   and may be negative for the first row */
1641 for (y = 0; y < s->blheight; y++) {
1643 start = FFMAX(dsty, 0);
1644 uint16_t *mctmp = s->mctmp + y*rowheight;
1645 DiracBlock *blocks = s->blmotion + y*s->blwidth;
1647 init_obmc_weights(s, p, y);
/* h: picture rows written after this block row - up to the bottom of the
   picture for the last row, otherwise up to the next block row */
1649 if (y == s->blheight-1 || start+p->ybsep > p->height)
1650 h = p->height - start;
1652 h = p->ybsep - (start - dsty);
/* clear the ybsep rows this block row touches for the first time, then
   accumulate the weighted predictions of the whole row */
1656 memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
1657 mc_row(s, blocks, mctmp, comp, dsty);
/* skip the rows above the picture and the xoffset columns left of it, then
   add the transformed residual to the prediction and store it clamped */
1659 mctmp += (start - dsty)*p->stride + p->xoffset;
1660 ff_spatial_idwt_slice2(&d, start + h); /* decode */
1661 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
1662 p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
1674 * Dirac Specification ->
1675 * 11.1.1 Picture Header. picture_header()
/*
 * Parse the picture header of s->current_picture: the picture number, the
 * reference pictures, the picture to retire, and then the prediction, block
 * motion and wavelet parameters.
 *
 * NOTE(review): this listing omits short lines (some declarations and
 * conditions, the initial value of refdist, break / return statements,
 * braces). "presumably" marks what is not visible.
 */
1677 static int dirac_decode_picture_header(DiracContext *s)
1680 int i, j, refnum, refdist;
1681 GetBitContext *gb = &s->gb;
1683 /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
1684 picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
1687 av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
1689 /* if this is the first keyframe after a sequence header, start our
1690 reordering from here */
1691 if (s->frame_number < 0)
1692 s->frame_number = picnum;
/* each reference is coded as a signed offset from the current picture number */
1694 s->ref_pics[0] = s->ref_pics[1] = NULL;
1695 for (i = 0; i < s->num_refs; i++) {
1696 refnum = picnum + dirac_get_se_golomb(gb);
/* the search stops early once an exact match makes refdist 0 */
1699 /* find the closest reference to the one we want */
1700 /* Jordi: this is needed if the referenced picture hasn't yet arrived */
1701 for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
1702 if (s->ref_frames[j]
1703 && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
1704 s->ref_pics[i] = s->ref_frames[j];
1705 refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
1708 if (!s->ref_pics[i] || refdist)
1709 av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
/* NOTE(review): the ff_get_buffer() result is ignored here; if it fails, or
   if no free slot exists, the reference stays unusable - confirm the callers
   cope with that. */
1711 /* if there were no references at all, allocate one */
1712 if (!s->ref_pics[i])
1713 for (j = 0; j < MAX_FRAMES; j++)
1714 if (!s->all_frames[j].avframe->data[0]) {
1715 s->ref_pics[i] = &s->all_frames[j];
1716 ff_get_buffer(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
/* only reference pictures carry a retire offset and enter the reference list */
1721 /* retire the reference frames that are not used anymore */
1722 if (s->current_picture->avframe->reference) {
1723 retire = picnum + dirac_get_se_golomb(gb);
1724 if (retire != picnum) {
1725 DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
/* drop the reference flag but keep DELAYED_PIC_REF, so a picture still
   waiting for output is not released (presumably guarded by a NULL check on
   retire_pic, given the log message below) */
1728 retire_pic->avframe->reference &= DELAYED_PIC_REF;
1730 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
/* NOTE(review): the remove_frame() result is dereferenced without a check in
   the loop body below - confirm it cannot be NULL here. */
1733 /* if reference array is full, remove the oldest as per the spec */
1734 while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
1735 av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
1736 remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
/* prediction parameters and motion data (presumably read for inter pictures
   only), then the wavelet parameters; a nonzero result presumably fails the
   picture */
1741 if (dirac_unpack_prediction_parameters(s)) /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
1743 if (dirac_unpack_block_motion_data(s)) /* [DIRAC_STD] 12. Block motion data syntax */
1746 if (dirac_unpack_idwt_params(s)) /* [DIRAC_STD] 11.3 Wavelet transform data */
/*
 * Take the frame with the lowest display_picture_number out of
 * s->delay_frames and hand a new reference to it to the caller in picture.
 *
 * s->delay_frames is walked until its first NULL entry.
 *
 * NOTE(review): this listing omits short lines (declarations, the update of
 * out_idx, the guard for an empty list, the assignment to *got_frame, the
 * return statements).
 */
1753 static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
1755 DiracFrame *out = s->delay_frames[0];
1759 /* find frame with lowest picture number */
1760 for (i = 1; s->delay_frames[i]; i++)
1761 if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
1762 out = s->delay_frames[i];
/* close the gap: shift the following entries, including the terminating
   NULL, one slot down */
1766 for (i = out_idx; s->delay_frames[i]; i++)
1767 s->delay_frames[i] = s->delay_frames[i+1];
/* toggle DELAYED_PIC_REF on the chosen frame; it is set when a frame is
   queued for delayed output */
1770 out->avframe->reference ^= DELAYED_PIC_REF;
/* a failure of av_frame_ref() is captured in ret (presumably returned) */
1772 if((ret = av_frame_ref(picture, out->avframe)) < 0)
1780 * Dirac Specification ->
1781 * 9.6 Parse Info Header Syntax. parse_info()
1782 * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
1784 #define DATA_UNIT_HEADER_SIZE 13
1786 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
1787 inside the function parse_sequence() */
/*
 * Decode one data unit. buf points at its parse-info header and size is the
 * length of the whole unit; the parse code in byte 4 selects the handling:
 * sequence header, end of sequence, auxiliary data or picture.
 *
 * NOTE(review): this listing omits short lines (some declarations, the
 * return statements, some conditions, braces). "presumably" marks what is
 * not visible.
 */
1788 static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
1790 DiracContext *s = avctx->priv_data;
1791 DiracFrame *pic = NULL;
/* NOTE(review): buf[4] is read here, before the size check below */
1792 int ret, i, parse_code = buf[4];
1795 if (size < DATA_UNIT_HEADER_SIZE)
/* the payload starts right after the 13 byte header */
1798 init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
1800 if (parse_code == pc_seq_header) {
/* a sequence header seen while one is already active is not parsed again */
1801 if (s->seen_sequence_header)
1804 /* [DIRAC_STD] 10. Sequence header */
1805 if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
1808 avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
1810 if (alloc_sequence_buffers(s))
1813 s->seen_sequence_header = 1;
1814 } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
1815 free_sequence_buffers(s);
1816 s->seen_sequence_header = 0;
1817 } else if (parse_code == pc_aux_data) {
1818 if (buf[13] == 1) { /* encoder implementation/version */
/* NOTE(review): sscanf() reads packet data as a C string; this relies on a
   terminating zero byte after the payload - confirm the input padding
   guarantees it. */
1820 /* versions older than 1.0.8 don't store quant delta for
1821 subbands with only one codeblock */
1822 if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
1823 if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
1824 s->old_delta_quant = 1;
1826 } else if (parse_code & 0x8) { /* picture data unit */
1827 if (!s->seen_sequence_header) {
1828 av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
/* a slot is free when its frame has no data buffer attached */
1832 /* find an unused frame */
1833 for (i = 0; i < MAX_FRAMES; i++)
1834 if (s->all_frames[i].avframe->data[0] == NULL)
1835 pic = &s->all_frames[i];
1837 av_log(avctx, AV_LOG_ERROR, "framelist full\n");
1841 av_frame_unref(pic->avframe);
/* the picture properties are encoded in the bits of the parse code */
1843 /* [DIRAC_STD] Defined in 9.6.1 ... */
1844 tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
1846 av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
1850 s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
1851 s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
1852 pic->avframe->reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
1853 pic->avframe->key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
1854 pic->avframe->pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
/* reference pictures request a buffer with AV_GET_BUFFER_FLAG_REF */
1856 if ((ret = ff_get_buffer(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1858 s->current_picture = pic;
1859 s->plane[0].stride = pic->avframe->linesize[0];
1860 s->plane[1].stride = pic->avframe->linesize[1];
1861 s->plane[2].stride = pic->avframe->linesize[2];
/* scratch buffers are sized from the largest absolute line size */
1863 if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
1864 return AVERROR(ENOMEM);
1866 /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
1867 if (dirac_decode_picture_header(s))
1870 /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
1871 if (dirac_decode_frame_internal(s))
/*
 * Decoder entry point for one packet: release frames that are no longer
 * needed, decode every data unit found in the packet, then output at most
 * one frame in display order.
 *
 * An empty packet flushes: one delayed frame is returned per call.
 *
 * NOTE(review): this listing omits short lines (a declaration, the outer
 * loop, break / continue / return statements, the assignments to
 * *got_frame, braces). "presumably" marks what is not visible.
 */
1877 static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
1879 DiracContext *s = avctx->priv_data;
1880 AVFrame *picture = data;
1881 uint8_t *buf = pkt->data;
1882 int buf_size = pkt->size;
1883 int i, data_unit_size, buf_idx = 0;
/* a frame that has data but no reference flag left is free again; its
   half-pel planes must be recomputed on reuse */
1886 /* release unused frames */
1887 for (i = 0; i < MAX_FRAMES; i++)
1888 if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
1889 av_frame_unref(s->all_frames[i].avframe);
1890 memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
1893 s->current_picture = NULL;
1896 /* end of stream, so flush delayed pics */
1898 return get_delayed_pic(s, (AVFrame *)data, got_frame);
1901 /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
1902 [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
1903 BBCD start code search */
1904 for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
1905 if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
1906 buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
1909 /* BBCD found or end of data */
1910 if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
/* The unit size is the 32-bit big-endian field that follows the parse code.
   NOTE(review): data_unit_size is a signed int, so a field of 0x80000000 or
   more becomes negative and buf_idx + data_unit_size can overflow; consider
   an unsigned size compared against buf_size - buf_idx. */
1913 data_unit_size = AV_RB32(buf+buf_idx+5);
1914 if (buf_idx + data_unit_size > buf_size || !data_unit_size) {
1915 if(buf_idx + data_unit_size > buf_size)
1916 av_log(s->avctx, AV_LOG_ERROR,
1917 "Data unit with size %d is larger than input buffer, discarding\n",
1922 /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
1923 if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
1925 av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
1928 buf_idx += data_unit_size;
/* the packet contained no picture: nothing to output (the action taken is
   not visible in this listing) */
1931 if (!s->current_picture)
/* the decoded picture is ahead of the next one to display: queue it, and
   output the queued frame carrying the expected number, if there is one */
1934 if (s->current_picture->avframe->display_picture_number > s->frame_number) {
1935 DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
1937 s->current_picture->avframe->reference |= DELAYED_PIC_REF;
/* queue full: output the queued frame with the lowest picture number to make
   room, then queue the current picture */
1939 if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
1940 int min_num = s->delay_frames[0]->avframe->display_picture_number;
1941 /* Too many delayed frames, so we display the frame with the lowest pts */
1942 av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
1943 delayed_frame = s->delay_frames[0];
1945 for (i = 1; s->delay_frames[i]; i++)
1946 if (s->delay_frames[i]->avframe->display_picture_number < min_num)
1947 min_num = s->delay_frames[i]->avframe->display_picture_number;
1949 delayed_frame = remove_frame(s->delay_frames, min_num);
1950 add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
1953 if (delayed_frame) {
1954 delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
1955 if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
1959 } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
1960 /* The right frame at the right time :-) */
1961 if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
/* the next expected picture follows the one handed out (presumably only
   when a frame was output) */
1967 s->frame_number = picture->display_picture_number + 1;
/*
 * Codec registration entry for the Dirac decoder: ties the codec id to the
 * init, close, decode and flush callbacks of this file.
 *
 * CODEC_CAP_DELAY is set because frames are output in display order and may
 * be held back; dirac_decode_frame() returns them when given an empty
 * packet.
 */
1972 AVCodec ff_dirac_decoder = {
1974 .long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
1975 .type = AVMEDIA_TYPE_VIDEO,
1976 .id = AV_CODEC_ID_DIRAC,
1977 .priv_data_size = sizeof(DiracContext),
1978 .init = dirac_decode_init,
1979 .close = dirac_decode_end,
1980 .decode = dirac_decode_frame,
1981 .capabilities = CODEC_CAP_DELAY,
1982 .flush = dirac_decode_flush,