libavcodec/cinepakenc.c

   1 /*
   2  * Cinepak encoder (c) 2011 Tomas Härdin
   3  * http://titan.codemill.se/~tomhar/cinepakenc.patch
   4  *
   5  * Fixes and improvements, vintage decoders compatibility
   6  *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
   7
   8 Permission is hereby granted, free of charge, to any person obtaining a
   9 copy of this software and associated documentation files (the "Software"),
  10 to deal in the Software without restriction, including without limitation
  11 the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12 and/or sell copies of the Software, and to permit persons to whom the
  13 Software is furnished to do so, subject to the following conditions:
  14
  15 The above copyright notice and this permission notice shall be included
  16 in all copies or substantial portions of the Software.
  17
  18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24 OTHER DEALINGS IN THE SOFTWARE.
  25
  26  * MAYBE:
  27  * - "optimally" split the frame into several non-regular areas
  28  *   using a separate codebook pair for each area and approximating
  29  *   the area by several rectangular strips (generally not full width ones)
  30  *   (use quadtree splitting? a simple fixed-granularity grid?)
  31  *
  32  *
  33  * version 2014-01-23 Rl
  34  * - added option handling for flexibility
  35  *
  36  * version 2014-01-21 Rl
  37  * - believe it or not, now we get even smaller files, with better quality
  38  *   (which means I missed an optimization earlier :)
  39  *
  40  * version 2014-01-20 Rl
  41  * - made the encoder compatible with vintage decoders
  42  *   and added some yet unused code for possible future
  43  *   incremental codebook updates
  44  * - fixed a small memory leak
  45  *
  46  * version 2013-04-28 Rl
  47  * - bugfixed codebook optimization logic
  48  *
  49  * version 2013-02-14 Rl
  50  * "Valentine's Day" version:
  51  * - made strip division more robust
  52  * - minimized bruteforcing the number of strips,
   53  *   (costs some R/D but speeds up compression a lot), the heuristic
  54  *   assumption is that score as a function of the number of strips has
  55  *   one wide minimum which moves slowly, of course not fully true
  56  * - simplified codebook generation,
  57  *   the old code was meant for other optimizations than we actually do
  58  * - optimized the codebook generation / error estimation for MODE_MC
  59  *
  60  * version 2013-02-12 Rl
  61  * - separated codebook training sets, avoided the transfer of wasted bytes,
  62  *   which yields both better quality and smaller files
  63  * - now using the correct colorspace (TODO: move conversion to libswscale)
  64  *
  65  * version 2013-02-08 Rl
  66  * - fixes/optimization in multistrip encoding and codebook size choice,
  67  *   quality/bitrate is now better than that of the binary proprietary encoder
  68  */
  69
  70 #include "libavutil/intreadwrite.h"
  71 #include "avcodec.h"
  72 #include "libavutil/lfg.h"
  73 #include "elbg.h"
  74 #include "internal.h"
  75
  76 #include "libavutil/avassert.h"
  77 #include "libavutil/opt.h"
  78
  79 #define CVID_HEADER_SIZE 10
  80 #define STRIP_HEADER_SIZE 12
  81 #define CHUNK_HEADER_SIZE 4
  82
  83 #define MB_SIZE 4           //4x4 MBs
  84 #define MB_AREA (MB_SIZE*MB_SIZE)
  85
  86 #define VECTOR_MAX 6        //six or four entries per vector depending on format
  87 #define CODEBOOK_MAX 256    //size of a codebook
  88
  89 #define MAX_STRIPS  32      //Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
  90 #define MIN_STRIPS  1       //Note: having more strips speeds up encoding the frame (this is less obvious)
  91 // MAX_STRIPS limits the maximum quality you can reach
  92 //            when you want high quality on high resolutions,
  93 // MIN_STRIPS limits the minimum efficiently encodable bit rate
  94 //            on low resolutions
  95 // the numbers are only used for brute force optimization for the first frame,
  96 // for the following frames they are adaptively readjusted
  97 // NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
  98 // of strips, currently 32
  99
 100 typedef enum {
 101     MODE_V1_ONLY = 0,
 102     MODE_V1_V4,
 103     MODE_MC,
 104
 105     MODE_COUNT,
 106 } CinepakMode;
 107
 108 typedef enum {
 109     ENC_V1,
 110     ENC_V4,
 111     ENC_SKIP,
 112
 113     ENC_UNCERTAIN
 114 } mb_encoding;
 115
 116 typedef struct {
 117     int v1_vector;                  //index into v1 codebook
 118     int v1_error;                   //error when using V1 encoding
 119     int v4_vector[4];               //indices into v4 codebook
 120     int v4_error;                   //error when using V4 encoding
 121     int skip_error;                 //error when block is skipped (aka copied from last frame)
 122     mb_encoding best_encoding;      //last result from calculate_mode_score()
 123 } mb_info;
 124
 125 typedef struct {
 126     int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
 127     int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
 128     int v1_size;
 129     int v4_size;
 130     CinepakMode mode;
 131 } strip_info;
 132
 133 typedef struct {
 134     const AVClass *class;
 135     AVCodecContext *avctx;
 136     unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
 137     AVFrame *last_frame;
 138     AVFrame *best_frame;
 139     AVFrame *scratch_frame;
 140     AVFrame *input_frame;
 141     enum AVPixelFormat pix_fmt;
 142     int w, h;
 143     int frame_buf_size;
 144     int curframe, keyint;
 145     AVLFG randctx;
 146     uint64_t lambda;
 147     int *codebook_input;
 148     int *codebook_closest;
 149     mb_info *mb;                                //MB RD state
 150     int min_strips;          //the current limit
 151     int max_strips;          //the current limit
 152 #ifdef CINEPAKENC_DEBUG
 153     mb_info *best_mb;                           //TODO: remove. only used for printing stats
 154     int num_v1_mode, num_v4_mode, num_mc_mode;
 155     int num_v1_encs, num_v4_encs, num_skips;
 156 #endif
 157 // options
 158     int max_extra_cb_iterations;
 159     int skip_empty_cb;
 160     int min_min_strips;
 161     int max_max_strips;
 162     int strip_number_delta_range;
 163 } CinepakEncContext;
 164
 165 #define OFFSET(x) offsetof(CinepakEncContext, x)
 166 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 167 static const AVOption options[] = {
 168     { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
 169     { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
 170     { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
 171     { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
 172     { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
 173     { NULL },
 174 };
 175
 176 static const AVClass cinepak_class = {
 177     .class_name = "cinepak",
 178     .item_name  = av_default_item_name,
 179     .option     = options,
 180     .version    = LIBAVUTIL_VERSION_INT,
 181 };
 182
 183 static av_cold int cinepak_encode_init(AVCodecContext *avctx)
 184 {
 185     CinepakEncContext *s = avctx->priv_data;
 186     int x, mb_count, strip_buf_size, frame_buf_size;
 187
 188     if (avctx->width & 3 || avctx->height & 3) {
 189         av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
 190                 avctx->width, avctx->height);
 191         return AVERROR(EINVAL);
 192     }
 193
 194     if (s->min_min_strips > s->max_max_strips) {
 195         av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
 196                 s->min_min_strips, s->max_max_strips);
 197         return AVERROR(EINVAL);
 198     }
 199
 200     if (!(s->last_frame = av_frame_alloc()))
 201         return AVERROR(ENOMEM);
 202     if (!(s->best_frame = av_frame_alloc()))
 203         goto enomem;
 204     if (!(s->scratch_frame = av_frame_alloc()))
 205         goto enomem;
 206     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
 207         if (!(s->input_frame = av_frame_alloc()))
 208             goto enomem;
 209
 210     if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
 211         goto enomem;
 212
 213     if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
 214         goto enomem;
 215
 216     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
 217         if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
 218             goto enomem;
 219
 220     mb_count = avctx->width * avctx->height / MB_AREA;
 221
 222     //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
 223     //and full codebooks being replaced in INTER mode,
 224     // which is 34 bits per MB
 225     //and 2*256 extra flag bits per strip
 226     strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
 227
 228     frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
 229
 230     if (!(s->strip_buf = av_malloc(strip_buf_size)))
 231         goto enomem;
 232
 233     if (!(s->frame_buf = av_malloc(frame_buf_size)))
 234         goto enomem;
 235
 236     if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
 237         goto enomem;
 238
 239 #ifdef CINEPAKENC_DEBUG
 240     if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
 241         goto enomem;
 242 #endif
 243
 244     av_lfg_init(&s->randctx, 1);
 245     s->avctx = avctx;
 246     s->w = avctx->width;
 247     s->h = avctx->height;
 248     s->frame_buf_size = frame_buf_size;
 249     s->curframe = 0;
 250     s->keyint = avctx->keyint_min;
 251     s->pix_fmt = avctx->pix_fmt;
 252
 253     //set up AVFrames
 254     s->last_frame->data[0]        = s->pict_bufs[0];
 255     s->last_frame->linesize[0]    = s->w;
 256     s->best_frame->data[0]        = s->pict_bufs[1];
 257     s->best_frame->linesize[0]    = s->w;
 258     s->scratch_frame->data[0]     = s->pict_bufs[2];
 259     s->scratch_frame->linesize[0] = s->w;
 260
 261     if (s->pix_fmt == AV_PIX_FMT_RGB24) {
 262         s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
 263         s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
 264         s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
 265
 266         s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
 267         s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
 268         s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
 269
 270         s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
 271         s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
 272         s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
 273
 274         s->input_frame->data[0]       = s->pict_bufs[3];
 275         s->input_frame->linesize[0]   = s->w;
 276         s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
 277         s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
 278         s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
 279     }
 280
 281     s->min_strips = s->min_min_strips;
 282     s->max_strips = s->max_max_strips;
 283
 284 #ifdef CINEPAKENC_DEBUG
 285     s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
 286 #endif
 287
 288     return 0;
 289
 290 enomem:
 291     av_frame_free(&s->last_frame);
 292     av_frame_free(&s->best_frame);
 293     av_frame_free(&s->scratch_frame);
 294     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
 295         av_frame_free(&s->input_frame);
 296     av_freep(&s->codebook_input);
 297     av_freep(&s->codebook_closest);
 298     av_freep(&s->strip_buf);
 299     av_freep(&s->frame_buf);
 300     av_freep(&s->mb);
 301 #ifdef CINEPAKENC_DEBUG
 302     av_freep(&s->best_mb);
 303 #endif
 304
 305     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
 306         av_freep(&s->pict_bufs[x]);
 307
 308     return AVERROR(ENOMEM);
 309 }
 310
 311 static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
 312 #ifdef CINEPAK_REPORT_SERR
 313 , int64_t *serr
 314 #endif
 315 )
 316 {
 317     //score = FF_LAMBDA_SCALE * error + lambda * bits
 318     int x;
 319     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 320     int mb_count = s->w * h / MB_AREA;
 321     mb_info *mb;
 322     int64_t score1, score2, score3;
 323     int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
 324                    (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
 325                    CHUNK_HEADER_SIZE) << 3;
 326
 327     //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9"PRId64" score mb_count %i", info->v1_size, info->v4_size, ret, mb_count);
 328
 329 #ifdef CINEPAK_REPORT_SERR
 330     *serr = 0;
 331 #endif
 332
 333     switch(info->mode) {
 334     case MODE_V1_ONLY:
 335         //one byte per MB
 336         ret += s->lambda * 8 * mb_count;
 337
 338 // while calculating we assume all blocks are ENC_V1
 339         for(x = 0; x < mb_count; x++) {
 340             mb = &s->mb[x];
 341             ret += FF_LAMBDA_SCALE * mb->v1_error;
 342 #ifdef CINEPAK_REPORT_SERR
 343             *serr += mb->v1_error;
 344 #endif
 345 // this function is never called for report in MODE_V1_ONLY
 346 //            if(!report)
 347             mb->best_encoding = ENC_V1;
 348         }
 349
 350         break;
 351     case MODE_V1_V4:
 352         //9 or 33 bits per MB
 353         if(report) {
 354 // no moves between the corresponding training sets are allowed
 355             *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
 356             for(x = 0; x < mb_count; x++) {
 357                 int mberr;
 358                 mb = &s->mb[x];
 359                 if(mb->best_encoding == ENC_V1)
 360                     score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
 361                 else
 362                     score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
 363                 ret += score1;
 364 #ifdef CINEPAK_REPORT_SERR
 365                 *serr += mberr;
 366 #endif
 367             }
 368         } else { // find best mode per block
 369             for(x = 0; x < mb_count; x++) {
 370                 mb = &s->mb[x];
 371                 score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
 372                 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
 373
 374                 if(score1 <= score2) {
 375                     ret += score1;
 376 #ifdef CINEPAK_REPORT_SERR
 377                     *serr += mb->v1_error;
 378 #endif
 379                     mb->best_encoding = ENC_V1;
 380                 } else {
 381                     ret += score2;
 382 #ifdef CINEPAK_REPORT_SERR
 383                     *serr += mb->v4_error;
 384 #endif
 385                     mb->best_encoding = ENC_V4;
 386                 }
 387             }
 388         }
 389
 390         break;
 391     case MODE_MC:
 392         //1, 10 or 34 bits per MB
 393         if(report) {
 394             int v1_shrunk = 0, v4_shrunk = 0;
 395             for(x = 0; x < mb_count; x++) {
 396                 mb = &s->mb[x];
 397 // it is OK to move blocks to ENC_SKIP here
 398 // but not to any codebook encoding!
 399                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
 400                 if(mb->best_encoding == ENC_SKIP) {
 401                     ret += score1;
 402 #ifdef CINEPAK_REPORT_SERR
 403                     *serr += mb->skip_error;
 404 #endif
 405                 } else if(mb->best_encoding == ENC_V1) {
 406                     if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
 407                         mb->best_encoding = ENC_SKIP;
 408                         ++v1_shrunk;
 409                         ret += score1;
 410 #ifdef CINEPAK_REPORT_SERR
 411                         *serr += mb->skip_error;
 412 #endif
 413                     } else {
 414                         ret += score2;
 415 #ifdef CINEPAK_REPORT_SERR
 416                         *serr += mb->v1_error;
 417 #endif
 418                     }
 419                 } else {
 420                     if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
 421                         mb->best_encoding = ENC_SKIP;
 422                         ++v4_shrunk;
 423                         ret += score1;
 424 #ifdef CINEPAK_REPORT_SERR
 425                         *serr += mb->skip_error;
 426 #endif
 427                     } else {
 428                         ret += score3;
 429 #ifdef CINEPAK_REPORT_SERR
 430                         *serr += mb->v4_error;
 431 #endif
 432                     }
 433                 }
 434             }
 435             *training_set_v1_shrunk = v1_shrunk;
 436             *training_set_v4_shrunk = v4_shrunk;
 437         } else { // find best mode per block
 438             for(x = 0; x < mb_count; x++) {
 439                 mb = &s->mb[x];
 440                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
 441                 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
 442                 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
 443
 444                 if(score1 <= score2 && score1 <= score3) {
 445                     ret += score1;
 446 #ifdef CINEPAK_REPORT_SERR
 447                     *serr += mb->skip_error;
 448 #endif
 449                     mb->best_encoding = ENC_SKIP;
 450                 } else if(score2 <= score3) {
 451                     ret += score2;
 452 #ifdef CINEPAK_REPORT_SERR
 453                     *serr += mb->v1_error;
 454 #endif
 455                     mb->best_encoding = ENC_V1;
 456                 } else {
 457                     ret += score3;
 458 #ifdef CINEPAK_REPORT_SERR
 459                     *serr += mb->v4_error;
 460 #endif
 461                     mb->best_encoding = ENC_V4;
 462                 }
 463             }
 464         }
 465
 466         break;
 467     }
 468
 469     return ret;
 470 }
 471
 472 static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
 473 {
 474     buf[0] = chunk_type;
 475     AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
 476     return CHUNK_HEADER_SIZE;
 477 }
 478
 479 static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
 480 {
 481     int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 482     int incremental_codebook_replacement_mode = 0; // hardcoded here,
 483                 // the compiler should notice that this is a constant -- rl
 484
 485     ret = write_chunk_header(buf,
 486           s->pix_fmt == AV_PIX_FMT_RGB24 ?
 487            chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
 488            chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
 489           entry_size * size
 490            + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
 491
 492 // we do codebook encoding according to the "intra" mode
 493 // but we keep the "dead" code for reference in case we will want
 494 // to use incremental codebook updates (which actually would give us
 495 // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
 496 // (of course, the code will be not useful as-is)
 497     if(incremental_codebook_replacement_mode) {
 498         int flags = 0;
 499         int flagsind;
 500         for(x = 0; x < size; x++) {
 501             if(flags == 0) {
 502                 flagsind = ret;
 503                 ret += 4;
 504                 flags = 0x80000000;
 505             } else
 506                 flags = ((flags>>1) | 0x80000000);
 507             for(y = 0; y < entry_size; y++)
 508                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
 509             if((flags&0xffffffff) == 0xffffffff) {
 510                 AV_WB32(&buf[flagsind], flags);
 511                 flags = 0;
 512             }
 513         }
 514         if(flags)
 515             AV_WB32(&buf[flagsind], flags);
 516     } else
 517         for(x = 0; x < size; x++)
 518             for(y = 0; y < entry_size; y++)
 519                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
 520
 521     return ret;
 522 }
 523
 524 //sets out to the sub picture starting at (x,y) in in
 525 static void get_sub_picture(CinepakEncContext *s, int x, int y,
 526                             uint8_t * in_data[4], int  in_linesize[4],
 527                             uint8_t *out_data[4], int out_linesize[4])
 528 {
 529     out_data[0] = in_data[0] + x + y * in_linesize[0];
 530     out_linesize[0] = in_linesize[0];
 531
 532     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 533         out_data[1] = in_data[1] + (x >> 1) + (y >> 1) * in_linesize[1];
 534         out_linesize[1] = in_linesize[1];
 535
 536         out_data[2] = in_data[2] + (x >> 1) + (y >> 1) * in_linesize[2];
 537         out_linesize[2] = in_linesize[2];
 538     }
 539 }
 540
 541 //decodes the V1 vector in mb into the 4x4 MB pointed to by data
 542 static void decode_v1_vector(CinepakEncContext *s, uint8_t *data[4],
 543                              int linesize[4], int v1_vector, strip_info *info)
 544 {
 545     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 546
 547     data[0][0] =
 548             data[0][1] =
 549             data[0][    linesize[0]] =
 550             data[0][1+  linesize[0]] = info->v1_codebook[v1_vector*entry_size];
 551
 552     data[0][2] =
 553             data[0][3] =
 554             data[0][2+  linesize[0]] =
 555             data[0][3+  linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
 556
 557     data[0][2*linesize[0]] =
 558             data[0][1+2*linesize[0]] =
 559             data[0][  3*linesize[0]] =
 560             data[0][1+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
 561
 562     data[0][2+2*linesize[0]] =
 563             data[0][3+2*linesize[0]] =
 564             data[0][2+3*linesize[0]] =
 565             data[0][3+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
 566
 567     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 568         data[1][0] =
 569             data[1][1] =
 570             data[1][    linesize[1]] =
 571             data[1][1+  linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
 572
 573         data[2][0] =
 574             data[2][1] =
 575             data[2][    linesize[2]] =
 576             data[2][1+  linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
 577     }
 578 }
 579
 580 //decodes the V4 vectors in mb into the 4x4 MB pointed to by data
 581 static void decode_v4_vector(CinepakEncContext *s, uint8_t *data[4],
 582                              int linesize[4], int *v4_vector, strip_info *info)
 583 {
 584     int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 585
 586     for(i = y = 0; y < 4; y += 2) {
 587         for(x = 0; x < 4; x += 2, i++) {
 588             data[0][x   +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
 589             data[0][x+1 +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
 590             data[0][x   + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
 591             data[0][x+1 + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
 592
 593             if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 594                 data[1][(x>>1) + (y>>1)*linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
 595                 data[2][(x>>1) + (y>>1)*linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
 596             }
 597         }
 598     }
 599 }
 600
 601 static void copy_mb(CinepakEncContext *s,
 602                     uint8_t *a_data[4], int a_linesize[4],
 603                     uint8_t *b_data[4], int b_linesize[4])
 604 {
 605     int y, p;
 606
 607     for(y = 0; y < MB_SIZE; y++) {
 608         memcpy(a_data[0]+y*a_linesize[0], b_data[0]+y*b_linesize[0],
 609                MB_SIZE);
 610     }
 611
 612     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 613         for(p = 1; p <= 2; p++) {
 614             for(y = 0; y < MB_SIZE/2; y++) {
 615                 memcpy(a_data[p] + y*a_linesize[p],
 616                        b_data[p] + y*b_linesize[p],
 617                        MB_SIZE/2);
 618             }
 619         }
 620     }
 621 }
 622
 623 static int encode_mode(CinepakEncContext *s, int h,
 624                        uint8_t *scratch_data[4], int scratch_linesize[4],
 625                        uint8_t *last_data[4], int last_linesize[4],
 626                        strip_info *info, unsigned char *buf)
 627 {
 628     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
 629     int needs_extra_bit, should_write_temp;
 630     unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
 631     mb_info *mb;
 632     uint8_t *sub_scratch_data[4] = {0}, *sub_last_data[4] = {0};
 633     int sub_scratch_linesize[4] = {0}, sub_last_linesize[4] = {0};
 634
 635     //encode codebooks
 636 ////// MacOS vintage decoder compatibility dictates the presence of
 637 ////// the codebook chunk even when the codebook is empty - pretty dumb...
 638 ////// and also the certain order of the codebook chunks -- rl
 639     if(info->v4_size || !s->skip_empty_cb)
 640         ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
 641
 642     if(info->v1_size || !s->skip_empty_cb)
 643         ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
 644
 645     //update scratch picture
 646     for(z = y = 0; y < h; y += MB_SIZE) {
 647         for(x = 0; x < s->w; x += MB_SIZE, z++) {
 648             mb = &s->mb[z];
 649
 650             get_sub_picture(s, x, y, scratch_data, scratch_linesize,
 651                             sub_scratch_data, sub_scratch_linesize);
 652
 653             if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
 654                 get_sub_picture(s, x, y,
 655                                 last_data, last_linesize,
 656                                 sub_last_data, sub_last_linesize);
 657                 copy_mb(s, sub_scratch_data, sub_scratch_linesize,
 658                         sub_last_data, sub_last_linesize);
 659             } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
 660                 decode_v1_vector(s, sub_scratch_data, sub_scratch_linesize,
 661                                  mb->v1_vector, info);
 662             else
 663                 decode_v4_vector(s, sub_scratch_data, sub_scratch_linesize,
 664                                  mb->v4_vector, info);
 665         }
 666     }
 667
 668     switch(info->mode) {
 669     case MODE_V1_ONLY:
 670         //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
 671         ret += write_chunk_header(buf + ret, 0x32, mb_count);
 672
 673         for(x = 0; x < mb_count; x++)
 674             buf[ret++] = s->mb[x].v1_vector;
 675
 676         break;
 677     case MODE_V1_V4:
 678         //remember header position
 679         header_ofs = ret;
 680         ret += CHUNK_HEADER_SIZE;
 681
 682         for(x = 0; x < mb_count; x += 32) {
 683             flags = 0;
 684             for(y = x; y < FFMIN(x+32, mb_count); y++)
 685                 if(s->mb[y].best_encoding == ENC_V4)
 686                     flags |= 1 << (31 - y + x);
 687
 688             AV_WB32(&buf[ret], flags);
 689             ret += 4;
 690
 691             for(y = x; y < FFMIN(x+32, mb_count); y++) {
 692                 mb = &s->mb[y];
 693
 694                 if(mb->best_encoding == ENC_V1)
 695                     buf[ret++] = mb->v1_vector;
 696                 else
 697                     for(z = 0; z < 4; z++)
 698                         buf[ret++] = mb->v4_vector[z];
 699             }
 700         }
 701
 702         write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
 703
 704         break;
 705     case MODE_MC:
 706         //remember header position
 707         header_ofs = ret;
 708         ret += CHUNK_HEADER_SIZE;
 709         flags = bits = temp_size = 0;
 710
 711         for(x = 0; x < mb_count; x++) {
 712             mb = &s->mb[x];
 713             flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
 714             needs_extra_bit = 0;
 715             should_write_temp = 0;
 716
 717             if(mb->best_encoding != ENC_SKIP) {
 718                 if(bits < 32)
 719                     flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
 720                 else
 721                     needs_extra_bit = 1;
 722             }
 723
 724             if(bits == 32) {
 725                 AV_WB32(&buf[ret], flags);
 726                 ret += 4;
 727                 flags = bits = 0;
 728
 729                 if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
 730                     memcpy(&buf[ret], temp, temp_size);
 731                     ret += temp_size;
 732                     temp_size = 0;
 733                 } else
 734                     should_write_temp = 1;
 735             }
 736
 737             if(needs_extra_bit) {
 738                 flags = (mb->best_encoding == ENC_V4) << 31;
 739                 bits = 1;
 740             }
 741
 742             if(mb->best_encoding == ENC_V1)
 743                 temp[temp_size++] = mb->v1_vector;
 744             else if(mb->best_encoding == ENC_V4)
 745                 for(z = 0; z < 4; z++)
 746                     temp[temp_size++] = mb->v4_vector[z];
 747
 748             if(should_write_temp) {
 749                 memcpy(&buf[ret], temp, temp_size);
 750                 ret += temp_size;
 751                 temp_size = 0;
 752             }
 753         }
 754
 755         if(bits > 0) {
 756             AV_WB32(&buf[ret], flags);
 757             ret += 4;
 758             memcpy(&buf[ret], temp, temp_size);
 759             ret += temp_size;
 760         }
 761
 762         write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
 763
 764         break;
 765     }
 766
 767     return ret;
 768 }
 769
 770 //computes distortion of 4x4 MB in b compared to a
 771 static int compute_mb_distortion(CinepakEncContext *s,
 772                                  uint8_t *a_data[4], int a_linesize[4],
 773                                  uint8_t *b_data[4], int b_linesize[4])
 774 {
 775     int x, y, p, d, ret = 0;
 776
 777     for(y = 0; y < MB_SIZE; y++) {
 778         for(x = 0; x < MB_SIZE; x++) {
 779             d = a_data[0][x + y*a_linesize[0]] - b_data[0][x + y*b_linesize[0]];
 780             ret += d*d;
 781         }
 782     }
 783
 784     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 785         for(p = 1; p <= 2; p++) {
 786             for(y = 0; y < MB_SIZE/2; y++) {
 787                 for(x = 0; x < MB_SIZE/2; x++) {
 788                     d = a_data[p][x + y*a_linesize[p]] - b_data[p][x + y*b_linesize[p]];
 789                     ret += d*d;
 790                 }
 791             }
 792         }
 793     }
 794
 795     return ret;
 796 }
 797
 798 // return the possibly adjusted size of the codebook
 799 #define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
 800 static int quantize(CinepakEncContext *s, int h,
 801                     uint8_t *data[4], int linesize[4],
 802                     int v1mode, strip_info *info,
 803                     mb_encoding encoding)
 804 {
 805     int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
 806     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 807     int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
 808     int size = v1mode ? info->v1_size : info->v4_size;
 809     int64_t total_error = 0;
 810     uint8_t vq_pict_buf[(MB_AREA*3)/2];
 811     uint8_t *sub_data    [4], *vq_data    [4];
 812     int      sub_linesize[4],  vq_linesize[4];
 813
 814     for(mbn = i = y = 0; y < h; y += MB_SIZE) {
 815         for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
 816             int *base;
 817
 818             if(CERTAIN(encoding)) {
 819 // use for the training only the blocks known to be to be encoded [sic:-]
 820                if(s->mb[mbn].best_encoding != encoding) continue;
 821             }
 822
 823             base = s->codebook_input + i*entry_size;
 824             if(v1mode) {
 825                 //subsample
 826                 for(j = y2 = 0; y2 < entry_size; y2 += 2) {
 827                     for(x2 = 0; x2 < 4; x2 += 2, j++) {
 828                         plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
 829                         shift = y2 < 4 ? 0 : 1;
 830                         x3 = shift ? 0 : x2;
 831                         y3 = shift ? 0 : y2;
 832                         base[j] = (data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * linesize[plane]] +
 833                                    data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * linesize[plane]] +
 834                                    data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * linesize[plane]] +
 835                                    data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * linesize[plane]]) >> 2;
 836                     }
 837                 }
 838             } else {
 839                 //copy
 840                 for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
 841                     for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
 842                         for(k = 0; k < entry_size; k++, j++) {
 843                             plane = k >= 4 ? k - 3 : 0;
 844
 845                             if(k >= 4) {
 846                                 x3 = (x+x2) >> 1;
 847                                 y3 = (y+y2) >> 1;
 848                             } else {
 849                                 x3 = x + x2 + (k & 1);
 850                                 y3 = y + y2 + (k >> 1);
 851                             }
 852
 853                             base[j] = data[plane][x3 + y3*linesize[plane]];
 854                         }
 855                     }
 856                 }
 857             }
 858             i += v1mode ? 1 : 4;
 859         }
 860     }
 861 //    if(i < mbn*(v1mode ? 1 : 4)) {
 862 //        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
 863 //    }
 864
 865     if(i == 0) // empty training set, nothing to do
 866         return 0;
 867     if(i < size) {
 868         //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
 869         size = i;
 870     }
 871
 872     avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
 873     avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
 874
 875     //setup vq_data, which contains a single MB
 876     vq_data[0] = vq_pict_buf;
 877     vq_linesize[0] = MB_SIZE;
 878     vq_data[1] = &vq_pict_buf[MB_AREA];
 879     vq_data[2] = vq_data[1] + (MB_AREA >> 2);
 880     vq_linesize[1] = vq_linesize[2] = MB_SIZE >> 1;
 881
 882     //copy indices
 883     for(i = j = y = 0; y < h; y += MB_SIZE) {
 884         for(x = 0; x < s->w; x += MB_SIZE, j++) {
 885             mb_info *mb = &s->mb[j];
 886 // skip uninteresting blocks if we know their preferred encoding
 887             if(CERTAIN(encoding) && mb->best_encoding != encoding)
 888                 continue;
 889
 890             //point sub_data to current MB
 891             get_sub_picture(s, x, y, data, linesize, sub_data, sub_linesize);
 892
 893             if(v1mode) {
 894                 mb->v1_vector = s->codebook_closest[i];
 895
 896                 //fill in vq_data with V1 data
 897                 decode_v1_vector(s, vq_data, vq_linesize, mb->v1_vector, info);
 898
 899                 mb->v1_error = compute_mb_distortion(s, sub_data, sub_linesize,
 900                                                      vq_data, vq_linesize);
 901                 total_error += mb->v1_error;
 902             } else {
 903                 for(k = 0; k < 4; k++)
 904                     mb->v4_vector[k] = s->codebook_closest[i+k];
 905
 906                 //fill in vq_data with V4 data
 907                 decode_v4_vector(s, vq_data, vq_linesize, mb->v4_vector, info);
 908
 909                 mb->v4_error = compute_mb_distortion(s, sub_data, sub_linesize,
 910                                                      vq_data, vq_linesize);
 911                 total_error += mb->v4_error;
 912             }
 913             i += v1mode ? 1 : 4;
 914         }
 915     }
 916 // check that we did it right in the beginning of the function
 917     av_assert0(i >= size); // training set is no smaller than the codebook
 918
 919     //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);
 920
 921     return size;
 922 }
 923
 924 static void calculate_skip_errors(CinepakEncContext *s, int h,
 925                                   uint8_t *last_data[4], int last_linesize[4],
 926                                   uint8_t *data[4], int linesize[4],
 927                                   strip_info *info)
 928 {
 929     int x, y, i;
 930     uint8_t *sub_last_data    [4], *sub_pict_data    [4];
 931     int      sub_last_linesize[4],  sub_pict_linesize[4];
 932
 933     for(i = y = 0; y < h; y += MB_SIZE) {
 934         for(x = 0; x < s->w; x += MB_SIZE, i++) {
 935             get_sub_picture(s, x, y, last_data,     last_linesize,
 936                                  sub_last_data, sub_last_linesize);
 937             get_sub_picture(s, x, y,      data,          linesize,
 938                                  sub_pict_data, sub_pict_linesize);
 939
 940             s->mb[i].skip_error = compute_mb_distortion(s,
 941                                             sub_last_data, sub_last_linesize,
 942                                             sub_pict_data, sub_pict_linesize);
 943         }
 944     }
 945 }
 946
 947 static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
 948 {
 949 // actually we are exclusively using intra strip coding (how much can we win
 950 // otherwise? how to choose which part of a codebook to update?),
 951 // keyframes are different only because we disallow ENC_SKIP on them -- rl
 952 // (besides, the logic here used to be inverted: )
 953 //    buf[0] = keyframe ? 0x11: 0x10;
 954     buf[0] = keyframe ? 0x10: 0x11;
 955     AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
 956 //    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
 957     AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
 958     AV_WB16(&buf[6], 0);
 959 //    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
 960     AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
 961     AV_WB16(&buf[10], s->w);
 962     //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
 963 }
 964
 965 static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe,
 966                     uint8_t *last_data[4], int last_linesize[4],
 967                     uint8_t *data[4], int linesize[4],
 968                     uint8_t *scratch_data[4], int scratch_linesize[4],
 969                     unsigned char *buf, int64_t *best_score
 970 #ifdef CINEPAK_REPORT_SERR
 971 , int64_t *best_serr
 972 #endif
 973 )
 974 {
 975     int64_t score = 0;
 976 #ifdef CINEPAK_REPORT_SERR
 977     int64_t serr;
 978 #endif
 979     int best_size = 0;
 980     strip_info info;
 981 // for codebook optimization:
 982     int v1enough, v1_size, v4enough, v4_size;
 983     int new_v1_size, new_v4_size;
 984     int v1shrunk, v4shrunk;
 985
 986     if(!keyframe)
 987         calculate_skip_errors(s, h, last_data, last_linesize, data, linesize,
 988                               &info);
 989
 990     //try some powers of 4 for the size of the codebooks
 991     //constraint the v4 codebook to be no bigger than v1 one,
 992     //(and no less than v1_size/4)
 993     //thus making v1 preferable and possibly losing small details? should be ok
 994 #define SMALLEST_CODEBOOK 1
 995     for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
 996         for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
 997             //try all modes
 998             for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
 999                 //don't allow MODE_MC in intra frames
1000                 if(keyframe && mode == MODE_MC)
1001                     continue;
1002
1003                 if(mode == MODE_V1_ONLY) {
1004                     info.v1_size = v1_size;
1005 // the size may shrink even before optimizations if the input is short:
1006                     info.v1_size = quantize(s, h, data, linesize, 1,
1007                                             &info, ENC_UNCERTAIN);
1008                     if(info.v1_size < v1_size)
1009 // too few eligible blocks, no sense in trying bigger sizes
1010                         v1enough = 1;
1011
1012                     info.v4_size = 0;
1013                 } else { // mode != MODE_V1_ONLY
1014                     // if v4 codebook is empty then only allow V1-only mode
1015                     if(!v4_size)
1016                         continue;
1017
1018                     if(mode == MODE_V1_V4) {
1019                         info.v4_size = v4_size;
1020                         info.v4_size = quantize(s, h, data, linesize, 0,
1021                                                 &info, ENC_UNCERTAIN);
1022                         if(info.v4_size < v4_size)
1023 // too few eligible blocks, no sense in trying bigger sizes
1024                             v4enough = 1;
1025                     }
1026                 }
1027
1028                 info.mode = mode;
1029 // choose the best encoding per block, based on current experience
1030                 score = calculate_mode_score(s, h, &info, 0,
1031                                              &v1shrunk, &v4shrunk
1032 #ifdef CINEPAK_REPORT_SERR
1033 , &serr
1034 #endif
1035 );
1036
1037                 if(mode != MODE_V1_ONLY){
1038                     int extra_iterations_limit = s->max_extra_cb_iterations;
1039 // recompute the codebooks, omitting the extra blocks
1040 // we assume we _may_ come here with more blocks to encode than before
1041                     info.v1_size = v1_size;
1042                     new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
1043                     if(new_v1_size < info.v1_size){
1044                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1045                         info.v1_size = new_v1_size;
1046                     }
1047 // we assume we _may_ come here with more blocks to encode than before
1048                     info.v4_size = v4_size;
1049                     new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
1050                     if(new_v4_size < info.v4_size) {
1051                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
1052                         info.v4_size = new_v4_size;
1053                     }
1054 // calculate the resulting score
1055 // (do not move blocks to codebook encodings now, as some blocks may have
1056 // got bigger errors despite a smaller training set - but we do not
1057 // ever grow the training sets back)
1058                     for(;;) {
1059                         score = calculate_mode_score(s, h, &info, 1,
1060                                                      &v1shrunk, &v4shrunk
1061 #ifdef CINEPAK_REPORT_SERR
1062 , &serr
1063 #endif
1064 );
1065 // do we have a reason to reiterate? if so, have we reached the limit?
1066                         if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
1067 // recompute the codebooks, omitting the extra blocks
1068                         if(v1shrunk) {
1069                             info.v1_size = v1_size;
1070                             new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
1071                             if(new_v1_size < info.v1_size){
1072                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1073                                 info.v1_size = new_v1_size;
1074                             }
1075                         }
1076                         if(v4shrunk) {
1077                             info.v4_size = v4_size;
1078                             new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
1079                             if(new_v4_size < info.v4_size) {
1080                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
1081                                 info.v4_size = new_v4_size;
1082                             }
1083                         }
1084                     }
1085                 }
1086
1087                 //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);
1088
1089                 if(best_size == 0 || score < *best_score) {
1090
1091                     *best_score = score;
1092 #ifdef CINEPAK_REPORT_SERR
1093                     *best_serr = serr;
1094 #endif
1095                     best_size = encode_mode(s, h,
1096                                             scratch_data, scratch_linesize,
1097                                             last_data, last_linesize, &info,
1098                                             s->strip_buf + STRIP_HEADER_SIZE);
1099
1100                     //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B", mode, info.v1_size, info.v4_size, score, best_size);
1101                     //av_log(s->avctx, AV_LOG_INFO, "\n");
1102 #ifdef CINEPAK_REPORT_SERR
1103                     av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
1104 #endif
1105
1106 #ifdef CINEPAKENC_DEBUG
1107                     //save MB encoding choices
1108                     memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
1109 #endif
1110
1111                     //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
1112                     write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
1113
1114                 }
1115             }
1116         }
1117     }
1118
1119 #ifdef CINEPAKENC_DEBUG
1120     //gather stats. this will only work properly of MAX_STRIPS == 1
1121     if(best_info.mode == MODE_V1_ONLY) {
1122         s->num_v1_mode++;
1123         s->num_v1_encs += s->w*h/MB_AREA;
1124     } else {
1125         if(best_info.mode == MODE_V1_V4)
1126             s->num_v4_mode++;
1127         else
1128             s->num_mc_mode++;
1129
1130         int x;
1131         for(x = 0; x < s->w*h/MB_AREA; x++)
1132             if(s->best_mb[x].best_encoding == ENC_V1)
1133                 s->num_v1_encs++;
1134             else if(s->best_mb[x].best_encoding == ENC_V4)
1135                 s->num_v4_encs++;
1136             else
1137                 s->num_skips++;
1138     }
1139 #endif
1140
1141     best_size += STRIP_HEADER_SIZE;
1142     memcpy(buf, s->strip_buf, best_size);
1143
1144     return best_size;
1145 }
1146
1147 static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1148 {
1149     buf[0] = isakeyframe ? 0 : 1;
1150     AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1151     AV_WB16(&buf[4], s->w);
1152     AV_WB16(&buf[6], s->h);
1153     AV_WB16(&buf[8], num_strips);
1154
1155     return CVID_HEADER_SIZE;
1156 }
1157
1158 static int rd_frame(CinepakEncContext *s, const AVFrame *frame,
1159                     int isakeyframe, unsigned char *buf, int buf_size)
1160 {
1161     int num_strips, strip, i, y, nexty, size, temp_size;
1162     uint8_t *last_data    [4], *data    [4], *scratch_data    [4];
1163     int      last_linesize[4],  linesize[4],  scratch_linesize[4];
1164     int64_t best_score = 0, score, score_temp;
1165 #ifdef CINEPAK_REPORT_SERR
1166     int64_t best_serr = 0, serr, serr_temp;
1167 #endif
1168
1169     int best_nstrips = -1, best_size = -1; // mark as uninitialzed
1170
1171     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
1172         int x;
1173 // build a copy of the given frame in the correct colorspace
1174         for(y = 0; y < s->h; y += 2) {
1175             for(x = 0; x < s->w; x += 2) {
1176                 uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
1177                 ir[0] = frame->data[0] + x*3 + y*frame->linesize[0];
1178                 ir[1] = ir[0] + frame->linesize[0];
1179                 get_sub_picture(s, x, y,
1180                                 s->input_frame->data, s->input_frame->linesize,
1181                                 scratch_data, scratch_linesize);
1182                 r = g = b = 0;
1183                 for(i=0; i<4; ++i) {
1184                     int i1, i2;
1185                     i1 = (i&1); i2 = (i>=2);
1186                     rr = ir[i2][i1*3+0];
1187                     gg = ir[i2][i1*3+1];
1188                     bb = ir[i2][i1*3+2];
1189                     r += rr; g += gg; b += bb;
1190 // using fixed point arithmetic for portable repeatability, scaling by 2^23
1191 // "Y"
1192 //                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
1193                     rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
1194                     if(      rr <   0) rr =   0;
1195                     else if (rr > 255) rr = 255;
1196                     scratch_data[0][i1 + i2*scratch_linesize[0]] = rr;
1197                 }
1198 // let us scale down as late as possible
1199 //                r /= 4; g /= 4; b /= 4;
1200 // "U"
1201 //                rr = -0.1429*r - 0.2857*g + 0.4286*b;
1202                 rr = (-299683*r - 599156*g + 898839*b) >> 23;
1203                 if(      rr < -128) rr = -128;
1204                 else if (rr >  127) rr =  127;
1205                 scratch_data[1][0] = rr + 128; // quantize needs unsigned
1206 // "V"
1207 //                rr = 0.3571*r - 0.2857*g - 0.0714*b;
1208                 rr = (748893*r - 599156*g - 149737*b) >> 23;
1209                 if(      rr < -128) rr = -128;
1210                 else if (rr >  127) rr =  127;
1211                 scratch_data[2][0] = rr + 128; // quantize needs unsigned
1212             }
1213         }
1214     }
1215
1216     //would be nice but quite certainly incompatible with vintage players:
1217     // support encoding zero strips (meaning skip the whole frame)
1218     for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
1219         score = 0;
1220         size = 0;
1221 #ifdef CINEPAK_REPORT_SERR
1222         serr = 0;
1223 #endif
1224
1225         for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
1226             int strip_height;
1227
1228             nexty = strip * s->h / num_strips; // <= s->h
1229             //make nexty the next multiple of 4 if not already there
1230             if(nexty & 3)
1231                 nexty += 4 - (nexty & 3);
1232
1233             strip_height = nexty - y;
1234             if(strip_height <= 0) { // can this ever happen?
1235                 av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
1236                 continue;
1237             }
1238
1239             if(s->pix_fmt == AV_PIX_FMT_RGB24)
1240                 get_sub_picture(s, 0, y,
1241                                 s->input_frame->data, s->input_frame->linesize,
1242                                 data, linesize);
1243             else
1244                 get_sub_picture(s, 0, y,
1245                                 (uint8_t **)frame->data, (int*)frame->linesize,
1246                                 data, linesize);
1247             get_sub_picture(s, 0, y,
1248                             s->last_frame->data, s->last_frame->linesize,
1249                             last_data, last_linesize);
1250             get_sub_picture(s, 0, y,
1251                             s->scratch_frame->data, s->scratch_frame->linesize,
1252                             scratch_data, scratch_linesize);
1253
1254             if((temp_size = rd_strip(s, y, strip_height, isakeyframe,
1255                                      last_data, last_linesize, data, linesize,
1256                                      scratch_data, scratch_linesize,
1257                                      s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
1258 #ifdef CINEPAK_REPORT_SERR
1259 , &serr_temp
1260 #endif
1261 )) < 0)
1262                 return temp_size;
1263
1264             score += score_temp;
1265 #ifdef CINEPAK_REPORT_SERR
1266             serr += serr_temp;
1267 #endif
1268             size += temp_size;
1269             //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
1270             //av_log(s->avctx, AV_LOG_INFO, "\n");
1271         }
1272
1273         if(best_score == 0 || score < best_score) {
1274             best_score = score;
1275 #ifdef CINEPAK_REPORT_SERR
1276             best_serr = serr;
1277 #endif
1278             best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
1279             //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, score, best_size);
1280 #ifdef CINEPAK_REPORT_SERR
1281             av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
1282 #endif
1283
1284             FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
1285             memcpy(buf, s->frame_buf, best_size);
1286             best_nstrips = num_strips;
1287         }
1288 // avoid trying too many strip numbers without a real reason
1289 // (this makes the processing of the very first frame faster)
1290         if(num_strips - best_nstrips > 4)
1291             break;
1292     }
1293
1294     av_assert0(best_nstrips >= 0 && best_size >= 0);
1295
1296 // let the number of strips slowly adapt to the changes in the contents,
1297 // compared to full bruteforcing every time this will occasionally lead
1298 // to some r/d performance loss but makes encoding up to several times faster
1299     if(!s->strip_number_delta_range) {
1300         if(best_nstrips == s->max_strips) { // let us try to step up
1301             s->max_strips = best_nstrips + 1;
1302             if(s->max_strips >= s->max_max_strips)
1303                 s->max_strips = s->max_max_strips;
1304         } else { // try to step down
1305             s->max_strips = best_nstrips;
1306         }
1307         s->min_strips = s->max_strips - 1;
1308         if(s->min_strips < s->min_min_strips)
1309             s->min_strips = s->min_min_strips;
1310     } else {
1311         s->max_strips = best_nstrips + s->strip_number_delta_range;
1312         if(s->max_strips >= s->max_max_strips)
1313             s->max_strips = s->max_max_strips;
1314         s->min_strips = best_nstrips - s->strip_number_delta_range;
1315         if(s->min_strips < s->min_min_strips)
1316             s->min_strips = s->min_min_strips;
1317     }
1318
1319     return best_size;
1320 }
1321
1322 static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1323                                 const AVFrame *frame, int *got_packet)
1324 {
1325     CinepakEncContext *s = avctx->priv_data;
1326     int ret;
1327
1328     s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1329
1330     if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
1331         return ret;
1332     ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1333     pkt->size = ret;
1334     if (s->curframe == 0)
1335         pkt->flags |= AV_PKT_FLAG_KEY;
1336     *got_packet = 1;
1337
1338     FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1339
1340     if (++s->curframe >= s->keyint)
1341         s->curframe = 0;
1342
1343     return 0;
1344 }
1345
1346 static av_cold int cinepak_encode_end(AVCodecContext *avctx)
1347 {
1348     CinepakEncContext *s = avctx->priv_data;
1349     int x;
1350
1351     av_frame_free(&s->last_frame);
1352     av_frame_free(&s->best_frame);
1353     av_frame_free(&s->scratch_frame);
1354     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1355         av_frame_free(&s->input_frame);
1356     av_freep(&s->codebook_input);
1357     av_freep(&s->codebook_closest);
1358     av_freep(&s->strip_buf);
1359     av_freep(&s->frame_buf);
1360     av_freep(&s->mb);
1361 #ifdef CINEPAKENC_DEBUG
1362     av_freep(&s->best_mb);
1363 #endif
1364
1365     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1366         av_freep(&s->pict_bufs[x]);
1367
1368 #ifdef CINEPAKENC_DEBUG
1369     av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1370         s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1371 #endif
1372
1373     return 0;
1374 }
1375
1376 AVCodec ff_cinepak_encoder = {
1377     .name           = "cinepak",
1378     .type           = AVMEDIA_TYPE_VIDEO,
1379     .id             = AV_CODEC_ID_CINEPAK,
1380     .priv_data_size = sizeof(CinepakEncContext),
1381     .init           = cinepak_encode_init,
1382     .encode2        = cinepak_encode_frame,
1383     .close          = cinepak_encode_end,
1384     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1385     .long_name      = NULL_IF_CONFIG_SMALL("Cinepak"),
1386     .priv_class     = &cinepak_class,
1387 };