1/*
 * Cinepak encoder (c) 2011 Tomas Härdin
3 * http://titan.codemill.se/~tomhar/cinepakenc.patch
4 *
5 * Fixes and improvements, vintage decoders compatibility
6 *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
7
8Permission is hereby granted, free of charge, to any person obtaining a
9copy of this software and associated documentation files (the "Software"),
10to deal in the Software without restriction, including without limitation
11the rights to use, copy, modify, merge, publish, distribute, sublicense,
12and/or sell copies of the Software, and to permit persons to whom the
13Software is furnished to do so, subject to the following conditions:
14
15The above copyright notice and this permission notice shall be included
16in all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24OTHER DEALINGS IN THE SOFTWARE.
25
26 * TODO:
27 * - optimize: color space conversion, ...
28 * - implement options to set the min/max number of strips?
29 * MAYBE:
30 * - "optimally" split the frame into several non-regular areas
31 *   using a separate codebook pair for each area and approximating
32 *   the area by several rectangular strips (generally not full width ones)
33 *   (use quadtree splitting? a simple fixed-granularity grid?)
34 *
35 *
36 * version 2014-01-23 Rl
37 * - added option handling for flexibility
38 *
39 * version 2014-01-21 Rl
40 * - believe it or not, now we get even smaller files, with better quality
41 *   (which means I missed an optimization earlier :)
42 *
43 * version 2014-01-20 Rl
44 * - made the encoder compatible with vintage decoders
45 *   and added some yet unused code for possible future
46 *   incremental codebook updates
47 * - fixed a small memory leak
48 *
49 * version 2013-04-28 Rl
50 * - bugfixed codebook optimization logic
51 *
52 * version 2013-02-14 Rl
53 * "Valentine's Day" version:
54 * - made strip division more robust
55 * - minimized bruteforcing the number of strips,
 *   (costs some R/D but speeds up compression a lot), the heuristic
57 *   assumption is that score as a function of the number of strips has
58 *   one wide minimum which moves slowly, of course not fully true
59 * - simplified codebook generation,
60 *   the old code was meant for other optimizations than we actually do
61 * - optimized the codebook generation / error estimation for MODE_MC
62 *
63 * version 2013-02-12 Rl
64 * - separated codebook training sets, avoided the transfer of wasted bytes,
65 *   which yields both better quality and smaller files
66 * - now using the correct colorspace (TODO: move conversion to libswscale)
67 *
68 * version 2013-02-08 Rl
69 * - fixes/optimization in multistrip encoding and codebook size choice,
70 *   quality/bitrate is now better than that of the binary proprietary encoder
71 */
72
73#include "libavutil/intreadwrite.h"
74#include "avcodec.h"
75#include "libavutil/lfg.h"
76#include "elbg.h"
77#include "internal.h"
78
79#include "libavutil/avassert.h"
80#include "libavutil/opt.h"
81
/* Sizes of the fixed headers accounted for when sizing the output buffers. */
#define CVID_HEADER_SIZE  10
#define STRIP_HEADER_SIZE 12
#define CHUNK_HEADER_SIZE 4

/* Macroblocks are 4x4 pixels. */
#define MB_SIZE 4
#define MB_AREA (MB_SIZE * MB_SIZE)

#define VECTOR_MAX   6      /* six or four entries per vector depending on format */
#define CODEBOOK_MAX 256    /* size of a codebook */

/*
 * Hard bounds on the number of strips per frame.
 *
 * MAX_STRIPS: having fewer choices regarding the number of strips speeds up
 *             encoding (obviously); it limits the maximum quality you can
 *             reach when you want high quality on high resolutions.
 * MIN_STRIPS: having more strips speeds up encoding the frame (this is less
 *             obvious); it limits the minimum efficiently encodable bit rate
 *             on low resolutions.
 *
 * The numbers are only used for brute force optimization for the first frame,
 * for the following frames they are adaptively readjusted.
 *
 * NOTE: the decoder in ffmpeg has its own arbitrary limitation on the number
 * of strips, currently 32.
 */
#define MAX_STRIPS 32
#define MIN_STRIPS 1
102
/* Per-strip coding mode; selects which macroblock encodings a strip may use. */
typedef enum {
    MODE_V1_ONLY = 0,   /* every macroblock is coded as V1 */
    MODE_V1_V4   = 1,   /* each macroblock is coded as V1 or V4 */
    MODE_MC      = 2,   /* each macroblock is skipped or coded as V1 or V4 */

    MODE_COUNT   = 3,   /* number of modes above, not a mode itself */
} CinepakMode;
110
/* Encoding chosen for a single macroblock. */
typedef enum {
    ENC_V1        = 0,  /* one V1 codebook index for the whole macroblock */
    ENC_V4        = 1,  /* four V4 codebook indices, one per 2x2 quadrant */
    ENC_SKIP      = 2,  /* macroblock is copied from the last frame */

    ENC_UNCERTAIN = 3   /* no specific encoding is being selected (see CERTAIN()) */
} mb_encoding;
118
119typedef struct {
120    int v1_vector;                  //index into v1 codebook
121    int v1_error;                   //error when using V1 encoding
122    int v4_vector[4];               //indices into v4 codebooks
123    int v4_error;                   //error when using V4 encoding
124    int skip_error;                 //error when block is skipped (aka copied from last frame)
125    mb_encoding best_encoding;      //last result from calculate_mode_score()
126} mb_info;
127
128typedef struct {
129    int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
130    int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
131    int v1_size;
132    int v4_size;
133    CinepakMode mode;
134} strip_info;
135
136typedef struct {
137    const AVClass *class;
138    AVCodecContext *avctx;
139    unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
140    AVFrame *last_frame;
141    AVFrame *best_frame;
142    AVFrame *scratch_frame;
143    AVFrame *input_frame;
144    enum AVPixelFormat pix_fmt;
145    int w, h;
146    int frame_buf_size;
147    int curframe, keyint;
148    AVLFG randctx;
149    uint64_t lambda;
150    int *codebook_input;
151    int *codebook_closest;
152    mb_info *mb;                                //MB RD state
153    int min_strips;          //the current limit
154    int max_strips;          //the current limit
155#ifdef CINEPAKENC_DEBUG
156    mb_info *best_mb;                           //TODO: remove. only used for printing stats
157    int num_v1_mode, num_v4_mode, num_mc_mode;
158    int num_v1_encs, num_v4_encs, num_skips;
159#endif
160// options
161    int max_extra_cb_iterations;
162    int skip_empty_cb;
163    int min_min_strips;
164    int max_max_strips;
165    int strip_number_delta_range;
166} CinepakEncContext;
167
168#define OFFSET(x) offsetof(CinepakEncContext, x)
169#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
170static const AVOption options[] = {
171    { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
172    { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
173    { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
174    { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
175    { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
176    { NULL },
177};
178
179static const AVClass cinepak_class = {
180    .class_name = "cinepak",
181    .item_name  = av_default_item_name,
182    .option     = options,
183    .version    = LIBAVUTIL_VERSION_INT,
184};
185
186static av_cold int cinepak_encode_init(AVCodecContext *avctx)
187{
188    CinepakEncContext *s = avctx->priv_data;
189    int x, mb_count, strip_buf_size, frame_buf_size;
190
191    if (avctx->width & 3 || avctx->height & 3) {
192        av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
193                avctx->width, avctx->height);
194        return AVERROR(EINVAL);
195    }
196
197    if (s->min_min_strips > s->max_max_strips) {
198        av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
199                s->min_min_strips, s->max_max_strips);
200        return AVERROR(EINVAL);
201    }
202
203    if (!(s->last_frame = av_frame_alloc()))
204        return AVERROR(ENOMEM);
205    if (!(s->best_frame = av_frame_alloc()))
206        goto enomem;
207    if (!(s->scratch_frame = av_frame_alloc()))
208        goto enomem;
209    if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
210        if (!(s->input_frame = av_frame_alloc()))
211            goto enomem;
212
213    if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
214        goto enomem;
215
216    if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
217        goto enomem;
218
219    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
220        if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
221            goto enomem;
222
223    mb_count = avctx->width * avctx->height / MB_AREA;
224
225    //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
226    //and full codebooks being replaced in INTER mode,
227    // which is 34 bits per MB
228    //and 2*256 extra flag bits per strip
229    strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
230
231    frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
232
233    if (!(s->strip_buf = av_malloc(strip_buf_size)))
234        goto enomem;
235
236    if (!(s->frame_buf = av_malloc(frame_buf_size)))
237        goto enomem;
238
239    if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
240        goto enomem;
241
242#ifdef CINEPAKENC_DEBUG
243    if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
244        goto enomem;
245#endif
246
247    av_lfg_init(&s->randctx, 1);
248    s->avctx = avctx;
249    s->w = avctx->width;
250    s->h = avctx->height;
251    s->frame_buf_size = frame_buf_size;
252    s->curframe = 0;
253    s->keyint = avctx->keyint_min;
254    s->pix_fmt = avctx->pix_fmt;
255
256    //set up AVFrames
257    s->last_frame->data[0]        = s->pict_bufs[0];
258    s->last_frame->linesize[0]    = s->w;
259    s->best_frame->data[0]        = s->pict_bufs[1];
260    s->best_frame->linesize[0]    = s->w;
261    s->scratch_frame->data[0]     = s->pict_bufs[2];
262    s->scratch_frame->linesize[0] = s->w;
263
264    if (s->pix_fmt == AV_PIX_FMT_RGB24) {
265        s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
266        s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
267        s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
268
269        s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
270        s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
271        s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
272
273        s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
274        s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
275        s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
276
277        s->input_frame->data[0]       = s->pict_bufs[3];
278        s->input_frame->linesize[0]   = s->w;
279        s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
280        s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
281        s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
282    }
283
284    s->min_strips = s->min_min_strips;
285    s->max_strips = s->max_max_strips;
286
287#ifdef CINEPAKENC_DEBUG
288    s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
289#endif
290
291    return 0;
292
293enomem:
294    av_frame_free(&s->last_frame);
295    av_frame_free(&s->best_frame);
296    av_frame_free(&s->scratch_frame);
297    if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
298        av_frame_free(&s->input_frame);
299    av_freep(&s->codebook_input);
300    av_freep(&s->codebook_closest);
301    av_freep(&s->strip_buf);
302    av_freep(&s->frame_buf);
303    av_freep(&s->mb);
304#ifdef CINEPAKENC_DEBUG
305    av_freep(&s->best_mb);
306#endif
307
308    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
309        av_freep(&s->pict_bufs[x]);
310
311    return AVERROR(ENOMEM);
312}
313
314static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
315#ifdef CINEPAK_REPORT_SERR
316, int64_t *serr
317#endif
318)
319{
320    //score = FF_LAMBDA_SCALE * error + lambda * bits
321    int x;
322    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
323    int mb_count = s->w * h / MB_AREA;
324    mb_info *mb;
325    int64_t score1, score2, score3;
326    int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
327                   (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
328                   CHUNK_HEADER_SIZE) << 3;
329
330    //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9lli score mb_count %i", info->v1_size, info->v4_size, (long long int)ret, mb_count);
331
332#ifdef CINEPAK_REPORT_SERR
333    *serr = 0;
334#endif
335
336    switch(info->mode) {
337    case MODE_V1_ONLY:
338        //one byte per MB
339        ret += s->lambda * 8 * mb_count;
340
341// while calculating we assume all blocks are ENC_V1
342        for(x = 0; x < mb_count; x++) {
343            mb = &s->mb[x];
344            ret += FF_LAMBDA_SCALE * mb->v1_error;
345#ifdef CINEPAK_REPORT_SERR
346            *serr += mb->v1_error;
347#endif
348// this function is never called for report in MODE_V1_ONLY
349//            if(!report)
350            mb->best_encoding = ENC_V1;
351        }
352
353        break;
354    case MODE_V1_V4:
355        //9 or 33 bits per MB
356        if(report) {
357// no moves between the corresponding training sets are allowed
358            *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
359            for(x = 0; x < mb_count; x++) {
360                int mberr;
361                mb = &s->mb[x];
362                if(mb->best_encoding == ENC_V1)
363                    score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
364                else
365                    score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
366                ret += score1;
367#ifdef CINEPAK_REPORT_SERR
368                *serr += mberr;
369#endif
370            }
371        } else { // find best mode per block
372            for(x = 0; x < mb_count; x++) {
373                mb = &s->mb[x];
374                score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
375                score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
376
377                if(score1 <= score2) {
378                    ret += score1;
379#ifdef CINEPAK_REPORT_SERR
380                    *serr += mb->v1_error;
381#endif
382                    mb->best_encoding = ENC_V1;
383                } else {
384                    ret += score2;
385#ifdef CINEPAK_REPORT_SERR
386                    *serr += mb->v4_error;
387#endif
388                    mb->best_encoding = ENC_V4;
389                }
390            }
391        }
392
393        break;
394    case MODE_MC:
395        //1, 10 or 34 bits per MB
396        if(report) {
397            int v1_shrunk = 0, v4_shrunk = 0;
398            for(x = 0; x < mb_count; x++) {
399                mb = &s->mb[x];
400// it is OK to move blocks to ENC_SKIP here
401// but not to any codebook encoding!
402                score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
403                if(mb->best_encoding == ENC_SKIP) {
404                    ret += score1;
405#ifdef CINEPAK_REPORT_SERR
406                    *serr += mb->skip_error;
407#endif
408                } else if(mb->best_encoding == ENC_V1) {
409                    if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
410                        mb->best_encoding = ENC_SKIP;
411                        ++v1_shrunk;
412                        ret += score1;
413#ifdef CINEPAK_REPORT_SERR
414                        *serr += mb->skip_error;
415#endif
416                    } else {
417                        ret += score2;
418#ifdef CINEPAK_REPORT_SERR
419                        *serr += mb->v1_error;
420#endif
421                    }
422                } else {
423                    if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
424                        mb->best_encoding = ENC_SKIP;
425                        ++v4_shrunk;
426                        ret += score1;
427#ifdef CINEPAK_REPORT_SERR
428                        *serr += mb->skip_error;
429#endif
430                    } else {
431                        ret += score3;
432#ifdef CINEPAK_REPORT_SERR
433                        *serr += mb->v4_error;
434#endif
435                    }
436                }
437            }
438            *training_set_v1_shrunk = v1_shrunk;
439            *training_set_v4_shrunk = v4_shrunk;
440        } else { // find best mode per block
441            for(x = 0; x < mb_count; x++) {
442                mb = &s->mb[x];
443                score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
444                score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
445                score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
446
447                if(score1 <= score2 && score1 <= score3) {
448                    ret += score1;
449#ifdef CINEPAK_REPORT_SERR
450                    *serr += mb->skip_error;
451#endif
452                    mb->best_encoding = ENC_SKIP;
453                } else if(score2 <= score3) {
454                    ret += score2;
455#ifdef CINEPAK_REPORT_SERR
456                    *serr += mb->v1_error;
457#endif
458                    mb->best_encoding = ENC_V1;
459                } else {
460                    ret += score3;
461#ifdef CINEPAK_REPORT_SERR
462                    *serr += mb->v4_error;
463#endif
464                    mb->best_encoding = ENC_V4;
465                }
466            }
467        }
468
469        break;
470    }
471
472    return ret;
473}
474
475static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
476{
477    buf[0] = chunk_type;
478    AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
479    return CHUNK_HEADER_SIZE;
480}
481
482static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
483{
484    int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
485    int incremental_codebook_replacement_mode = 0; // hardcoded here,
486                // the compiler should notice that this is a constant -- rl
487
488    ret = write_chunk_header(buf,
489          s->pix_fmt == AV_PIX_FMT_RGB24 ?
490           chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
491           chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
492          entry_size * size
493           + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
494
495// we do codebook encoding according to the "intra" mode
496// but we keep the "dead" code for reference in case we will want
497// to use incremental codebook updates (which actually would give us
498// "kind of" motion compensation, especially in 1 strip/frame case) -- rl
499// (of course, the code will be not useful as-is)
500    if(incremental_codebook_replacement_mode) {
501        int flags = 0;
502        int flagsind;
503        for(x = 0; x < size; x++) {
504            if(flags == 0) {
505                flagsind = ret;
506                ret += 4;
507                flags = 0x80000000;
508            } else
509                flags = ((flags>>1) | 0x80000000);
510            for(y = 0; y < entry_size; y++)
511                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
512            if((flags&0xffffffff) == 0xffffffff) {
513                AV_WB32(&buf[flagsind], flags);
514                flags = 0;
515            }
516        }
517        if(flags)
518            AV_WB32(&buf[flagsind], flags);
519    } else
520        for(x = 0; x < size; x++)
521            for(y = 0; y < entry_size; y++)
522                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
523
524    return ret;
525}
526
527//sets out to the sub picture starting at (x,y) in in
528static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, AVPicture *out)
529{
530    out->data[0] = in->data[0] + x + y * in->linesize[0];
531    out->linesize[0] = in->linesize[0];
532
533    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
534        out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1];
535        out->linesize[1] = in->linesize[1];
536
537        out->data[2] = in->data[2] + (x >> 1) + (y >> 1) * in->linesize[2];
538        out->linesize[2] = in->linesize[2];
539    }
540}
541
542//decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict
543static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
544{
545    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
546
547    sub_pict->data[0][0] =
548            sub_pict->data[0][1] =
549            sub_pict->data[0][    sub_pict->linesize[0]] =
550            sub_pict->data[0][1+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size];
551
552    sub_pict->data[0][2] =
553            sub_pict->data[0][3] =
554            sub_pict->data[0][2+  sub_pict->linesize[0]] =
555            sub_pict->data[0][3+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
556
557    sub_pict->data[0][2*sub_pict->linesize[0]] =
558            sub_pict->data[0][1+2*sub_pict->linesize[0]] =
559            sub_pict->data[0][  3*sub_pict->linesize[0]] =
560            sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
561
562    sub_pict->data[0][2+2*sub_pict->linesize[0]] =
563            sub_pict->data[0][3+2*sub_pict->linesize[0]] =
564            sub_pict->data[0][2+3*sub_pict->linesize[0]] =
565            sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
566
567    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
568        sub_pict->data[1][0] =
569            sub_pict->data[1][1] =
570            sub_pict->data[1][    sub_pict->linesize[1]] =
571            sub_pict->data[1][1+  sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
572
573        sub_pict->data[2][0] =
574            sub_pict->data[2][1] =
575            sub_pict->data[2][    sub_pict->linesize[2]] =
576            sub_pict->data[2][1+  sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
577    }
578}
579
580//decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict
581static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
582{
583    int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
584
585    for(i = y = 0; y < 4; y += 2) {
586        for(x = 0; x < 4; x += 2, i++) {
587            sub_pict->data[0][x   +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
588            sub_pict->data[0][x+1 +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
589            sub_pict->data[0][x   + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
590            sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
591
592            if(s->pix_fmt == AV_PIX_FMT_RGB24) {
593                sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
594                sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
595            }
596        }
597    }
598}
599
600static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b)
601{
602    int y, p;
603
604    for(y = 0; y < MB_SIZE; y++) {
605        memcpy(a->data[0]+y*a->linesize[0], b->data[0]+y*b->linesize[0],
606               MB_SIZE);
607    }
608
609    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
610        for(p = 1; p <= 2; p++) {
611            for(y = 0; y < MB_SIZE/2; y++) {
612                memcpy(a->data[p] + y*a->linesize[p],
613                       b->data[p] + y*b->linesize[p],
614                       MB_SIZE/2);
615            }
616        }
617    }
618}
619
620static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
621{
622    int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
623    int needs_extra_bit, should_write_temp;
624    unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
625    mb_info *mb;
626    AVPicture sub_scratch = {{0}}, sub_last = {{0}};
627
628    //encode codebooks
629////// MacOS vintage decoder compatibility dictates the presence of
630////// the codebook chunk even when the codebook is empty - pretty dumb...
631////// and also the certain order of the codebook chunks -- rl
632    if(info->v4_size || !s->skip_empty_cb)
633        ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
634
635    if(info->v1_size || !s->skip_empty_cb)
636        ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
637
638    //update scratch picture
639    for(z = y = 0; y < h; y += MB_SIZE) {
640        for(x = 0; x < s->w; x += MB_SIZE, z++) {
641            mb = &s->mb[z];
642
643            get_sub_picture(s, x, y, scratch_pict, &sub_scratch);
644
645            if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
646                get_sub_picture(s, x, y, last_pict, &sub_last);
647                copy_mb(s, &sub_scratch, &sub_last);
648            } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
649                decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
650            else
651                decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
652        }
653    }
654
655    switch(info->mode) {
656    case MODE_V1_ONLY:
657        //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
658        ret += write_chunk_header(buf + ret, 0x32, mb_count);
659
660        for(x = 0; x < mb_count; x++)
661            buf[ret++] = s->mb[x].v1_vector;
662
663        break;
664    case MODE_V1_V4:
665        //remember header position
666        header_ofs = ret;
667        ret += CHUNK_HEADER_SIZE;
668
669        for(x = 0; x < mb_count; x += 32) {
670            flags = 0;
671            for(y = x; y < FFMIN(x+32, mb_count); y++)
672                if(s->mb[y].best_encoding == ENC_V4)
673                    flags |= 1 << (31 - y + x);
674
675            AV_WB32(&buf[ret], flags);
676            ret += 4;
677
678            for(y = x; y < FFMIN(x+32, mb_count); y++) {
679                mb = &s->mb[y];
680
681                if(mb->best_encoding == ENC_V1)
682                    buf[ret++] = mb->v1_vector;
683                else
684                    for(z = 0; z < 4; z++)
685                        buf[ret++] = mb->v4_vector[z];
686            }
687        }
688
689        write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
690
691        break;
692    case MODE_MC:
693        //remember header position
694        header_ofs = ret;
695        ret += CHUNK_HEADER_SIZE;
696        flags = bits = temp_size = 0;
697
698        for(x = 0; x < mb_count; x++) {
699            mb = &s->mb[x];
700            flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
701            needs_extra_bit = 0;
702            should_write_temp = 0;
703
704            if(mb->best_encoding != ENC_SKIP) {
705                if(bits < 32)
706                    flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
707                else
708                    needs_extra_bit = 1;
709            }
710
711            if(bits == 32) {
712                AV_WB32(&buf[ret], flags);
713                ret += 4;
714                flags = bits = 0;
715
716                if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
717                    memcpy(&buf[ret], temp, temp_size);
718                    ret += temp_size;
719                    temp_size = 0;
720                } else
721                    should_write_temp = 1;
722            }
723
724            if(needs_extra_bit) {
725                flags = (mb->best_encoding == ENC_V4) << 31;
726                bits = 1;
727            }
728
729            if(mb->best_encoding == ENC_V1)
730                temp[temp_size++] = mb->v1_vector;
731            else if(mb->best_encoding == ENC_V4)
732                for(z = 0; z < 4; z++)
733                    temp[temp_size++] = mb->v4_vector[z];
734
735            if(should_write_temp) {
736                memcpy(&buf[ret], temp, temp_size);
737                ret += temp_size;
738                temp_size = 0;
739            }
740        }
741
742        if(bits > 0) {
743            AV_WB32(&buf[ret], flags);
744            ret += 4;
745            memcpy(&buf[ret], temp, temp_size);
746            ret += temp_size;
747        }
748
749        write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
750
751        break;
752    }
753
754    return ret;
755}
756
757//computes distortion of 4x4 MB in b compared to a
758static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *b)
759{
760    int x, y, p, d, ret = 0;
761
762    for(y = 0; y < MB_SIZE; y++) {
763        for(x = 0; x < MB_SIZE; x++) {
764            d = a->data[0][x + y*a->linesize[0]] - b->data[0][x + y*b->linesize[0]];
765            ret += d*d;
766        }
767    }
768
769    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
770        for(p = 1; p <= 2; p++) {
771            for(y = 0; y < MB_SIZE/2; y++) {
772                for(x = 0; x < MB_SIZE/2; x++) {
773                    d = a->data[p][x + y*a->linesize[p]] - b->data[p][x + y*b->linesize[p]];
774                    ret += d*d;
775                }
776            }
777        }
778    }
779
780    return ret;
781}
782
783// return the possibly adjusted size of the codebook
784#define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
785static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
786                    int v1mode, strip_info *info,
787                    mb_encoding encoding)
788{
789    int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
790    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
791    int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
792    int size = v1mode ? info->v1_size : info->v4_size;
793    int64_t total_error = 0;
794    uint8_t vq_pict_buf[(MB_AREA*3)/2];
795    AVPicture sub_pict, vq_pict;
796
797    for(mbn = i = y = 0; y < h; y += MB_SIZE) {
798        for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
799            int *base;
800
801            if(CERTAIN(encoding)) {
802// use for the training only the blocks known to be to be encoded [sic:-]
803               if(s->mb[mbn].best_encoding != encoding) continue;
804            }
805
806            base = s->codebook_input + i*entry_size;
807            if(v1mode) {
808                //subsample
809                for(j = y2 = 0; y2 < entry_size; y2 += 2) {
810                    for(x2 = 0; x2 < 4; x2 += 2, j++) {
811                        plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
812                        shift = y2 < 4 ? 0 : 1;
813                        x3 = shift ? 0 : x2;
814                        y3 = shift ? 0 : y2;
815                        base[j] = (pict->data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * pict->linesize[plane]] +
816                                   pict->data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * pict->linesize[plane]] +
817                                   pict->data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * pict->linesize[plane]] +
818                                   pict->data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * pict->linesize[plane]]) >> 2;
819                    }
820                }
821            } else {
822                //copy
823                for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
824                    for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
825                        for(k = 0; k < entry_size; k++, j++) {
826                            plane = k >= 4 ? k - 3 : 0;
827
828                            if(k >= 4) {
829                                x3 = (x+x2) >> 1;
830                                y3 = (y+y2) >> 1;
831                            } else {
832                                x3 = x + x2 + (k & 1);
833                                y3 = y + y2 + (k >> 1);
834                            }
835
836                            base[j] = pict->data[plane][x3 + y3*pict->linesize[plane]];
837                        }
838                    }
839                }
840            }
841            i += v1mode ? 1 : 4;
842        }
843    }
844//    if(i < mbn*(v1mode ? 1 : 4)) {
845//        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
846//    }
847
848    if(i == 0) // empty training set, nothing to do
849        return 0;
850    if(i < size) {
851        //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
852        size = i;
853    }
854
855    avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
856    avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
857
858    //setup vq_pict, which contains a single MB
859    vq_pict.data[0] = vq_pict_buf;
860    vq_pict.linesize[0] = MB_SIZE;
861    vq_pict.data[1] = &vq_pict_buf[MB_AREA];
862    vq_pict.data[2] = vq_pict.data[1] + (MB_AREA >> 2);
863    vq_pict.linesize[1] = vq_pict.linesize[2] = MB_SIZE >> 1;
864
865    //copy indices
866    for(i = j = y = 0; y < h; y += MB_SIZE) {
867        for(x = 0; x < s->w; x += MB_SIZE, j++) {
868            mb_info *mb = &s->mb[j];
869// skip uninteresting blocks if we know their preferred encoding
870            if(CERTAIN(encoding) && mb->best_encoding != encoding)
871                continue;
872
873            //point sub_pict to current MB
874            get_sub_picture(s, x, y, pict, &sub_pict);
875
876            if(v1mode) {
877                mb->v1_vector = s->codebook_closest[i];
878
879                //fill in vq_pict with V1 data
880                decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
881
882                mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
883                total_error += mb->v1_error;
884            } else {
885                for(k = 0; k < 4; k++)
886                    mb->v4_vector[k] = s->codebook_closest[i+k];
887
888                //fill in vq_pict with V4 data
889                decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
890
891                mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
892                total_error += mb->v4_error;
893            }
894            i += v1mode ? 1 : 4;
895        }
896    }
897// check that we did it right in the beginning of the function
898    av_assert0(i >= size); // training set is no smaller than the codebook
899
900    //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %lli\n", v1mode, size, i, (long long int)total_error);
901
902    return size;
903}
904
905static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
906{
907    int x, y, i;
908    AVPicture sub_last, sub_pict;
909
910    for(i = y = 0; y < h; y += MB_SIZE) {
911        for(x = 0; x < s->w; x += MB_SIZE, i++) {
912            get_sub_picture(s, x, y, last_pict, &sub_last);
913            get_sub_picture(s, x, y, pict,      &sub_pict);
914
915            s->mb[i].skip_error = compute_mb_distortion(s, &sub_last, &sub_pict);
916        }
917    }
918}
919
920static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
921{
922// actually we are exclusively using intra strip coding (how much can we win
923// otherwise? how to choose which part of a codebook to update?),
924// keyframes are different only because we disallow ENC_SKIP on them -- rl
925// (besides, the logic here used to be inverted: )
926//    buf[0] = keyframe ? 0x11: 0x10;
927    buf[0] = keyframe ? 0x10: 0x11;
928    AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
929//    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
930    AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
931    AV_WB16(&buf[6], 0);
932//    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
933    AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
934    AV_WB16(&buf[10], s->w);
935    //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
936}
937
938static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
939#ifdef CINEPAK_REPORT_SERR
940, int64_t *best_serr
941#endif
942)
943{
944    int64_t score = 0;
945#ifdef CINEPAK_REPORT_SERR
946    int64_t serr;
947#endif
948    int best_size = 0;
949    strip_info info;
950// for codebook optimization:
951    int v1enough, v1_size, v4enough, v4_size;
952    int new_v1_size, new_v4_size;
953    int v1shrunk, v4shrunk;
954
955    if(!keyframe)
956        calculate_skip_errors(s, h, last_pict, pict, &info);
957
958    //try some powers of 4 for the size of the codebooks
959    //constraint the v4 codebook to be no bigger than v1 one,
960    //(and no less than v1_size/4)
961    //thus making v1 preferable and possibly losing small details? should be ok
962#define SMALLEST_CODEBOOK 1
963    for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
964        for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
965            //try all modes
966            for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
967                //don't allow MODE_MC in intra frames
968                if(keyframe && mode == MODE_MC)
969                    continue;
970
971                if(mode == MODE_V1_ONLY) {
972                    info.v1_size = v1_size;
973// the size may shrink even before optimizations if the input is short:
974                    info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
975                    if(info.v1_size < v1_size)
976// too few eligible blocks, no sense in trying bigger sizes
977                        v1enough = 1;
978
979                    info.v4_size = 0;
980                } else { // mode != MODE_V1_ONLY
981                    // if v4 codebook is empty then only allow V1-only mode
982                    if(!v4_size)
983                        continue;
984
985                    if(mode == MODE_V1_V4) {
986                        info.v4_size = v4_size;
987                        info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
988                        if(info.v4_size < v4_size)
989// too few eligible blocks, no sense in trying bigger sizes
990                            v4enough = 1;
991                    }
992                }
993
994                info.mode = mode;
995// choose the best encoding per block, based on current experience
996                score = calculate_mode_score(s, h, &info, 0,
997                                             &v1shrunk, &v4shrunk
998#ifdef CINEPAK_REPORT_SERR
999, &serr
1000#endif
1001);
1002
1003                if(mode != MODE_V1_ONLY){
1004                    int extra_iterations_limit = s->max_extra_cb_iterations;
1005// recompute the codebooks, omitting the extra blocks
1006// we assume we _may_ come here with more blocks to encode than before
1007                    info.v1_size = v1_size;
1008                    new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
1009                    if(new_v1_size < info.v1_size){
1010                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1011                        info.v1_size = new_v1_size;
1012                    }
1013// we assume we _may_ come here with more blocks to encode than before
1014                    info.v4_size = v4_size;
1015                    new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
1016                    if(new_v4_size < info.v4_size) {
1017                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
1018                        info.v4_size = new_v4_size;
1019                    }
1020// calculate the resulting score
1021// (do not move blocks to codebook encodings now, as some blocks may have
1022// got bigger errors despite a smaller training set - but we do not
1023// ever grow the training sets back)
1024                    for(;;) {
1025                        score = calculate_mode_score(s, h, &info, 1,
1026                                                     &v1shrunk, &v4shrunk
1027#ifdef CINEPAK_REPORT_SERR
1028, &serr
1029#endif
1030);
1031// do we have a reason to reiterate? if so, have we reached the limit?
1032                        if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
1033// recompute the codebooks, omitting the extra blocks
1034                        if(v1shrunk) {
1035                            info.v1_size = v1_size;
1036                            new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
1037                            if(new_v1_size < info.v1_size){
1038                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1039                                info.v1_size = new_v1_size;
1040                            }
1041                        }
1042                        if(v4shrunk) {
1043                            info.v4_size = v4_size;
1044                            new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
1045                            if(new_v4_size < info.v4_size) {
1046                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
1047                                info.v4_size = new_v4_size;
1048                            }
1049                        }
1050                    }
1051                }
1052
1053                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %lli\n", v1_size, v4_size, (long long int)score);
1054
1055                if(best_size == 0 || score < *best_score) {
1056
1057                    *best_score = score;
1058#ifdef CINEPAK_REPORT_SERR
1059                    *best_serr = serr;
1060#endif
1061                    best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
1062
1063                    //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B", mode, info.v1_size, info.v4_size, (long long int)score, best_size);
1064                    //av_log(s->avctx, AV_LOG_INFO, "\n");
1065#ifdef CINEPAK_REPORT_SERR
1066                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B\n", mode, v1_size, v4_size, (long long int)serr, best_size);
1067#endif
1068
1069#ifdef CINEPAKENC_DEBUG
1070                    //save MB encoding choices
1071                    memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
1072#endif
1073
1074                    //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
1075                    write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
1076
1077                }
1078            }
1079        }
1080    }
1081
1082#ifdef CINEPAKENC_DEBUG
1083    //gather stats. this will only work properly of MAX_STRIPS == 1
1084    if(best_info.mode == MODE_V1_ONLY) {
1085        s->num_v1_mode++;
1086        s->num_v1_encs += s->w*h/MB_AREA;
1087    } else {
1088        if(best_info.mode == MODE_V1_V4)
1089            s->num_v4_mode++;
1090        else
1091            s->num_mc_mode++;
1092
1093        int x;
1094        for(x = 0; x < s->w*h/MB_AREA; x++)
1095            if(s->best_mb[x].best_encoding == ENC_V1)
1096                s->num_v1_encs++;
1097            else if(s->best_mb[x].best_encoding == ENC_V4)
1098                s->num_v4_encs++;
1099            else
1100                s->num_skips++;
1101    }
1102#endif
1103
1104    best_size += STRIP_HEADER_SIZE;
1105    memcpy(buf, s->strip_buf, best_size);
1106
1107    return best_size;
1108}
1109
1110static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1111{
1112    buf[0] = isakeyframe ? 0 : 1;
1113    AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1114    AV_WB16(&buf[4], s->w);
1115    AV_WB16(&buf[6], s->h);
1116    AV_WB16(&buf[8], num_strips);
1117
1118    return CVID_HEADER_SIZE;
1119}
1120
1121static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
1122{
1123    int num_strips, strip, i, y, nexty, size, temp_size;
1124    AVPicture last_pict, pict, scratch_pict;
1125    int64_t best_score = 0, score, score_temp;
1126#ifdef CINEPAK_REPORT_SERR
1127    int64_t best_serr = 0, serr, serr_temp;
1128#endif
1129
1130    int best_nstrips = -1, best_size = -1; // mark as uninitialzed
1131
1132    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
1133        int x;
1134// build a copy of the given frame in the correct colorspace
1135        for(y = 0; y < s->h; y += 2) {
1136            for(x = 0; x < s->w; x += 2) {
1137                uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
1138                ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
1139                ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
1140                get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
1141                r = g = b = 0;
1142                for(i=0; i<4; ++i) {
1143                    int i1, i2;
1144                    i1 = (i&1); i2 = (i>=2);
1145                    rr = ir[i2][i1*3+0];
1146                    gg = ir[i2][i1*3+1];
1147                    bb = ir[i2][i1*3+2];
1148                    r += rr; g += gg; b += bb;
1149// using fixed point arithmetic for portable repeatability, scaling by 2^23
1150// "Y"
1151//                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
1152                    rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
1153                    if(      rr <   0) rr =   0;
1154                    else if (rr > 255) rr = 255;
1155                    scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
1156                }
1157// let us scale down as late as possible
1158//                r /= 4; g /= 4; b /= 4;
1159// "U"
1160//                rr = -0.1429*r - 0.2857*g + 0.4286*b;
1161                rr = (-299683*r - 599156*g + 898839*b) >> 23;
1162                if(      rr < -128) rr = -128;
1163                else if (rr >  127) rr =  127;
1164                scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
1165// "V"
1166//                rr = 0.3571*r - 0.2857*g - 0.0714*b;
1167                rr = (748893*r - 599156*g - 149737*b) >> 23;
1168                if(      rr < -128) rr = -128;
1169                else if (rr >  127) rr =  127;
1170                scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
1171            }
1172        }
1173    }
1174
1175    //would be nice but quite certainly incompatible with vintage players:
1176    // support encoding zero strips (meaning skip the whole frame)
1177    for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
1178        score = 0;
1179        size = 0;
1180#ifdef CINEPAK_REPORT_SERR
1181        serr = 0;
1182#endif
1183
1184        for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
1185            int strip_height;
1186
1187            nexty = strip * s->h / num_strips; // <= s->h
1188            //make nexty the next multiple of 4 if not already there
1189            if(nexty & 3)
1190                nexty += 4 - (nexty & 3);
1191
1192            strip_height = nexty - y;
1193            if(strip_height <= 0) { // can this ever happen?
1194                av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
1195                continue;
1196            }
1197
1198            if(s->pix_fmt == AV_PIX_FMT_RGB24)
1199                get_sub_picture(s, 0, y, (AVPicture*)s->input_frame,    &pict);
1200            else
1201                get_sub_picture(s, 0, y, (AVPicture*)frame,              &pict);
1202            get_sub_picture(s, 0, y, (AVPicture*)s->last_frame,    &last_pict);
1203            get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);
1204
1205            if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
1206#ifdef CINEPAK_REPORT_SERR
1207, &serr_temp
1208#endif
1209)) < 0)
1210                return temp_size;
1211
1212            score += score_temp;
1213#ifdef CINEPAK_REPORT_SERR
1214            serr += serr_temp;
1215#endif
1216            size += temp_size;
1217            //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
1218            //av_log(s->avctx, AV_LOG_INFO, "\n");
1219        }
1220
1221        if(best_score == 0 || score < best_score) {
1222            best_score = score;
1223#ifdef CINEPAK_REPORT_SERR
1224            best_serr = serr;
1225#endif
1226            best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
1227            //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)score, best_size);
1228#ifdef CINEPAK_REPORT_SERR
1229            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)serr, best_size);
1230#endif
1231
1232            FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
1233            memcpy(buf, s->frame_buf, best_size);
1234            best_nstrips = num_strips;
1235        }
1236// avoid trying too many strip numbers without a real reason
1237// (this makes the processing of the very first frame faster)
1238        if(num_strips - best_nstrips > 4)
1239            break;
1240    }
1241
1242    av_assert0(best_nstrips >= 0 && best_size >= 0);
1243
1244// let the number of strips slowly adapt to the changes in the contents,
1245// compared to full bruteforcing every time this will occasionally lead
1246// to some r/d performance loss but makes encoding up to several times faster
1247    if(!s->strip_number_delta_range) {
1248        if(best_nstrips == s->max_strips) { // let us try to step up
1249            s->max_strips = best_nstrips + 1;
1250            if(s->max_strips >= s->max_max_strips)
1251                s->max_strips = s->max_max_strips;
1252        } else { // try to step down
1253            s->max_strips = best_nstrips;
1254        }
1255        s->min_strips = s->max_strips - 1;
1256        if(s->min_strips < s->min_min_strips)
1257            s->min_strips = s->min_min_strips;
1258    } else {
1259        s->max_strips = best_nstrips + s->strip_number_delta_range;
1260        if(s->max_strips >= s->max_max_strips)
1261            s->max_strips = s->max_max_strips;
1262        s->min_strips = best_nstrips - s->strip_number_delta_range;
1263        if(s->min_strips < s->min_min_strips)
1264            s->min_strips = s->min_min_strips;
1265    }
1266
1267    return best_size;
1268}
1269
1270static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1271                                const AVFrame *frame, int *got_packet)
1272{
1273    CinepakEncContext *s = avctx->priv_data;
1274    int ret;
1275
1276    s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1277
1278    if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size)) < 0)
1279        return ret;
1280    ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1281    pkt->size = ret;
1282    if (s->curframe == 0)
1283        pkt->flags |= AV_PKT_FLAG_KEY;
1284    *got_packet = 1;
1285
1286    FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1287
1288    if (++s->curframe >= s->keyint)
1289        s->curframe = 0;
1290
1291    return 0;
1292}
1293
1294static av_cold int cinepak_encode_end(AVCodecContext *avctx)
1295{
1296    CinepakEncContext *s = avctx->priv_data;
1297    int x;
1298
1299    av_frame_free(&s->last_frame);
1300    av_frame_free(&s->best_frame);
1301    av_frame_free(&s->scratch_frame);
1302    if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1303        av_frame_free(&s->input_frame);
1304    av_freep(&s->codebook_input);
1305    av_freep(&s->codebook_closest);
1306    av_freep(&s->strip_buf);
1307    av_freep(&s->frame_buf);
1308    av_freep(&s->mb);
1309#ifdef CINEPAKENC_DEBUG
1310    av_freep(&s->best_mb);
1311#endif
1312
1313    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1314        av_freep(&s->pict_bufs[x]);
1315
1316#ifdef CINEPAKENC_DEBUG
1317    av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1318        s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1319#endif
1320
1321    return 0;
1322}
1323
1324AVCodec ff_cinepak_encoder = {
1325    .name           = "cinepak",
1326    .type           = AVMEDIA_TYPE_VIDEO,
1327    .id             = AV_CODEC_ID_CINEPAK,
1328    .priv_data_size = sizeof(CinepakEncContext),
1329    .init           = cinepak_encode_init,
1330    .encode2        = cinepak_encode_frame,
1331    .close          = cinepak_encode_end,
1332    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1333    .long_name      = NULL_IF_CONFIG_SMALL("Cinepak / CVID"),
1334    .priv_class     = &cinepak_class,
1335};
1336