1/*
2 * FFV1 codec for libavcodec
3 *
4 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * This file is part of Libav.
7 *
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23/**
24 * @file
25 * FF Video Codec 1 (a lossless codec)
26 */
27
28#include "avcodec.h"
29#include "get_bits.h"
30#include "put_bits.h"
31#include "dsputil.h"
32#include "rangecoder.h"
33#include "golomb.h"
34#include "mathops.h"
35#include "libavutil/avassert.h"
36
/* Size of the per-context plane array (FFV1Context.plane[]). */
#define MAX_PLANES 4
/* Number of adaptive state bytes that make up one range-coder context. */
#define CONTEXT_SIZE 32

/* Upper bound on the number of quantization table sets kept per context. */
#define MAX_QUANT_TABLES 8
/* Number of per-neighbour quantization tables in one table set
 * (get_context() combines at most this many table lookups). */
#define MAX_CONTEXT_INPUTS 5

/* Defined outside this file; used below as the log2 of the run length
 * threshold in Golomb run mode (1<<ff_log2_run[run_index]). */
extern const uint8_t ff_log2_run[41];
44
/**
 * Quantizer with 5 output levels (-2..2), selected by encode_init() when
 * bits_per_raw_sample is above 8.
 * The index is the low 8 bits of a neighbour difference (see get_context());
 * entries 128..255 therefore hold the negative levels.
 */
static const int8_t quant5_10bit[256]={
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,
};
63
/**
 * Quantizer with 5 output levels (-2..2), selected by encode_init() when
 * bits_per_raw_sample is at most 8.
 * The index is the low 8 bits of a neighbour difference (see get_context());
 * entries 128..255 therefore hold the negative levels.
 */
static const int8_t quant5[256]={
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
82
/**
 * Quantizer with 9 output levels (-4..4), selected by encode_init() when
 * bits_per_raw_sample is above 8.
 * The index is the low 8 bits of a neighbour difference (see get_context());
 * entries 128..255 therefore hold the negative levels.
 */
static const int8_t quant9_10bit[256]={
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-2,-2,-2,-2,-1,-1,-1,-1,-1,-1,-1,-1,-0,-0,-0,-0,
};
101
/**
 * Quantizer with 11 output levels (-5..5), selected by encode_init() when
 * bits_per_raw_sample is at most 8.
 * The index is the low 8 bits of a neighbour difference (see get_context());
 * entries 128..255 therefore hold the negative levels.
 */
static const int8_t quant11[256]={
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
120
/**
 * Default state transition table: encode_init() copies entries 1..255 into
 * FFV1Context.state_transition when the custom range-coder mode (ac > 1)
 * is selected.
 */
static const uint8_t ver2_state[256]= {
   0,  10,  10,  10,  10,  16,  16,  16,  28,  16,  16,  29,  42,  49,  20,  49,
  59,  25,  26,  26,  27,  31,  33,  33,  33,  34,  34,  37,  67,  38,  39,  39,
  40,  40,  41,  79,  43,  44,  45,  45,  48,  48,  64,  50,  51,  52,  88,  52,
  53,  74,  55,  57,  58,  58,  74,  60, 101,  61,  62,  84,  66,  66,  68,  69,
  87,  82,  71,  97,  73,  73,  82,  75, 111,  77,  94,  78,  87,  81,  83,  97,
  85,  83,  94,  86,  99,  89,  90,  99, 111,  92,  93, 134,  95,  98, 105,  98,
 105, 110, 102, 108, 102, 118, 103, 106, 106, 113, 109, 112, 114, 112, 116, 125,
 115, 116, 117, 117, 126, 119, 125, 121, 121, 123, 145, 124, 126, 131, 127, 129,
 165, 130, 132, 138, 133, 135, 145, 136, 137, 139, 146, 141, 143, 142, 144, 148,
 147, 155, 151, 149, 151, 150, 152, 157, 153, 154, 156, 168, 158, 162, 161, 160,
 172, 163, 169, 164, 166, 184, 167, 170, 177, 174, 171, 173, 182, 176, 180, 178,
 175, 189, 179, 181, 186, 183, 192, 185, 200, 187, 191, 188, 190, 197, 193, 196,
 197, 194, 195, 196, 198, 202, 199, 201, 210, 203, 207, 204, 205, 206, 208, 214,
 209, 211, 221, 212, 213, 215, 224, 216, 217, 218, 219, 220, 222, 228, 223, 225,
 226, 224, 227, 229, 240, 230, 231, 232, 233, 234, 235, 236, 238, 239, 237, 242,
 241, 243, 242, 244, 245, 246, 247, 248, 249, 250, 251, 252, 252, 253, 254, 255,
};
139
/**
 * Adaptive state of one context in Golomb-Rice mode.
 * All fields are maintained by update_vlc_state().
 */
typedef struct VlcState{
    /* Running sum of coded values, kept within (-count, 0] after each update. */
    int16_t drift;
    /* Running sum of absolute coded values; halved when count reaches 128. */
    uint16_t error_sum;
    /* Correction subtracted before coding and added back after decoding. */
    int8_t bias;
    /* Number of updates seen; halved when it reaches 128. */
    uint8_t count;
} VlcState;
146
/**
 * Per-plane coding state.
 */
typedef struct PlaneContext{
    /* Quantization tables consulted by get_context() for this plane. */
    int16_t quant_table[MAX_CONTEXT_INPUTS][256];
    /* Which table set of the owning FFV1Context this plane uses. */
    int quant_table_index;
    /* Number of contexts; sizes the state / vlc_state allocations. */
    int context_count;
    /* Range-coder states, CONTEXT_SIZE bytes per context (allocated when ac is set). */
    uint8_t (*state)[CONTEXT_SIZE];
    /* Golomb-Rice states, one per context (allocated when ac is 0). */
    VlcState *vlc_state;
    /* Not referenced in the part of the file reviewed here. */
    uint8_t interlace_bit_state[2];
} PlaneContext;
155
/* Capacity of FFV1Context.slice_context[]. */
#define MAX_SLICES 256

/**
 * Codec private context. One instance is the codec's priv_data; additional
 * copies are created per slice by init_slice_contexts().
 */
typedef struct FFV1Context{
    /* Owning codec context, set by common_init(). */
    AVCodecContext *avctx;
    /* Range coder used when ac is non-zero. */
    RangeCoder c;
    /* Bit reader passed to get_vlc_symbol() by its callers (Golomb-Rice mode). */
    GetBitContext gb;
    /* Bit writer used when ac is 0 (Golomb-Rice mode). */
    PutBitContext pb;
    /* Counts of coded bits per range-coder state value and bit value,
     * gathered by put_symbol_inline() when statistics are requested. */
    uint64_t rc_stat[256][2];
    /* Counts of coded bits per table set, context, state index and bit value. */
    uint64_t (*rc_stat2[MAX_QUANT_TABLES])[32][2];
    /* Bitstream version; selects the header layout in write_header(). */
    int version;
    /* Copied from avctx->width / avctx->height by common_init(). */
    int width, height;
    int chroma_h_shift, chroma_v_shift;
    /* Copy of avctx->flags taken by common_init(). */
    int flags;
    int picture_number;
    /* Frame storage; encode_init() points avctx->coded_frame here. */
    AVFrame picture;
    int plane_count;
    int ac;                              ///< 1=range coder <-> 0=golomb rice
    PlaneContext plane[MAX_PLANES];
    /* Table set written into the header by write_header() for version < 2. */
    int16_t quant_table[MAX_CONTEXT_INPUTS][256];
    /* All table sets; written by write_extra_header(). */
    int16_t quant_tables[MAX_QUANT_TABLES][MAX_CONTEXT_INPUTS][256];
    /* Number of contexts of each table set. */
    int context_count[MAX_QUANT_TABLES];
    /* Custom range-coder state transitions, used when ac > 1. */
    uint8_t state_transition[256];
    /* Initial state bytes per table set, see allocate_initial_states(). */
    uint8_t (*initial_states[MAX_QUANT_TABLES])[32];
    /* Index into ff_log2_run[], carried from line to line in Golomb run mode. */
    int run_index;
    /* 0 for the planar YUV formats, 1 for RGB32 (see encode_init()). */
    int colorspace;
    /* Scratch rows holding the current and previous lines of samples. */
    int16_t *sample_buffer;
    /* Not referenced in the part of the file reviewed here. */
    int gob_count;

    /* Number of valid entries in quant_tables / context_count / initial_states. */
    int quant_table_count;

    DSPContext dsp;

    /* Per-slice copies of this context, see init_slice_contexts(). */
    struct FFV1Context *slice_context[MAX_SLICES];
    /* num_h_slices * num_v_slices. */
    int slice_count;
    int num_v_slices;
    int num_h_slices;
    /* Size and top-left position of this slice in pixels. */
    int slice_width;
    int slice_height;
    int slice_x;
    int slice_y;
}FFV1Context;
197
/**
 * Wrap a value into the signed range that fits in the given number of bits,
 * i.e. [-(1<<(bits-1)), (1<<(bits-1))-1], by modular reduction.
 *
 * @param diff value to wrap
 * @param bits width of the target range in bits
 * @return the wrapped value
 */
static av_always_inline int fold(int diff, int bits){
    if(bits != 8){
        const int half= 1<<(bits-1);
        const int mask= (1<<bits)-1;

        /* shift into the unsigned range, reduce, shift back */
        return ((diff + half) & mask) - half;
    }

    /* the 8 bit case is a plain conversion to a signed byte */
    return (int8_t)diff;
}
209
/**
 * Predict the current sample from its already coded neighbours.
 *
 * @param src  pointer to the current sample in the current line
 *             (src[-1] must be readable)
 * @param last pointer to the same column in the previous line
 *             (last[-1] must be readable)
 * @return the median of left, top and left + top - topleft
 */
static inline int predict(int16_t *src, int16_t *last)
{
    const int left    = src[-1];
    const int top     = last[ 0];
    const int topleft = last[-1];
    const int gradient= left + top - topleft;

    return mid_pred(left, gradient, top);
}
218
219static inline int get_context(PlaneContext *p, int16_t *src,
220                              int16_t *last, int16_t *last2)
221{
222    const int LT= last[-1];
223    const int  T= last[ 0];
224    const int RT= last[ 1];
225    const int L =  src[-1];
226
227    if(p->quant_table[3][127]){
228        const int TT= last2[0];
229        const int LL=  src[-2];
230        return p->quant_table[0][(L-LT) & 0xFF] + p->quant_table[1][(LT-T) & 0xFF] + p->quant_table[2][(T-RT) & 0xFF]
231              +p->quant_table[3][(LL-L) & 0xFF] + p->quant_table[4][(TT-T) & 0xFF];
232    }else
233        return p->quant_table[0][(L-LT) & 0xFF] + p->quant_table[1][(LT-T) & 0xFF] + p->quant_table[2][(T-RT) & 0xFF];
234}
235
/**
 * For every probability prob/256 and every number of coded symbols,
 * pick the starting state (searched within prob-10 .. prob+10) that gives
 * the smallest accumulated expected code length.
 *
 * @param best_state best_state[prob][n] receives the chosen starting state
 *                   for the length accumulated before the n-th transition
 * @param one_state  state transition table used to propagate the state
 *                   occupancy
 */
static void find_best_state(uint8_t best_state[256][256], const uint8_t one_state[256]){
    int prob, start, step, st;
    double log2_tab[256];

    /* log2 of the probability represented by each state; entry 0 is unused */
    for(st=1; st<256; st++)
        log2_tab[st]= log2(st/256.0);

    for(prob=0; prob<256; prob++){
        double shortest[256];
        const double p= prob/256.0;
        const int first= FFMAX(prob-10,1);
        const int limit= FFMIN(prob+11,256);

        for(step=0; step<256; step++)
            shortest[step]= 1<<30;

        for(start=first; start<limit; start++){
            double occ[256]={0};
            double len=0;

            /* all probability mass starts in the candidate state */
            occ[start]=1.0;
            for(step=0; step<256; step++){
                double next_occ[256]={0};

                /* add the expected cost of coding one more symbol */
                for(st=0; st<256; st++){
                    if(!occ[st])
                        continue;
                    len -=occ[st]*(     p *log2_tab[    st]
                                   + (1-p)*log2_tab[256-st]);
                }
                if(len < shortest[step]){
                    shortest[step]= len;
                    best_state[prob][step]= start;
                }
                /* propagate the occupancy through both possible transitions */
                for(st=0; st<256; st++){
                    if(!occ[st])
                        continue;
                    next_occ[    one_state[    st]] += occ[st]*   p ;
                    next_occ[256-one_state[256-st]] += occ[st]*(1-p);
                }
                memcpy(occ, next_occ, sizeof(occ));
            }
        }
    }
}
277
278static av_always_inline av_flatten void put_symbol_inline(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
279    int i;
280
281#define put_rac(C,S,B) \
282do{\
283    if(rc_stat){\
284    rc_stat[*(S)][B]++;\
285        rc_stat2[(S)-state][B]++;\
286    }\
287    put_rac(C,S,B);\
288}while(0)
289
290    if(v){
291        const int a= FFABS(v);
292        const int e= av_log2(a);
293        put_rac(c, state+0, 0);
294        if(e<=9){
295            for(i=0; i<e; i++){
296                put_rac(c, state+1+i, 1);  //1..10
297            }
298            put_rac(c, state+1+i, 0);
299
300            for(i=e-1; i>=0; i--){
301                put_rac(c, state+22+i, (a>>i)&1); //22..31
302            }
303
304            if(is_signed)
305                put_rac(c, state+11 + e, v < 0); //11..21
306        }else{
307            for(i=0; i<e; i++){
308                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
309            }
310            put_rac(c, state+1+9, 0);
311
312            for(i=e-1; i>=0; i--){
313                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
314            }
315
316            if(is_signed)
317                put_rac(c, state+11 + 10, v < 0); //11..21
318        }
319    }else{
320        put_rac(c, state+0, 1);
321    }
322#undef put_rac
323}
324
/**
 * Non-inlined variant of put_symbol_inline() that never gathers statistics
 * (both statistics pointers are passed as NULL).
 */
static void av_noinline put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
    put_symbol_inline(c, state, v, is_signed, NULL, NULL);
}
328
329static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state, int is_signed){
330    if(get_rac(c, state+0))
331        return 0;
332    else{
333        int i, e, a;
334        e= 0;
335        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
336            e++;
337        }
338
339        a= 1;
340        for(i=e-1; i>=0; i--){
341            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
342        }
343
344        e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
345        return (a^e)-e;
346    }
347}
348
/**
 * Non-inlined variant of get_symbol_inline().
 */
static int av_noinline get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
    return get_symbol_inline(c, state, is_signed);
}
352
353static inline void update_vlc_state(VlcState * const state, const int v){
354    int drift= state->drift;
355    int count= state->count;
356    state->error_sum += FFABS(v);
357    drift += v;
358
359    if(count == 128){ //FIXME variable
360        count >>= 1;
361        drift >>= 1;
362        state->error_sum >>= 1;
363    }
364    count++;
365
366    if(drift <= -count){
367        if(state->bias > -128) state->bias--;
368
369        drift += count;
370        if(drift <= -count)
371            drift= -count + 1;
372    }else if(drift > 0){
373        if(state->bias <  127) state->bias++;
374
375        drift -= count;
376        if(drift > 0)
377            drift= 0;
378    }
379
380    state->drift= drift;
381    state->count= count;
382}
383
384static inline void put_vlc_symbol(PutBitContext *pb, VlcState * const state, int v, int bits){
385    int i, k, code;
386//printf("final: %d ", v);
387    v = fold(v - state->bias, bits);
388
389    i= state->count;
390    k=0;
391    while(i < state->error_sum){ //FIXME optimize
392        k++;
393        i += i;
394    }
395
396    assert(k<=8);
397
398#if 0 // JPEG LS
399    if(k==0 && 2*state->drift <= - state->count) code= v ^ (-1);
400    else                                         code= v;
401#else
402     code= v ^ ((2*state->drift + state->count)>>31);
403#endif
404
405//printf("v:%d/%d bias:%d error:%d drift:%d count:%d k:%d\n", v, code, state->bias, state->error_sum, state->drift, state->count, k);
406    set_sr_golomb(pb, code, k, 12, bits);
407
408    update_vlc_state(state, v);
409}
410
411static inline int get_vlc_symbol(GetBitContext *gb, VlcState * const state, int bits){
412    int k, i, v, ret;
413
414    i= state->count;
415    k=0;
416    while(i < state->error_sum){ //FIXME optimize
417        k++;
418        i += i;
419    }
420
421    assert(k<=8);
422
423    v= get_sr_golomb(gb, k, 12, bits);
424//printf("v:%d bias:%d error:%d drift:%d count:%d k:%d", v, state->bias, state->error_sum, state->drift, state->count, k);
425
426#if 0 // JPEG LS
427    if(k==0 && 2*state->drift <= - state->count) v ^= (-1);
428#else
429     v ^= ((2*state->drift + state->count)>>31);
430#endif
431
432    ret= fold(v + state->bias, bits);
433
434    update_vlc_state(state, v);
435//printf("final: %d\n", ret);
436    return ret;
437}
438
439#if CONFIG_FFV1_ENCODER
440static av_always_inline int encode_line(FFV1Context *s, int w,
441                                        int16_t *sample[2],
442                                        int plane_index, int bits)
443{
444    PlaneContext * const p= &s->plane[plane_index];
445    RangeCoder * const c= &s->c;
446    int x;
447    int run_index= s->run_index;
448    int run_count=0;
449    int run_mode=0;
450
451    if(s->ac){
452        if(c->bytestream_end - c->bytestream < w*20){
453            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
454            return -1;
455        }
456    }else{
457        if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < w*4){
458            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
459            return -1;
460        }
461    }
462
463    for(x=0; x<w; x++){
464        int diff, context;
465
466        context= get_context(p, sample[0]+x, sample[1]+x, sample[2]+x);
467        diff= sample[0][x] - predict(sample[0]+x, sample[1]+x);
468
469        if(context < 0){
470            context = -context;
471            diff= -diff;
472        }
473
474        diff= fold(diff, bits);
475
476        if(s->ac){
477            if(s->flags & CODEC_FLAG_PASS1){
478                put_symbol_inline(c, p->state[context], diff, 1, s->rc_stat, s->rc_stat2[p->quant_table_index][context]);
479            }else{
480                put_symbol_inline(c, p->state[context], diff, 1, NULL, NULL);
481            }
482        }else{
483            if(context == 0) run_mode=1;
484
485            if(run_mode){
486
487                if(diff){
488                    while(run_count >= 1<<ff_log2_run[run_index]){
489                        run_count -= 1<<ff_log2_run[run_index];
490                        run_index++;
491                        put_bits(&s->pb, 1, 1);
492                    }
493
494                    put_bits(&s->pb, 1 + ff_log2_run[run_index], run_count);
495                    if(run_index) run_index--;
496                    run_count=0;
497                    run_mode=0;
498                    if(diff>0) diff--;
499                }else{
500                    run_count++;
501                }
502            }
503
504//            printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, (int)put_bits_count(&s->pb));
505
506            if(run_mode == 0)
507                put_vlc_symbol(&s->pb, &p->vlc_state[context], diff, bits);
508        }
509    }
510    if(run_mode){
511        while(run_count >= 1<<ff_log2_run[run_index]){
512            run_count -= 1<<ff_log2_run[run_index];
513            run_index++;
514            put_bits(&s->pb, 1, 1);
515        }
516
517        if(run_count)
518            put_bits(&s->pb, 1, 1);
519    }
520    s->run_index= run_index;
521
522    return 0;
523}
524
525static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){
526    int x,y,i;
527    const int ring_size= s->avctx->context_model ? 3 : 2;
528    int16_t *sample[3];
529    s->run_index=0;
530
531    memset(s->sample_buffer, 0, ring_size*(w+6)*sizeof(*s->sample_buffer));
532
533    for(y=0; y<h; y++){
534        for(i=0; i<ring_size; i++)
535            sample[i]= s->sample_buffer + (w+6)*((h+i-y)%ring_size) + 3;
536
537        sample[0][-1]= sample[1][0  ];
538        sample[1][ w]= sample[1][w-1];
539//{START_TIMER
540        if(s->avctx->bits_per_raw_sample<=8){
541            for(x=0; x<w; x++){
542                sample[0][x]= src[x + stride*y];
543            }
544            encode_line(s, w, sample, plane_index, 8);
545        }else{
546            for(x=0; x<w; x++){
547                sample[0][x]= ((uint16_t*)(src + stride*y))[x] >> (16 - s->avctx->bits_per_raw_sample);
548            }
549            encode_line(s, w, sample, plane_index, s->avctx->bits_per_raw_sample);
550        }
551//STOP_TIMER("encode line")}
552    }
553}
554
555static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){
556    int x, y, p, i;
557    const int ring_size= s->avctx->context_model ? 3 : 2;
558    int16_t *sample[3][3];
559    s->run_index=0;
560
561    memset(s->sample_buffer, 0, ring_size*3*(w+6)*sizeof(*s->sample_buffer));
562
563    for(y=0; y<h; y++){
564        for(i=0; i<ring_size; i++)
565            for(p=0; p<3; p++)
566                sample[p][i]= s->sample_buffer + p*ring_size*(w+6) + ((h+i-y)%ring_size)*(w+6) + 3;
567
568        for(x=0; x<w; x++){
569            int v= src[x + stride*y];
570            int b= v&0xFF;
571            int g= (v>>8)&0xFF;
572            int r= (v>>16)&0xFF;
573
574            b -= g;
575            r -= g;
576            g += (b + r)>>2;
577            b += 0x100;
578            r += 0x100;
579
580//            assert(g>=0 && b>=0 && r>=0);
581//            assert(g<256 && b<512 && r<512);
582            sample[0][0][x]= g;
583            sample[1][0][x]= b;
584            sample[2][0][x]= r;
585        }
586        for(p=0; p<3; p++){
587            sample[p][0][-1]= sample[p][1][0  ];
588            sample[p][1][ w]= sample[p][1][w-1];
589            encode_line(s, w, sample[p], FFMIN(p, 1), 9);
590        }
591    }
592}
593
594static void write_quant_table(RangeCoder *c, int16_t *quant_table){
595    int last=0;
596    int i;
597    uint8_t state[CONTEXT_SIZE];
598    memset(state, 128, sizeof(state));
599
600    for(i=1; i<128 ; i++){
601        if(quant_table[i] != quant_table[i-1]){
602            put_symbol(c, state, i-last-1, 0);
603            last= i;
604        }
605    }
606    put_symbol(c, state, i-last-1, 0);
607}
608
609static void write_quant_tables(RangeCoder *c, int16_t quant_table[MAX_CONTEXT_INPUTS][256]){
610    int i;
611    for(i=0; i<5; i++)
612        write_quant_table(c, quant_table[i]);
613}
614
615static void write_header(FFV1Context *f){
616    uint8_t state[CONTEXT_SIZE];
617    int i, j;
618    RangeCoder * const c= &f->slice_context[0]->c;
619
620    memset(state, 128, sizeof(state));
621
622    if(f->version < 2){
623        put_symbol(c, state, f->version, 0);
624        put_symbol(c, state, f->ac, 0);
625        if(f->ac>1){
626            for(i=1; i<256; i++){
627                put_symbol(c, state, f->state_transition[i] - c->one_state[i], 1);
628            }
629        }
630        put_symbol(c, state, f->colorspace, 0); //YUV cs type
631        if(f->version>0)
632            put_symbol(c, state, f->avctx->bits_per_raw_sample, 0);
633        put_rac(c, state, 1); //chroma planes
634            put_symbol(c, state, f->chroma_h_shift, 0);
635            put_symbol(c, state, f->chroma_v_shift, 0);
636        put_rac(c, state, 0); //no transparency plane
637
638        write_quant_tables(c, f->quant_table);
639    }else{
640        put_symbol(c, state, f->slice_count, 0);
641        for(i=0; i<f->slice_count; i++){
642            FFV1Context *fs= f->slice_context[i];
643            put_symbol(c, state, (fs->slice_x     +1)*f->num_h_slices / f->width   , 0);
644            put_symbol(c, state, (fs->slice_y     +1)*f->num_v_slices / f->height  , 0);
645            put_symbol(c, state, (fs->slice_width +1)*f->num_h_slices / f->width -1, 0);
646            put_symbol(c, state, (fs->slice_height+1)*f->num_v_slices / f->height-1, 0);
647            for(j=0; j<f->plane_count; j++){
648                put_symbol(c, state, f->plane[j].quant_table_index, 0);
649                av_assert0(f->plane[j].quant_table_index == f->avctx->context_model);
650            }
651        }
652    }
653}
654#endif /* CONFIG_FFV1_ENCODER */
655
656static av_cold int common_init(AVCodecContext *avctx){
657    FFV1Context *s = avctx->priv_data;
658
659    s->avctx= avctx;
660    s->flags= avctx->flags;
661
662    dsputil_init(&s->dsp, avctx);
663
664    s->width = avctx->width;
665    s->height= avctx->height;
666
667    assert(s->width && s->height);
668    //defaults
669    s->num_h_slices=1;
670    s->num_v_slices=1;
671
672
673    return 0;
674}
675
676static int init_slice_state(FFV1Context *f){
677    int i, j;
678
679    for(i=0; i<f->slice_count; i++){
680        FFV1Context *fs= f->slice_context[i];
681        for(j=0; j<f->plane_count; j++){
682            PlaneContext * const p= &fs->plane[j];
683
684            if(fs->ac){
685                if(!p->    state) p->    state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t));
686                if(!p->    state)
687                    return AVERROR(ENOMEM);
688            }else{
689                if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState));
690                if(!p->vlc_state)
691                    return AVERROR(ENOMEM);
692            }
693        }
694
695        if (fs->ac>1){
696            //FIXME only redo if state_transition changed
697            for(j=1; j<256; j++){
698                fs->c.one_state [    j]= fs->state_transition[j];
699                fs->c.zero_state[256-j]= 256-fs->c.one_state [j];
700            }
701        }
702    }
703
704    return 0;
705}
706
707static av_cold int init_slice_contexts(FFV1Context *f){
708    int i;
709
710    f->slice_count= f->num_h_slices * f->num_v_slices;
711
712    for(i=0; i<f->slice_count; i++){
713        FFV1Context *fs= av_mallocz(sizeof(*fs));
714        int sx= i % f->num_h_slices;
715        int sy= i / f->num_h_slices;
716        int sxs= f->avctx->width * sx    / f->num_h_slices;
717        int sxe= f->avctx->width *(sx+1) / f->num_h_slices;
718        int sys= f->avctx->height* sy    / f->num_v_slices;
719        int sye= f->avctx->height*(sy+1) / f->num_v_slices;
720        f->slice_context[i]= fs;
721        memcpy(fs, f, sizeof(*fs));
722        memset(fs->rc_stat2, 0, sizeof(fs->rc_stat2));
723
724        fs->slice_width = sxe - sxs;
725        fs->slice_height= sye - sys;
726        fs->slice_x     = sxs;
727        fs->slice_y     = sys;
728
729        fs->sample_buffer = av_malloc(9 * (fs->width+6) * sizeof(*fs->sample_buffer));
730        if (!fs->sample_buffer)
731            return AVERROR(ENOMEM);
732    }
733    return 0;
734}
735
736static int allocate_initial_states(FFV1Context *f){
737    int i;
738
739    for(i=0; i<f->quant_table_count; i++){
740        f->initial_states[i]= av_malloc(f->context_count[i]*sizeof(*f->initial_states[i]));
741        if(!f->initial_states[i])
742            return AVERROR(ENOMEM);
743        memset(f->initial_states[i], 128, f->context_count[i]*sizeof(*f->initial_states[i]));
744    }
745    return 0;
746}
747
748#if CONFIG_FFV1_ENCODER
749static int write_extra_header(FFV1Context *f){
750    RangeCoder * const c= &f->c;
751    uint8_t state[CONTEXT_SIZE];
752    int i, j, k;
753    uint8_t state2[32][CONTEXT_SIZE];
754
755    memset(state2, 128, sizeof(state2));
756    memset(state, 128, sizeof(state));
757
758    f->avctx->extradata= av_malloc(f->avctx->extradata_size= 10000 + (11*11*5*5*5+11*11*11)*32);
759    ff_init_range_encoder(c, f->avctx->extradata, f->avctx->extradata_size);
760    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
761
762    put_symbol(c, state, f->version, 0);
763    put_symbol(c, state, f->ac, 0);
764    if(f->ac>1){
765        for(i=1; i<256; i++){
766            put_symbol(c, state, f->state_transition[i] - c->one_state[i], 1);
767        }
768    }
769    put_symbol(c, state, f->colorspace, 0); //YUV cs type
770    put_symbol(c, state, f->avctx->bits_per_raw_sample, 0);
771    put_rac(c, state, 1); //chroma planes
772        put_symbol(c, state, f->chroma_h_shift, 0);
773        put_symbol(c, state, f->chroma_v_shift, 0);
774    put_rac(c, state, 0); //no transparency plane
775    put_symbol(c, state, f->num_h_slices-1, 0);
776    put_symbol(c, state, f->num_v_slices-1, 0);
777
778    put_symbol(c, state, f->quant_table_count, 0);
779    for(i=0; i<f->quant_table_count; i++)
780        write_quant_tables(c, f->quant_tables[i]);
781
782    for(i=0; i<f->quant_table_count; i++){
783        for(j=0; j<f->context_count[i]*CONTEXT_SIZE; j++)
784            if(f->initial_states[i] && f->initial_states[i][0][j] != 128)
785                break;
786        if(j<f->context_count[i]*CONTEXT_SIZE){
787            put_rac(c, state, 1);
788            for(j=0; j<f->context_count[i]; j++){
789                for(k=0; k<CONTEXT_SIZE; k++){
790                    int pred= j ? f->initial_states[i][j-1][k] : 128;
791                    put_symbol(c, state2[k], (int8_t)(f->initial_states[i][j][k]-pred), 1);
792                }
793            }
794        }else{
795            put_rac(c, state, 0);
796        }
797    }
798
799    f->avctx->extradata_size= ff_rac_terminate(c);
800
801    return 0;
802}
803
804static int sort_stt(FFV1Context *s, uint8_t stt[256]){
805    int i,i2,changed,print=0;
806
807    do{
808        changed=0;
809        for(i=12; i<244; i++){
810            for(i2=i+1; i2<245 && i2<i+4; i2++){
811#define COST(old, new) \
812    s->rc_stat[old][0]*-log2((256-(new))/256.0)\
813   +s->rc_stat[old][1]*-log2(     (new) /256.0)
814
815#define COST2(old, new) \
816    COST(old, new)\
817   +COST(256-(old), 256-(new))
818
819                double size0= COST2(i, i ) + COST2(i2, i2);
820                double sizeX= COST2(i, i2) + COST2(i2, i );
821                if(sizeX < size0 && i!=128 && i2!=128){
822                    int j;
823                    FFSWAP(int, stt[    i], stt[    i2]);
824                    FFSWAP(int, s->rc_stat[i    ][0],s->rc_stat[    i2][0]);
825                    FFSWAP(int, s->rc_stat[i    ][1],s->rc_stat[    i2][1]);
826                    if(i != 256-i2){
827                        FFSWAP(int, stt[256-i], stt[256-i2]);
828                        FFSWAP(int, s->rc_stat[256-i][0],s->rc_stat[256-i2][0]);
829                        FFSWAP(int, s->rc_stat[256-i][1],s->rc_stat[256-i2][1]);
830                    }
831                    for(j=1; j<256; j++){
832                        if     (stt[j] == i ) stt[j] = i2;
833                        else if(stt[j] == i2) stt[j] = i ;
834                        if(i != 256-i2){
835                            if     (stt[256-j] == 256-i ) stt[256-j] = 256-i2;
836                            else if(stt[256-j] == 256-i2) stt[256-j] = 256-i ;
837                        }
838                    }
839                    print=changed=1;
840                }
841            }
842        }
843    }while(changed);
844    return print;
845}
846
847static av_cold int encode_init(AVCodecContext *avctx)
848{
849    FFV1Context *s = avctx->priv_data;
850    int i, j, k, m;
851
852    common_init(avctx);
853
854    s->version=0;
855    s->ac= avctx->coder_type ? 2:0;
856
857    if(s->ac>1)
858        for(i=1; i<256; i++)
859            s->state_transition[i]=ver2_state[i];
860
861    s->plane_count=2;
862    for(i=0; i<256; i++){
863        s->quant_table_count=2;
864        if(avctx->bits_per_raw_sample <=8){
865            s->quant_tables[0][0][i]=           quant11[i];
866            s->quant_tables[0][1][i]=        11*quant11[i];
867            s->quant_tables[0][2][i]=     11*11*quant11[i];
868            s->quant_tables[1][0][i]=           quant11[i];
869            s->quant_tables[1][1][i]=        11*quant11[i];
870            s->quant_tables[1][2][i]=     11*11*quant5 [i];
871            s->quant_tables[1][3][i]=   5*11*11*quant5 [i];
872            s->quant_tables[1][4][i]= 5*5*11*11*quant5 [i];
873        }else{
874            s->quant_tables[0][0][i]=           quant9_10bit[i];
875            s->quant_tables[0][1][i]=        11*quant9_10bit[i];
876            s->quant_tables[0][2][i]=     11*11*quant9_10bit[i];
877            s->quant_tables[1][0][i]=           quant9_10bit[i];
878            s->quant_tables[1][1][i]=        11*quant9_10bit[i];
879            s->quant_tables[1][2][i]=     11*11*quant5_10bit[i];
880            s->quant_tables[1][3][i]=   5*11*11*quant5_10bit[i];
881            s->quant_tables[1][4][i]= 5*5*11*11*quant5_10bit[i];
882        }
883    }
884    s->context_count[0]= (11*11*11+1)/2;
885    s->context_count[1]= (11*11*5*5*5+1)/2;
886    memcpy(s->quant_table, s->quant_tables[avctx->context_model], sizeof(s->quant_table));
887
888    for(i=0; i<s->plane_count; i++){
889        PlaneContext * const p= &s->plane[i];
890
891        memcpy(p->quant_table, s->quant_table, sizeof(p->quant_table));
892        p->quant_table_index= avctx->context_model;
893        p->context_count= s->context_count[p->quant_table_index];
894    }
895
896    if(allocate_initial_states(s) < 0)
897        return AVERROR(ENOMEM);
898
899    avctx->coded_frame= &s->picture;
900    switch(avctx->pix_fmt){
901    case PIX_FMT_YUV444P16:
902    case PIX_FMT_YUV422P16:
903    case PIX_FMT_YUV420P16:
904        if(avctx->bits_per_raw_sample <=8){
905            av_log(avctx, AV_LOG_ERROR, "bits_per_raw_sample invalid\n");
906            return -1;
907        }
908        if(!s->ac){
909            av_log(avctx, AV_LOG_ERROR, "bits_per_raw_sample of more than 8 needs -coder 1 currently\n");
910            return -1;
911        }
912        s->version= FFMAX(s->version, 1);
913    case PIX_FMT_YUV444P:
914    case PIX_FMT_YUV422P:
915    case PIX_FMT_YUV420P:
916    case PIX_FMT_YUV411P:
917    case PIX_FMT_YUV410P:
918        s->colorspace= 0;
919        break;
920    case PIX_FMT_RGB32:
921        s->colorspace= 1;
922        break;
923    default:
924        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
925        return -1;
926    }
927    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
928
929    s->picture_number=0;
930
931    if(avctx->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)){
932        for(i=0; i<s->quant_table_count; i++){
933            s->rc_stat2[i]= av_mallocz(s->context_count[i]*sizeof(*s->rc_stat2[i]));
934            if(!s->rc_stat2[i])
935                return AVERROR(ENOMEM);
936        }
937    }
938    if(avctx->stats_in){
939        char *p= avctx->stats_in;
940        uint8_t best_state[256][256];
941        int gob_count=0;
942        char *next;
943
944        av_assert0(s->version>=2);
945
946        for(;;){
947            for(j=0; j<256; j++){
948                for(i=0; i<2; i++){
949                    s->rc_stat[j][i]= strtol(p, &next, 0);
950                    if(next==p){
951                        av_log(avctx, AV_LOG_ERROR, "2Pass file invalid at %d %d [%s]\n", j,i,p);
952                        return -1;
953                    }
954                    p=next;
955                }
956            }
957            for(i=0; i<s->quant_table_count; i++){
958                for(j=0; j<s->context_count[i]; j++){
959                    for(k=0; k<32; k++){
960                        for(m=0; m<2; m++){
961                            s->rc_stat2[i][j][k][m]= strtol(p, &next, 0);
962                            if(next==p){
963                                av_log(avctx, AV_LOG_ERROR, "2Pass file invalid at %d %d %d %d [%s]\n", i,j,k,m,p);
964                                return -1;
965                            }
966                            p=next;
967                        }
968                    }
969                }
970            }
971            gob_count= strtol(p, &next, 0);
972            if(next==p || gob_count <0){
973                av_log(avctx, AV_LOG_ERROR, "2Pass file invalid\n");
974                return -1;
975            }
976            p=next;
977            while(*p=='\n' || *p==' ') p++;
978            if(p[0]==0) break;
979        }
980        sort_stt(s, s->state_transition);
981
982        find_best_state(best_state, s->state_transition);
983
984        for(i=0; i<s->quant_table_count; i++){
985            for(j=0; j<s->context_count[i]; j++){
986                for(k=0; k<32; k++){
987                    double p= 128;
988                    if(s->rc_stat2[i][j][k][0]+s->rc_stat2[i][j][k][1]){
989                        p=256.0*s->rc_stat2[i][j][k][1] / (s->rc_stat2[i][j][k][0]+s->rc_stat2[i][j][k][1]);
990                    }
991                    s->initial_states[i][j][k]= best_state[av_clip(round(p), 1, 255)][av_clip((s->rc_stat2[i][j][k][0]+s->rc_stat2[i][j][k][1])/gob_count, 0, 255)];
992                }
993            }
994        }
995    }
996
997    if(s->version>1){
998        s->num_h_slices=2;
999        s->num_v_slices=2;
1000        write_extra_header(s);
1001    }
1002
1003    if(init_slice_contexts(s) < 0)
1004        return -1;
1005    if(init_slice_state(s) < 0)
1006        return -1;
1007
1008#define STATS_OUT_SIZE 1024*1024*6
1009    if(avctx->flags & CODEC_FLAG_PASS1){
1010        avctx->stats_out= av_mallocz(STATS_OUT_SIZE);
1011        for(i=0; i<s->quant_table_count; i++){
1012            for(j=0; j<s->slice_count; j++){
1013                FFV1Context *sf= s->slice_context[j];
1014                av_assert0(!sf->rc_stat2[i]);
1015                sf->rc_stat2[i]= av_mallocz(s->context_count[i]*sizeof(*sf->rc_stat2[i]));
1016                if(!sf->rc_stat2[i])
1017                    return AVERROR(ENOMEM);
1018            }
1019        }
1020    }
1021
1022    return 0;
1023}
1024#endif /* CONFIG_FFV1_ENCODER */
1025
1026
1027static void clear_state(FFV1Context *f){
1028    int i, si, j;
1029
1030    for(si=0; si<f->slice_count; si++){
1031        FFV1Context *fs= f->slice_context[si];
1032        for(i=0; i<f->plane_count; i++){
1033            PlaneContext *p= &fs->plane[i];
1034
1035            p->interlace_bit_state[0]= 128;
1036            p->interlace_bit_state[1]= 128;
1037
1038            if(fs->ac){
1039                if(f->initial_states[p->quant_table_index]){
1040                    memcpy(p->state, f->initial_states[p->quant_table_index], CONTEXT_SIZE*p->context_count);
1041                }else
1042                memset(p->state, 128, CONTEXT_SIZE*p->context_count);
1043            }else{
1044            for(j=0; j<p->context_count; j++){
1045                    p->vlc_state[j].drift= 0;
1046                    p->vlc_state[j].error_sum= 4; //FFMAX((RANGE + 32)/64, 2);
1047                    p->vlc_state[j].bias= 0;
1048                    p->vlc_state[j].count= 1;
1049            }
1050            }
1051        }
1052    }
1053}
1054
1055#if CONFIG_FFV1_ENCODER
1056static int encode_slice(AVCodecContext *c, void *arg){
1057    FFV1Context *fs= *(void**)arg;
1058    FFV1Context *f= fs->avctx->priv_data;
1059    int width = fs->slice_width;
1060    int height= fs->slice_height;
1061    int x= fs->slice_x;
1062    int y= fs->slice_y;
1063    AVFrame * const p= &f->picture;
1064
1065    if(f->colorspace==0){
1066        const int chroma_width = -((-width )>>f->chroma_h_shift);
1067        const int chroma_height= -((-height)>>f->chroma_v_shift);
1068        const int cx= x>>f->chroma_h_shift;
1069        const int cy= y>>f->chroma_v_shift;
1070
1071        encode_plane(fs, p->data[0] + x + y*p->linesize[0], width, height, p->linesize[0], 0);
1072
1073        encode_plane(fs, p->data[1] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
1074        encode_plane(fs, p->data[2] + cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
1075    }else{
1076        encode_rgb_frame(fs, (uint32_t*)(p->data[0]) + x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4);
1077    }
1078    emms_c();
1079
1080    return 0;
1081}
1082
/**
 * Encode one picture into buf.
 *
 * The frame starts with a range-coded keyframe flag (followed by the header
 * on keyframes). Every slice is then encoded into its own region of buf via
 * avctx->execute(), and afterwards the slice payloads are packed back to
 * back at the start of buf. Each slice except the first is followed by a
 * 3-byte size field written with AV_WB24.
 *
 * When CODEC_FLAG_PASS1 is set, the accumulated coder statistics are printed
 * into avctx->stats_out on every 32nd picture; on the other pictures
 * stats_out is set to the empty string.
 *
 * @param avctx    codec context
 * @param buf      output buffer
 * @param buf_size size of buf in bytes
 * @param data     input picture (AVFrame *)
 * @return number of bytes written to buf
 */
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
    FFV1Context *f = avctx->priv_data;
    RangeCoder * const c= &f->slice_context[0]->c;
    AVFrame *pict = data;
    AVFrame * const p= &f->picture;
    int used_count= 0;
    uint8_t keystate=128;
    uint8_t *buf_p;
    int i;

    /* slice 0's range coder initially covers the whole output buffer */
    ff_init_range_encoder(c, buf, buf_size);
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);

    *p = *pict;
    p->pict_type= AV_PICTURE_TYPE_I;

    /* a keyframe is emitted every gop_size pictures (every picture if gop_size is 0) */
    if(avctx->gop_size==0 || f->picture_number % avctx->gop_size == 0){
        put_rac(c, &keystate, 1);
        p->key_frame= 1;
        f->gob_count++;
        write_header(f);
        clear_state(f);
    }else{
        put_rac(c, &keystate, 0);
        p->key_frame= 0;
    }

    if(!f->ac){
        /* VLC mode: close the range-coded part and continue with a bit writer right after it */
        used_count += ff_rac_terminate(c);
//printf("pos=%d\n", used_count);
        init_put_bits(&f->slice_context[0]->pb, buf + used_count, buf_size - used_count);
    }else if (f->ac>1){
        /* install the custom state transition table into slice 0's range coder */
        int i;
        for(i=1; i<256; i++){
            c->one_state[i]= f->state_transition[i];
            c->zero_state[256-i]= 256-c->one_state[i];
        }
    }

    /* give every further slice its own region of the output buffer */
    for(i=1; i<f->slice_count; i++){
        FFV1Context *fs= f->slice_context[i];
        uint8_t *start= buf + (buf_size-used_count)*i/f->slice_count;
        int len= buf_size/f->slice_count;

        if(fs->ac){
            ff_init_range_encoder(&fs->c, start, len);
        }else{
            init_put_bits(&fs->pb, start, len);
        }
    }
    avctx->execute(avctx, encode_slice, &f->slice_context[0], NULL, f->slice_count, sizeof(void*));

    /* terminate each slice and pack the payloads contiguously from the start of buf */
    buf_p=buf;
    for(i=0; i<f->slice_count; i++){
        FFV1Context *fs= f->slice_context[i];
        int bytes;

        if(fs->ac){
            uint8_t state=128;
            put_rac(&fs->c, &state, 0);
            bytes= ff_rac_terminate(&fs->c);
        }else{
            flush_put_bits(&fs->pb); //nicer padding FIXME
            /* used_count is only non-zero for slice 0, whose bit writer starts after the range-coded header */
            bytes= used_count + (put_bits_count(&fs->pb)+7)/8;
            used_count= 0;
        }
        if(i>0){
            av_assert0(bytes < buf_size/f->slice_count);
            /* regions may overlap while compacting, hence memmove */
            memmove(buf_p, fs->ac ? fs->c.bytestream_start : fs->pb.buf, bytes);
            av_assert0(bytes < (1<<24));
            /* 3-byte payload size stored directly after the slice payload */
            AV_WB24(buf_p+bytes, bytes);
            bytes+=3;
        }
        buf_p += bytes;
    }

    if((avctx->flags&CODEC_FLAG_PASS1) && (f->picture_number&31)==0){
        int j, k, m;
        char *p= avctx->stats_out;
        char *end= p + STATS_OUT_SIZE;

        /* rebuild the global statistics as the sum over all slice contexts */
        memset(f->rc_stat, 0, sizeof(f->rc_stat));
        for(i=0; i<f->quant_table_count; i++)
            memset(f->rc_stat2[i], 0, f->context_count[i]*sizeof(*f->rc_stat2[i]));

        for(j=0; j<f->slice_count; j++){
            FFV1Context *fs= f->slice_context[j];
            for(i=0; i<256; i++){
                f->rc_stat[i][0] += fs->rc_stat[i][0];
                f->rc_stat[i][1] += fs->rc_stat[i][1];
            }
            for(i=0; i<f->quant_table_count; i++){
                for(k=0; k<f->context_count[i]; k++){
                    for(m=0; m<32; m++){
                        f->rc_stat2[i][k][m][0] += fs->rc_stat2[i][k][m][0];
                        f->rc_stat2[i][k][m][1] += fs->rc_stat2[i][k][m][1];
                    }
                }
            }
        }

        /* print the statistics as whitespace-separated decimal numbers */
        for(j=0; j<256; j++){
            snprintf(p, end-p, "%"PRIu64" %"PRIu64" ", f->rc_stat[j][0], f->rc_stat[j][1]);
            p+= strlen(p);
        }
        /* NOTE(review): p is not advanced past this newline, so the next snprintf() below overwrites it */
        snprintf(p, end-p, "\n");

        for(i=0; i<f->quant_table_count; i++){
            for(j=0; j<f->context_count[i]; j++){
                for(m=0; m<32; m++){
                    snprintf(p, end-p, "%"PRIu64" %"PRIu64" ", f->rc_stat2[i][j][m][0], f->rc_stat2[i][j][m][1]);
                    p+= strlen(p);
                }
            }
        }
        snprintf(p, end-p, "%d\n", f->gob_count);
    } else if(avctx->flags&CODEC_FLAG_PASS1)
        avctx->stats_out[0] = '\0';

    f->picture_number++;
    return buf_p-buf;
}
1205#endif /* CONFIG_FFV1_ENCODER */
1206
1207static av_cold int common_end(AVCodecContext *avctx){
1208    FFV1Context *s = avctx->priv_data;
1209    int i, j;
1210
1211    if (avctx->codec->decode && s->picture.data[0])
1212        avctx->release_buffer(avctx, &s->picture);
1213
1214    for(j=0; j<s->slice_count; j++){
1215        FFV1Context *fs= s->slice_context[j];
1216        for(i=0; i<s->plane_count; i++){
1217            PlaneContext *p= &fs->plane[i];
1218
1219            av_freep(&p->state);
1220            av_freep(&p->vlc_state);
1221        }
1222        av_freep(&fs->sample_buffer);
1223    }
1224
1225    av_freep(&avctx->stats_out);
1226    for(j=0; j<s->quant_table_count; j++){
1227        av_freep(&s->initial_states[j]);
1228        for(i=0; i<s->slice_count; i++){
1229            FFV1Context *sf= s->slice_context[i];
1230            av_freep(&sf->rc_stat2[j]);
1231        }
1232        av_freep(&s->rc_stat2[j]);
1233    }
1234
1235    for(i=0; i<s->slice_count; i++){
1236        av_freep(&s->slice_context[i]);
1237    }
1238
1239    return 0;
1240}
1241
/**
 * Decode one line of w samples into sample[1].
 *
 * Each sample is reconstructed as predict(...) plus a decoded difference,
 * masked to `bits` bits. The difference is read with the range coder when
 * s->ac is set, otherwise with the VLC coder including its run mode.
 *
 * @param s           slice context (entropy decoder state, run_index)
 * @param w           number of samples to decode
 * @param sample      sample[1] receives the decoded line; sample[0] is the
 *                    other line buffer, handed to get_context()/predict()
 *                    alongside it
 * @param plane_index index of the plane context holding the coder states
 * @param bits        sample bit depth used for masking and VLC decoding
 */
static av_always_inline void decode_line(FFV1Context *s, int w,
                                         int16_t *sample[2],
                                         int plane_index, int bits)
{
    PlaneContext * const p= &s->plane[plane_index];
    RangeCoder * const c= &s->c;
    int x;
    int run_count=0;
    int run_mode=0;
    int run_index= s->run_index;

    for(x=0; x<w; x++){
        int diff, context, sign;

        /* a negative context means: use its absolute value and negate the decoded difference */
        context= get_context(p, sample[1] + x, sample[0] + x, sample[1] + x);
        if(context < 0){
            context= -context;
            sign=1;
        }else
            sign=0;

        av_assert2(context < p->context_count);

        if(s->ac){
            diff= get_symbol_inline(c, p->state[context], 1);
        }else{
            /* context 0 switches the VLC coder into run mode */
            if(context == 0 && run_mode==0) run_mode=1;

            if(run_mode){
                if(run_count==0 && run_mode==1){
                    if(get_bits1(&s->gb)){
                        /* bit 1: a full run of 1<<ff_log2_run[run_index] zero differences */
                        run_count = 1<<ff_log2_run[run_index];
                        if(x + run_count <= w) run_index++;
                    }else{
                        /* bit 0: a shorter run whose length is coded explicitly; run_mode 2 marks the final run */
                        if(ff_log2_run[run_index]) run_count = get_bits(&s->gb, ff_log2_run[run_index]);
                        else run_count=0;
                        if(run_index) run_index--;
                        run_mode=2;
                    }
                }
                run_count--;
                if(run_count < 0){
                    /* run exhausted: a VLC-coded difference follows; non-negative values are shifted up by one */
                    run_mode=0;
                    run_count=0;
                    diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);
                    if(diff>=0) diff++;
                }else
                    diff=0;
            }else
                diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);

//            printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, get_bits_count(&s->gb));
        }

        if(sign) diff= -diff;

        sample[1][x]= (predict(sample[1] + x, sample[0] + x) + diff) & ((1<<bits)-1);
    }
    /* run_index is carried over to the next line */
    s->run_index= run_index;
}
1302
1303static void decode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){
1304    int x, y;
1305    int16_t *sample[2];
1306    sample[0]=s->sample_buffer    +3;
1307    sample[1]=s->sample_buffer+w+6+3;
1308
1309    s->run_index=0;
1310
1311    memset(s->sample_buffer, 0, 2*(w+6)*sizeof(*s->sample_buffer));
1312
1313    for(y=0; y<h; y++){
1314        int16_t *temp = sample[0]; //FIXME try a normal buffer
1315
1316        sample[0]= sample[1];
1317        sample[1]= temp;
1318
1319        sample[1][-1]= sample[0][0  ];
1320        sample[0][ w]= sample[0][w-1];
1321
1322//{START_TIMER
1323        if(s->avctx->bits_per_raw_sample <= 8){
1324            decode_line(s, w, sample, plane_index, 8);
1325            for(x=0; x<w; x++){
1326                src[x + stride*y]= sample[1][x];
1327            }
1328        }else{
1329            decode_line(s, w, sample, plane_index, s->avctx->bits_per_raw_sample);
1330            for(x=0; x<w; x++){
1331                ((uint16_t*)(src + stride*y))[x]= sample[1][x] << (16 - s->avctx->bits_per_raw_sample);
1332            }
1333        }
1334//STOP_TIMER("decode-line")}
1335    }
1336}
1337
1338static void decode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){
1339    int x, y, p;
1340    int16_t *sample[3][2];
1341    for(x=0; x<3; x++){
1342        sample[x][0] = s->sample_buffer +  x*2   *(w+6) + 3;
1343        sample[x][1] = s->sample_buffer + (x*2+1)*(w+6) + 3;
1344    }
1345
1346    s->run_index=0;
1347
1348    memset(s->sample_buffer, 0, 6*(w+6)*sizeof(*s->sample_buffer));
1349
1350    for(y=0; y<h; y++){
1351        for(p=0; p<3; p++){
1352            int16_t *temp = sample[p][0]; //FIXME try a normal buffer
1353
1354            sample[p][0]= sample[p][1];
1355            sample[p][1]= temp;
1356
1357            sample[p][1][-1]= sample[p][0][0  ];
1358            sample[p][0][ w]= sample[p][0][w-1];
1359            decode_line(s, w, sample[p], FFMIN(p, 1), 9);
1360        }
1361        for(x=0; x<w; x++){
1362            int g= sample[0][1][x];
1363            int b= sample[1][1][x];
1364            int r= sample[2][1][x];
1365
1366//            assert(g>=0 && b>=0 && r>=0);
1367//            assert(g<256 && b<512 && r<512);
1368
1369            b -= 0x100;
1370            r -= 0x100;
1371            g -= (b + r)>>2;
1372            b += g;
1373            r += g;
1374
1375            src[x + stride*y]= b + (g<<8) + (r<<16) + (0xFF<<24);
1376        }
1377    }
1378}
1379
1380static int decode_slice(AVCodecContext *c, void *arg){
1381    FFV1Context *fs= *(void**)arg;
1382    FFV1Context *f= fs->avctx->priv_data;
1383    int width = fs->slice_width;
1384    int height= fs->slice_height;
1385    int x= fs->slice_x;
1386    int y= fs->slice_y;
1387    AVFrame * const p= &f->picture;
1388
1389    av_assert1(width && height);
1390    if(f->colorspace==0){
1391        const int chroma_width = -((-width )>>f->chroma_h_shift);
1392        const int chroma_height= -((-height)>>f->chroma_v_shift);
1393        const int cx= x>>f->chroma_h_shift;
1394        const int cy= y>>f->chroma_v_shift;
1395        decode_plane(fs, p->data[0] + x + y*p->linesize[0], width, height, p->linesize[0], 0);
1396
1397        decode_plane(fs, p->data[1] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
1398        decode_plane(fs, p->data[2] + cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[2], 1);
1399    }else{
1400        decode_rgb_frame(fs, (uint32_t*)p->data[0] + x + y*(p->linesize[0]/4), width, height, p->linesize[0]/4);
1401    }
1402
1403    emms_c();
1404
1405    return 0;
1406}
1407
1408static int read_quant_table(RangeCoder *c, int16_t *quant_table, int scale){
1409    int v;
1410    int i=0;
1411    uint8_t state[CONTEXT_SIZE];
1412
1413    memset(state, 128, sizeof(state));
1414
1415    for(v=0; i<128 ; v++){
1416        int len= get_symbol(c, state, 0) + 1;
1417
1418        if(len + i > 128) return -1;
1419
1420        while(len--){
1421            quant_table[i] = scale*v;
1422            i++;
1423//printf("%2d ",v);
1424//if(i%16==0) printf("\n");
1425        }
1426    }
1427
1428    for(i=1; i<128; i++){
1429        quant_table[256-i]= -quant_table[i];
1430    }
1431    quant_table[128]= -quant_table[127];
1432
1433    return 2*v - 1;
1434}
1435
1436static int read_quant_tables(RangeCoder *c, int16_t quant_table[MAX_CONTEXT_INPUTS][256]){
1437    int i;
1438    int context_count=1;
1439
1440    for(i=0; i<5; i++){
1441        context_count*= read_quant_table(c, quant_table[i], context_count);
1442        if(context_count > 32768U){
1443            return -1;
1444        }
1445    }
1446    return (context_count+1)/2;
1447}
1448
/**
 * Parse the global header stored in avctx->extradata.
 *
 * Reads, in bitstream order: version, coder type (plus the state transition
 * table when the coder type is > 1), colorspace, bits per raw sample, chroma
 * shifts, slice grid dimensions, the quantization table sets and, per set,
 * optional initial coder states.
 *
 * @param f codec context to fill in
 * @return 0 on success, -1 on invalid data, AVERROR(ENOMEM) if
 *         allocate_initial_states() fails
 */
static int read_extra_header(FFV1Context *f){
    RangeCoder * const c= &f->c;
    uint8_t state[CONTEXT_SIZE];
    int i, j, k;
    uint8_t state2[32][CONTEXT_SIZE];

    memset(state2, 128, sizeof(state2));
    memset(state, 128, sizeof(state));

    ff_init_range_decoder(c, f->avctx->extradata, f->avctx->extradata_size);
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);

    f->version= get_symbol(c, state, 0);
    f->ac= f->avctx->coder_type= get_symbol(c, state, 0);
    if(f->ac>1){
        /* custom state transition table, coded as differences to the default one_state table */
        for(i=1; i<256; i++){
            f->state_transition[i]= get_symbol(c, state, 1) + c->one_state[i];
        }
    }
    f->colorspace= get_symbol(c, state, 0); //YUV cs type
    f->avctx->bits_per_raw_sample= get_symbol(c, state, 0);
    get_rac(c, state); //no chroma = false
    f->chroma_h_shift= get_symbol(c, state, 0);
    f->chroma_v_shift= get_symbol(c, state, 0);
    get_rac(c, state); //transparency plane
    f->plane_count= 2;
    /* slice grid dimensions are stored minus one */
    f->num_h_slices= 1 + get_symbol(c, state, 0);
    f->num_v_slices= 1 + get_symbol(c, state, 0);
    if(f->num_h_slices > (unsigned)f->width || f->num_v_slices > (unsigned)f->height){
        av_log(f->avctx, AV_LOG_ERROR, "too many slices\n");
        return -1;
    }

    f->quant_table_count= get_symbol(c, state, 0);
    /* unsigned comparison also rejects a negative count */
    if(f->quant_table_count > (unsigned)MAX_QUANT_TABLES)
        return -1;
    for(i=0; i<f->quant_table_count; i++){
        if((f->context_count[i]= read_quant_tables(c, f->quant_tables[i])) < 0){
            av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n");
            return -1;
        }
    }

    if(allocate_initial_states(f) < 0)
        return AVERROR(ENOMEM);

    /* optional initial states per table set: each context is coded as a
     * difference to the previous context (128 for the first one) */
    for(i=0; i<f->quant_table_count; i++){
        if(get_rac(c, state)){
            for(j=0; j<f->context_count[i]; j++){
                for(k=0; k<CONTEXT_SIZE; k++){
                    int pred= j ? f->initial_states[i][j-1][k] : 128;
                    f->initial_states[i][j][k]= (pred+get_symbol(c, state2[k], 1))&0xFF;
                }
            }
        }
    }

    return 0;
}
1508
1509static int read_header(FFV1Context *f){
1510    uint8_t state[CONTEXT_SIZE];
1511    int i, j, context_count;
1512    RangeCoder * const c= &f->slice_context[0]->c;
1513
1514    memset(state, 128, sizeof(state));
1515
1516    if(f->version < 2){
1517        f->version= get_symbol(c, state, 0);
1518        f->ac= f->avctx->coder_type= get_symbol(c, state, 0);
1519        if(f->ac>1){
1520            for(i=1; i<256; i++){
1521                f->state_transition[i]= get_symbol(c, state, 1) + c->one_state[i];
1522            }
1523        }
1524        f->colorspace= get_symbol(c, state, 0); //YUV cs type
1525        if(f->version>0)
1526            f->avctx->bits_per_raw_sample= get_symbol(c, state, 0);
1527        get_rac(c, state); //no chroma = false
1528        f->chroma_h_shift= get_symbol(c, state, 0);
1529        f->chroma_v_shift= get_symbol(c, state, 0);
1530        get_rac(c, state); //transparency plane
1531        f->plane_count= 2;
1532    }
1533
1534    if(f->colorspace==0){
1535        if(f->avctx->bits_per_raw_sample<=8){
1536            switch(16*f->chroma_h_shift + f->chroma_v_shift){
1537            case 0x00: f->avctx->pix_fmt= PIX_FMT_YUV444P; break;
1538            case 0x10: f->avctx->pix_fmt= PIX_FMT_YUV422P; break;
1539            case 0x11: f->avctx->pix_fmt= PIX_FMT_YUV420P; break;
1540            case 0x20: f->avctx->pix_fmt= PIX_FMT_YUV411P; break;
1541            case 0x22: f->avctx->pix_fmt= PIX_FMT_YUV410P; break;
1542            default:
1543                av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
1544                return -1;
1545            }
1546        }else{
1547            switch(16*f->chroma_h_shift + f->chroma_v_shift){
1548            case 0x00: f->avctx->pix_fmt= PIX_FMT_YUV444P16; break;
1549            case 0x10: f->avctx->pix_fmt= PIX_FMT_YUV422P16; break;
1550            case 0x11: f->avctx->pix_fmt= PIX_FMT_YUV420P16; break;
1551            default:
1552                av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
1553                return -1;
1554            }
1555        }
1556    }else if(f->colorspace==1){
1557        if(f->chroma_h_shift || f->chroma_v_shift){
1558            av_log(f->avctx, AV_LOG_ERROR, "chroma subsampling not supported in this colorspace\n");
1559            return -1;
1560        }
1561        f->avctx->pix_fmt= PIX_FMT_RGB32;
1562    }else{
1563        av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n");
1564        return -1;
1565    }
1566
1567//printf("%d %d %d\n", f->chroma_h_shift, f->chroma_v_shift,f->avctx->pix_fmt);
1568    if(f->version < 2){
1569        context_count= read_quant_tables(c, f->quant_table);
1570        if(context_count < 0){
1571                av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n");
1572                return -1;
1573        }
1574    }else{
1575        f->slice_count= get_symbol(c, state, 0);
1576        if(f->slice_count > (unsigned)MAX_SLICES)
1577            return -1;
1578    }
1579
1580    for(j=0; j<f->slice_count; j++){
1581        FFV1Context *fs= f->slice_context[j];
1582        fs->ac= f->ac;
1583
1584        if(f->version >= 2){
1585            fs->slice_x     = get_symbol(c, state, 0)   *f->width ;
1586            fs->slice_y     = get_symbol(c, state, 0)   *f->height;
1587            fs->slice_width =(get_symbol(c, state, 0)+1)*f->width  + fs->slice_x;
1588            fs->slice_height=(get_symbol(c, state, 0)+1)*f->height + fs->slice_y;
1589
1590            fs->slice_x /= f->num_h_slices;
1591            fs->slice_y /= f->num_v_slices;
1592            fs->slice_width  = fs->slice_width /f->num_h_slices - fs->slice_x;
1593            fs->slice_height = fs->slice_height/f->num_v_slices - fs->slice_y;
1594            if((unsigned)fs->slice_width > f->width || (unsigned)fs->slice_height > f->height)
1595                return -1;
1596            if(    (unsigned)fs->slice_x + (uint64_t)fs->slice_width  > f->width
1597                || (unsigned)fs->slice_y + (uint64_t)fs->slice_height > f->height)
1598                return -1;
1599        }
1600
1601        for(i=0; i<f->plane_count; i++){
1602            PlaneContext * const p= &fs->plane[i];
1603
1604            if(f->version >= 2){
1605                int idx=get_symbol(c, state, 0);
1606                if(idx > (unsigned)f->quant_table_count){
1607                    av_log(f->avctx, AV_LOG_ERROR, "quant_table_index out of range\n");
1608                    return -1;
1609                }
1610                p->quant_table_index= idx;
1611                memcpy(p->quant_table, f->quant_tables[idx], sizeof(p->quant_table));
1612                context_count= f->context_count[idx];
1613            }else{
1614                memcpy(p->quant_table, f->quant_table, sizeof(p->quant_table));
1615            }
1616
1617            if(p->context_count < context_count){
1618                av_freep(&p->state);
1619                av_freep(&p->vlc_state);
1620            }
1621            p->context_count= context_count;
1622        }
1623    }
1624
1625    return 0;
1626}
1627
1628static av_cold int decode_init(AVCodecContext *avctx)
1629{
1630    FFV1Context *f = avctx->priv_data;
1631
1632    common_init(avctx);
1633
1634    if(avctx->extradata && read_extra_header(f) < 0)
1635        return -1;
1636
1637    if(init_slice_contexts(f) < 0)
1638        return -1;
1639
1640    return 0;
1641}
1642
1643static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
1644    const uint8_t *buf = avpkt->data;
1645    int buf_size = avpkt->size;
1646    FFV1Context *f = avctx->priv_data;
1647    RangeCoder * const c= &f->slice_context[0]->c;
1648    AVFrame * const p= &f->picture;
1649    int bytes_read, i;
1650    uint8_t keystate= 128;
1651    const uint8_t *buf_p;
1652
1653    AVFrame *picture = data;
1654
1655    /* release previously stored data */
1656    if (p->data[0])
1657        avctx->release_buffer(avctx, p);
1658
1659    ff_init_range_decoder(c, buf, buf_size);
1660    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
1661
1662
1663    p->pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
1664    if(get_rac(c, &keystate)){
1665        p->key_frame= 1;
1666        if(read_header(f) < 0)
1667            return -1;
1668        if(init_slice_state(f) < 0)
1669            return -1;
1670
1671        clear_state(f);
1672    }else{
1673        p->key_frame= 0;
1674    }
1675    if(f->ac>1){
1676        int i;
1677        for(i=1; i<256; i++){
1678            c->one_state[i]= f->state_transition[i];
1679            c->zero_state[256-i]= 256-c->one_state[i];
1680        }
1681    }
1682
1683    p->reference= 0;
1684    if(avctx->get_buffer(avctx, p) < 0){
1685        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1686        return -1;
1687    }
1688
1689    if(avctx->debug&FF_DEBUG_PICT_INFO)
1690        av_log(avctx, AV_LOG_ERROR, "keyframe:%d coder:%d\n", p->key_frame, f->ac);
1691
1692    if(!f->ac){
1693        bytes_read = c->bytestream - c->bytestream_start - 1;
1694        if(bytes_read ==0) av_log(avctx, AV_LOG_ERROR, "error at end of AC stream\n"); //FIXME
1695//printf("pos=%d\n", bytes_read);
1696        init_get_bits(&f->slice_context[0]->gb, buf + bytes_read, (buf_size - bytes_read) * 8);
1697    } else {
1698        bytes_read = 0; /* avoid warning */
1699    }
1700
1701    buf_p= buf + buf_size;
1702    for(i=f->slice_count-1; i>0; i--){
1703        FFV1Context *fs= f->slice_context[i];
1704        int v= AV_RB24(buf_p-3)+3;
1705        if(buf_p - buf <= v){
1706            av_log(avctx, AV_LOG_ERROR, "Slice pointer chain broken\n");
1707            return -1;
1708        }
1709        buf_p -= v;
1710        if(fs->ac){
1711            ff_init_range_decoder(&fs->c, buf_p, v);
1712        }else{
1713            init_get_bits(&fs->gb, buf_p, v * 8);
1714        }
1715    }
1716
1717    avctx->execute(avctx, decode_slice, &f->slice_context[0], NULL, f->slice_count, sizeof(void*));
1718    f->picture_number++;
1719
1720    *picture= *p;
1721    *data_size = sizeof(AVFrame);
1722
1723    return buf_size;
1724}
1725
/**
 * Codec descriptor of the FFV1 decoder: wires decode_init, decode_frame and
 * common_end into the AVCodec callbacks and advertises CODEC_CAP_DR1 and
 * slice threading.
 */
AVCodec ff_ffv1_decoder = {
    .name           = "ffv1",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_FFV1,
    .priv_data_size = sizeof(FFV1Context),
    .init           = decode_init,
    .close          = common_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/ | CODEC_CAP_SLICE_THREADS,
    .long_name= NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"),
};
1737
1738#if CONFIG_FFV1_ENCODER
/**
 * Codec descriptor of the FFV1 encoder: wires encode_init, encode_frame and
 * common_end into the AVCodec callbacks, advertises slice threading and
 * lists the accepted input pixel formats (terminated by PIX_FMT_NONE).
 */
AVCodec ff_ffv1_encoder = {
    .name           = "ffv1",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_FFV1,
    .priv_data_size = sizeof(FFV1Context),
    .init           = encode_init,
    .encode         = encode_frame,
    .close          = common_end,
    .capabilities = CODEC_CAP_SLICE_THREADS,
    .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV444P, PIX_FMT_YUV422P, PIX_FMT_YUV411P, PIX_FMT_YUV410P, PIX_FMT_RGB32, PIX_FMT_YUV420P16, PIX_FMT_YUV422P16, PIX_FMT_YUV444P16, PIX_FMT_NONE},
    .long_name= NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"),
};
1751#endif
1752