1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * H.264 / AVC / MPEG4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
28#define CABAC 0
29
30#include "internal.h"
31#include "avcodec.h"
32#include "mpegvideo.h"
33#include "h264.h"
34#include "h264data.h" // FIXME FIXME FIXME
35#include "h264_mvpred.h"
36#include "golomb.h"
37
38//#undef NDEBUG
39#include <assert.h>
40
/*
 * 16-entry byte lookup table, indexed 0..15; the values are a permutation
 * of 0..15.
 * NOTE(review): by its name this presumably maps a Golomb code number to
 * the coded block pattern of an inter macroblock in a gray (luma only)
 * stream; it is not referenced in the visible part of this file, confirm
 * against the caller.
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,
     8,  3,  5, 10,
    12, 15,  7, 11,
    13, 14,  6,  9,
};
44
/*
 * 16-entry byte lookup table, indexed 0..15; the values are a permutation
 * of 0..15.
 * NOTE(review): by its name this presumably maps a Golomb code number to
 * the coded block pattern of an intra 4x4 macroblock in a gray (luma only)
 * stream; it is not referenced in the visible part of this file, confirm
 * against the caller.
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11,
    13, 14,  3,  5,
    10, 12,  1,  2,
     4,  8,  6,  9,
};
48
/*
 * Code lengths (in bits) handed to init_vlc() for the chroma DC coeff_token
 * VLC. decode_residual() splits the decoded value into
 * total_coeff = value >> 2 and trailing_ones = value & 3, so each row below
 * belongs to one total_coeff and each column to one trailing_ones count.
 * A length of 0 marks a combination that has no code.
 */
static const uint8_t chroma_dc_coeff_token_len[4 * 5] = {
    /* total_coeff 0 */ 2, 0, 0, 0,
    /* total_coeff 1 */ 6, 1, 0, 0,
    /* total_coeff 2 */ 6, 6, 3, 0,
    /* total_coeff 3 */ 6, 7, 7, 6,
    /* total_coeff 4 */ 6, 8, 8, 7,
};
56
/*
 * Code words handed to init_vlc() for the chroma DC coeff_token VLC; the
 * matching code lengths live in chroma_dc_coeff_token_len[]. Rows follow
 * total_coeff (0..4), columns follow trailing_ones (0..3), matching the
 * value >> 2 / value & 3 split done in decode_residual().
 */
static const uint8_t chroma_dc_coeff_token_bits[4 * 5] = {
    /* total_coeff 0 */ 1, 0, 0, 0,
    /* total_coeff 1 */ 7, 1, 0, 0,
    /* total_coeff 2 */ 4, 6, 1, 0,
    /* total_coeff 3 */ 3, 3, 2, 5,
    /* total_coeff 4 */ 2, 3, 2, 0,
};
64
/*
 * Code lengths (in bits) for the four coeff_token VLCs built in
 * ff_h264_decode_init_vlc(). The first index selects the table;
 * decode_residual() picks it through coeff_token_table_index[] from the
 * predicted non-zero count (0-1 -> 0, 2-3 -> 1, 4-7 -> 2, 8-16 -> 3).
 * Inside a table every row is one total_coeff (0..16) and every column one
 * trailing_ones count (0..3), i.e. entry = 4*total_coeff + trailing_ones.
 * A length of 0 marks a combination that has no code.
 */
static const uint8_t coeff_token_len[4][4 * 17] = {
    { /* table 0 */
        /* total_coeff  0 */  1,  0,  0,  0,
        /* total_coeff  1 */  6,  2,  0,  0,
        /* total_coeff  2 */  8,  6,  3,  0,
        /* total_coeff  3 */  9,  8,  7,  5,
        /* total_coeff  4 */ 10,  9,  8,  6,
        /* total_coeff  5 */ 11, 10,  9,  7,
        /* total_coeff  6 */ 13, 11, 10,  8,
        /* total_coeff  7 */ 13, 13, 11,  9,
        /* total_coeff  8 */ 13, 13, 13, 10,
        /* total_coeff  9 */ 14, 14, 13, 11,
        /* total_coeff 10 */ 14, 14, 14, 13,
        /* total_coeff 11 */ 15, 15, 14, 14,
        /* total_coeff 12 */ 15, 15, 15, 14,
        /* total_coeff 13 */ 16, 15, 15, 15,
        /* total_coeff 14 */ 16, 16, 16, 15,
        /* total_coeff 15 */ 16, 16, 16, 16,
        /* total_coeff 16 */ 16, 16, 16, 16,
    },
    { /* table 1 */
        /* total_coeff  0 */  2,  0,  0,  0,
        /* total_coeff  1 */  6,  2,  0,  0,
        /* total_coeff  2 */  6,  5,  3,  0,
        /* total_coeff  3 */  7,  6,  6,  4,
        /* total_coeff  4 */  8,  6,  6,  4,
        /* total_coeff  5 */  8,  7,  7,  5,
        /* total_coeff  6 */  9,  8,  8,  6,
        /* total_coeff  7 */ 11,  9,  9,  6,
        /* total_coeff  8 */ 11, 11, 11,  7,
        /* total_coeff  9 */ 12, 11, 11,  9,
        /* total_coeff 10 */ 12, 12, 12, 11,
        /* total_coeff 11 */ 12, 12, 12, 11,
        /* total_coeff 12 */ 13, 13, 13, 12,
        /* total_coeff 13 */ 13, 13, 13, 13,
        /* total_coeff 14 */ 13, 14, 13, 13,
        /* total_coeff 15 */ 14, 14, 14, 13,
        /* total_coeff 16 */ 14, 14, 14, 14,
    },
    { /* table 2 */
        /* total_coeff  0 */  4,  0,  0,  0,
        /* total_coeff  1 */  6,  4,  0,  0,
        /* total_coeff  2 */  6,  5,  4,  0,
        /* total_coeff  3 */  6,  5,  5,  4,
        /* total_coeff  4 */  7,  5,  5,  4,
        /* total_coeff  5 */  7,  5,  5,  4,
        /* total_coeff  6 */  7,  6,  6,  4,
        /* total_coeff  7 */  7,  6,  6,  4,
        /* total_coeff  8 */  8,  7,  7,  5,
        /* total_coeff  9 */  8,  8,  7,  6,
        /* total_coeff 10 */  9,  8,  8,  7,
        /* total_coeff 11 */  9,  9,  8,  8,
        /* total_coeff 12 */  9,  9,  9,  8,
        /* total_coeff 13 */ 10,  9,  9,  9,
        /* total_coeff 14 */ 10, 10, 10, 10,
        /* total_coeff 15 */ 10, 10, 10, 10,
        /* total_coeff 16 */ 10, 10, 10, 10,
    },
    { /* table 3: every existing code is 6 bits long */
        /* total_coeff  0 */  6,  0,  0,  0,
        /* total_coeff  1 */  6,  6,  0,  0,
        /* total_coeff  2 */  6,  6,  6,  0,
        /* total_coeff  3 */  6,  6,  6,  6,
        /* total_coeff  4 */  6,  6,  6,  6,
        /* total_coeff  5 */  6,  6,  6,  6,
        /* total_coeff  6 */  6,  6,  6,  6,
        /* total_coeff  7 */  6,  6,  6,  6,
        /* total_coeff  8 */  6,  6,  6,  6,
        /* total_coeff  9 */  6,  6,  6,  6,
        /* total_coeff 10 */  6,  6,  6,  6,
        /* total_coeff 11 */  6,  6,  6,  6,
        /* total_coeff 12 */  6,  6,  6,  6,
        /* total_coeff 13 */  6,  6,  6,  6,
        /* total_coeff 14 */  6,  6,  6,  6,
        /* total_coeff 15 */  6,  6,  6,  6,
        /* total_coeff 16 */  6,  6,  6,  6,
    },
};
95
/*
 * Code words for the four coeff_token VLCs built in
 * ff_h264_decode_init_vlc(); the matching code lengths live in
 * coeff_token_len[][]. The first index selects the table, and inside a
 * table every row is one total_coeff (0..16) and every column one
 * trailing_ones count (0..3), i.e. entry = 4*total_coeff + trailing_ones
 * (the split decode_residual() applies to the decoded value).
 */
static const uint8_t coeff_token_bits[4][4 * 17] = {
    { /* table 0 */
        /* total_coeff  0 */  1,  0,  0,  0,
        /* total_coeff  1 */  5,  1,  0,  0,
        /* total_coeff  2 */  7,  4,  1,  0,
        /* total_coeff  3 */  7,  6,  5,  3,
        /* total_coeff  4 */  7,  6,  5,  3,
        /* total_coeff  5 */  7,  6,  5,  4,
        /* total_coeff  6 */ 15,  6,  5,  4,
        /* total_coeff  7 */ 11, 14,  5,  4,
        /* total_coeff  8 */  8, 10, 13,  4,
        /* total_coeff  9 */ 15, 14,  9,  4,
        /* total_coeff 10 */ 11, 10, 13, 12,
        /* total_coeff 11 */ 15, 14,  9, 12,
        /* total_coeff 12 */ 11, 10, 13,  8,
        /* total_coeff 13 */ 15,  1,  9, 12,
        /* total_coeff 14 */ 11, 14, 13,  8,
        /* total_coeff 15 */  7, 10,  9, 12,
        /* total_coeff 16 */  4,  6,  5,  8,
    },
    { /* table 1 */
        /* total_coeff  0 */  3,  0,  0,  0,
        /* total_coeff  1 */ 11,  2,  0,  0,
        /* total_coeff  2 */  7,  7,  3,  0,
        /* total_coeff  3 */  7, 10,  9,  5,
        /* total_coeff  4 */  7,  6,  5,  4,
        /* total_coeff  5 */  4,  6,  5,  6,
        /* total_coeff  6 */  7,  6,  5,  8,
        /* total_coeff  7 */ 15,  6,  5,  4,
        /* total_coeff  8 */ 11, 14, 13,  4,
        /* total_coeff  9 */ 15, 10,  9,  4,
        /* total_coeff 10 */ 11, 14, 13, 12,
        /* total_coeff 11 */  8, 10,  9,  8,
        /* total_coeff 12 */ 15, 14, 13, 12,
        /* total_coeff 13 */ 11, 10,  9, 12,
        /* total_coeff 14 */  7, 11,  6,  8,
        /* total_coeff 15 */  9,  8, 10,  1,
        /* total_coeff 16 */  7,  6,  5,  4,
    },
    { /* table 2 */
        /* total_coeff  0 */ 15,  0,  0,  0,
        /* total_coeff  1 */ 15, 14,  0,  0,
        /* total_coeff  2 */ 11, 15, 13,  0,
        /* total_coeff  3 */  8, 12, 14, 12,
        /* total_coeff  4 */ 15, 10, 11, 11,
        /* total_coeff  5 */ 11,  8,  9, 10,
        /* total_coeff  6 */  9, 14, 13,  9,
        /* total_coeff  7 */  8, 10,  9,  8,
        /* total_coeff  8 */ 15, 14, 13, 13,
        /* total_coeff  9 */ 11, 14, 10, 12,
        /* total_coeff 10 */ 15, 10, 13, 12,
        /* total_coeff 11 */ 11, 14,  9, 12,
        /* total_coeff 12 */  8, 10, 13,  8,
        /* total_coeff 13 */ 13,  7,  9, 12,
        /* total_coeff 14 */  9, 12, 11, 10,
        /* total_coeff 15 */  5,  8,  7,  6,
        /* total_coeff 16 */  1,  4,  3,  2,
    },
    { /* table 3 */
        /* total_coeff  0 */  3,  0,  0,  0,
        /* total_coeff  1 */  0,  1,  0,  0,
        /* total_coeff  2 */  4,  5,  6,  0,
        /* total_coeff  3 */  8,  9, 10, 11,
        /* total_coeff  4 */ 12, 13, 14, 15,
        /* total_coeff  5 */ 16, 17, 18, 19,
        /* total_coeff  6 */ 20, 21, 22, 23,
        /* total_coeff  7 */ 24, 25, 26, 27,
        /* total_coeff  8 */ 28, 29, 30, 31,
        /* total_coeff  9 */ 32, 33, 34, 35,
        /* total_coeff 10 */ 36, 37, 38, 39,
        /* total_coeff 11 */ 40, 41, 42, 43,
        /* total_coeff 12 */ 44, 45, 46, 47,
        /* total_coeff 13 */ 48, 49, 50, 51,
        /* total_coeff 14 */ 52, 53, 54, 55,
        /* total_coeff 15 */ 56, 57, 58, 59,
        /* total_coeff 16 */ 60, 61, 62, 63,
    },
};
126
/*
 * Code lengths (in bits) for the total_zeros VLCs. Row r feeds
 * total_zeros_vlc[r] in ff_h264_decode_init_vlc(); decode_residual()
 * addresses that array as (total_zeros_vlc-1)[total_coeff], so row r is
 * the table used when total_coeff == r + 1. The column is the total_zeros
 * value. Only 15 rows are initialised; the 16th row and the unlisted tail
 * of each row are implicitly zero (no code).
 */
static const uint8_t total_zeros_len[16][16] = {
    /* total_coeff  1 */ { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },
    /* total_coeff  2 */ { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },
    /* total_coeff  3 */ { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },
    /* total_coeff  4 */ { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },
    /* total_coeff  5 */ { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },
    /* total_coeff  6 */ { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },
    /* total_coeff  7 */ { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },
    /* total_coeff  8 */ { 6, 4, 5, 3, 2, 2, 3, 3, 6 },
    /* total_coeff  9 */ { 6, 6, 4, 2, 2, 3, 2, 5 },
    /* total_coeff 10 */ { 5, 5, 3, 2, 2, 2, 4 },
    /* total_coeff 11 */ { 4, 4, 3, 3, 1, 3 },
    /* total_coeff 12 */ { 4, 4, 2, 1, 3 },
    /* total_coeff 13 */ { 3, 3, 1, 2 },
    /* total_coeff 14 */ { 2, 2, 1 },
    /* total_coeff 15 */ { 1, 1 },
};
144
/*
 * Code words for the total_zeros VLCs; the matching code lengths live in
 * total_zeros_len[][]. Row r feeds total_zeros_vlc[r], which
 * decode_residual() uses when total_coeff == r + 1; the column is the
 * total_zeros value. Only 15 rows are initialised, the rest is implicitly
 * zero.
 */
static const uint8_t total_zeros_bits[16][16] = {
    /* total_coeff  1 */ { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },
    /* total_coeff  2 */ { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },
    /* total_coeff  3 */ { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },
    /* total_coeff  4 */ { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },
    /* total_coeff  5 */ { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },
    /* total_coeff  6 */ { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },
    /* total_coeff  7 */ { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },
    /* total_coeff  8 */ { 1, 1, 1, 3, 3, 2, 2, 1, 0 },
    /* total_coeff  9 */ { 1, 0, 1, 3, 2, 1, 1, 1 },
    /* total_coeff 10 */ { 1, 0, 1, 3, 2, 1, 1 },
    /* total_coeff 11 */ { 0, 1, 1, 2, 1, 3 },
    /* total_coeff 12 */ { 0, 1, 1, 1, 1 },
    /* total_coeff 13 */ { 0, 1, 1, 1 },
    /* total_coeff 14 */ { 0, 1, 1 },
    /* total_coeff 15 */ { 0, 1 },
};
162
/*
 * Code lengths (in bits) for the chroma DC total_zeros VLCs. Row r feeds
 * chroma_dc_total_zeros_vlc[r], which decode_residual() addresses as
 * (chroma_dc_total_zeros_vlc-1)[total_coeff], i.e. row r serves
 * total_coeff == r + 1. The column is the total_zeros value; 0 means the
 * value has no code.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    /* total_coeff 1 */ { 1, 2, 3, 3 },
    /* total_coeff 2 */ { 1, 2, 2, 0 },
    /* total_coeff 3 */ { 1, 1, 0, 0 },
};
168
/*
 * Code words for the chroma DC total_zeros VLCs; the matching code lengths
 * live in chroma_dc_total_zeros_len[][]. Row r serves total_coeff == r + 1
 * and the column is the total_zeros value.
 */
static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    /* total_coeff 1 */ { 1, 1, 1, 0 },
    /* total_coeff 2 */ { 1, 1, 0, 0 },
    /* total_coeff 3 */ { 1, 0, 0, 0 },
};
174
/*
 * Code lengths (in bits) for the run_before VLCs. Rows 0..5 feed
 * run_vlc[0..5], which decode_residual() addresses as
 * (run_vlc-1)[zeros_left] for zeros_left 1..6; row 6 feeds run7_vlc, used
 * for zeros_left >= 7. The column is the run_before value; unlisted
 * entries are implicitly zero (no code).
 */
static const uint8_t run_len[7][16] = {
    /* zeros_left  1 */ { 1, 1 },
    /* zeros_left  2 */ { 1, 2, 2 },
    /* zeros_left  3 */ { 2, 2, 2, 2 },
    /* zeros_left  4 */ { 2, 2, 2, 3, 3 },
    /* zeros_left  5 */ { 2, 2, 3, 3, 3, 3 },
    /* zeros_left  6 */ { 2, 3, 3, 3, 3, 3, 3 },
    /* zeros_left >6 */ { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
};
184
/*
 * Code words for the run_before VLCs; the matching code lengths live in
 * run_len[][]. Rows 0..5 serve zeros_left 1..6 (run_vlc[]), row 6 serves
 * zeros_left >= 7 (run7_vlc). The column is the run_before value.
 */
static const uint8_t run_bits[7][16] = {
    /* zeros_left  1 */ { 1, 0 },
    /* zeros_left  2 */ { 1, 1, 0 },
    /* zeros_left  3 */ { 3, 2, 1, 0 },
    /* zeros_left  4 */ { 3, 2, 1, 1, 0 },
    /* zeros_left  5 */ { 3, 2, 3, 2, 1, 0 },
    /* zeros_left  6 */ { 3, 0, 1, 3, 2, 5, 4 },
    /* zeros_left >6 */ { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
};
194
195static VLC coeff_token_vlc[4];
196static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
197static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
198
199static VLC chroma_dc_coeff_token_vlc;
200static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
201static const int chroma_dc_coeff_token_vlc_table_size = 256;
202
203static VLC total_zeros_vlc[15];
204static VLC_TYPE total_zeros_vlc_tables[15][512][2];
205static const int total_zeros_vlc_tables_size = 512;
206
207static VLC chroma_dc_total_zeros_vlc[3];
208static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
209static const int chroma_dc_total_zeros_vlc_tables_size = 8;
210
211static VLC run_vlc[6];
212static VLC_TYPE run_vlc_tables[6][8][2];
213static const int run_vlc_tables_size = 8;
214
215static VLC run7_vlc;
216static VLC_TYPE run7_vlc_table[96][2];
217static const int run7_vlc_table_size = 96;
218
/* Number of bitstream bits used to index the level lookup table. */
#define LEVEL_TAB_BITS 8

/*
 * Level lookup table filled by init_cavlc_level_tab():
 * [suffix_length 0..6][next LEVEL_TAB_BITS bits of the stream][0] is the
 * decoded level, or a value >= 100 when the code does not fit the table
 * (prefix + 100); [..][1] is the number of bits to consume.
 */
static int8_t cavlc_level_tab[7][1 << LEVEL_TAB_BITS][2];
221
222
223/**
224 * gets the predicted number of non-zero coefficients.
225 * @param n block index
226 */
227static inline int pred_non_zero_count(H264Context *h, int n){
228    const int index8= scan8[n];
229    const int left= h->non_zero_count_cache[index8 - 1];
230    const int top = h->non_zero_count_cache[index8 - 8];
231    int i= left + top;
232
233    if(i<64) i= (i+1)>>1;
234
235    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
236
237    return i&31;
238}
239
240static av_cold void init_cavlc_level_tab(void){
241    int suffix_length, mask;
242    unsigned int i;
243
244    for(suffix_length=0; suffix_length<7; suffix_length++){
245        for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
246            int prefix= LEVEL_TAB_BITS - av_log2(2*i);
247            int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
248
249            mask= -(level_code&1);
250            level_code= (((2+level_code)>>1) ^ mask) - mask;
251            if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
252                cavlc_level_tab[suffix_length][i][0]= level_code;
253                cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
254            }else if(prefix + 1 <= LEVEL_TAB_BITS){
255                cavlc_level_tab[suffix_length][i][0]= prefix+100;
256                cavlc_level_tab[suffix_length][i][1]= prefix + 1;
257            }else{
258                cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
259                cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
260            }
261        }
262    }
263}
264
265av_cold void ff_h264_decode_init_vlc(void){
266    static int done = 0;
267
268    if (!done) {
269        int i;
270        int offset;
271        done = 1;
272
273        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
274        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
275        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
276                 &chroma_dc_coeff_token_len [0], 1, 1,
277                 &chroma_dc_coeff_token_bits[0], 1, 1,
278                 INIT_VLC_USE_NEW_STATIC);
279
280        offset = 0;
281        for(i=0; i<4; i++){
282            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
283            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
284            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
285                     &coeff_token_len [i][0], 1, 1,
286                     &coeff_token_bits[i][0], 1, 1,
287                     INIT_VLC_USE_NEW_STATIC);
288            offset += coeff_token_vlc_tables_size[i];
289        }
290        /*
291         * This is a one time safety check to make sure that
292         * the packed static coeff_token_vlc table sizes
293         * were initialized correctly.
294         */
295        assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
296
297        for(i=0; i<3; i++){
298            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
299            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
300            init_vlc(&chroma_dc_total_zeros_vlc[i],
301                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
302                     &chroma_dc_total_zeros_len [i][0], 1, 1,
303                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
304                     INIT_VLC_USE_NEW_STATIC);
305        }
306        for(i=0; i<15; i++){
307            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
308            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
309            init_vlc(&total_zeros_vlc[i],
310                     TOTAL_ZEROS_VLC_BITS, 16,
311                     &total_zeros_len [i][0], 1, 1,
312                     &total_zeros_bits[i][0], 1, 1,
313                     INIT_VLC_USE_NEW_STATIC);
314        }
315
316        for(i=0; i<6; i++){
317            run_vlc[i].table = run_vlc_tables[i];
318            run_vlc[i].table_allocated = run_vlc_tables_size;
319            init_vlc(&run_vlc[i],
320                     RUN_VLC_BITS, 7,
321                     &run_len [i][0], 1, 1,
322                     &run_bits[i][0], 1, 1,
323                     INIT_VLC_USE_NEW_STATIC);
324        }
325        run7_vlc.table = run7_vlc_table,
326        run7_vlc.table_allocated = run7_vlc_table_size;
327        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
328                 &run_len [6][0], 1, 1,
329                 &run_bits[6][0], 1, 1,
330                 INIT_VLC_USE_NEW_STATIC);
331
332        init_cavlc_level_tab();
333    }
334}
335
336/**
337 *
338 */
339static inline int get_level_prefix(GetBitContext *gb){
340    unsigned int buf;
341    int log;
342
343    OPEN_READER(re, gb);
344    UPDATE_CACHE(re, gb);
345    buf=GET_CACHE(re, gb);
346
347    log= 32 - av_log2(buf);
348#ifdef TRACE
349    print_bin(buf>>(32-log), log);
350    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
351#endif
352
353    LAST_SKIP_BITS(re, gb, log);
354    CLOSE_READER(re, gb);
355
356    return log-1;
357}
358
359/**
360 * decodes a residual block.
361 * @param n block index
362 * @param scantable scantable
363 * @param max_coeff number of coefficients in the block
364 * @return <0 if an error occurred
365 */
366static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
367    MpegEncContext * const s = &h->s;
368    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
369    int level[16];
370    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
371
372    //FIXME put trailing_onex into the context
373
374    if(n == CHROMA_DC_BLOCK_INDEX){
375        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
376        total_coeff= coeff_token>>2;
377    }else{
378        if(n == LUMA_DC_BLOCK_INDEX){
379            total_coeff= pred_non_zero_count(h, 0);
380            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
381            total_coeff= coeff_token>>2;
382        }else{
383            total_coeff= pred_non_zero_count(h, n);
384            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
385            total_coeff= coeff_token>>2;
386            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
387        }
388    }
389
390    //FIXME set last_non_zero?
391
392    if(total_coeff==0)
393        return 0;
394    if(total_coeff > (unsigned)max_coeff) {
395        av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
396        return -1;
397    }
398
399    trailing_ones= coeff_token&3;
400    tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
401    assert(total_coeff<=16);
402
403    i = show_bits(gb, 3);
404    skip_bits(gb, trailing_ones);
405    level[0] = 1-((i&4)>>1);
406    level[1] = 1-((i&2)   );
407    level[2] = 1-((i&1)<<1);
408
409    if(trailing_ones<total_coeff) {
410        int mask, prefix;
411        int suffix_length = total_coeff > 10 & trailing_ones < 3;
412        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
413        int level_code= cavlc_level_tab[suffix_length][bitsi][0];
414
415        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
416        if(level_code >= 100){
417            prefix= level_code - 100;
418            if(prefix == LEVEL_TAB_BITS)
419                prefix += get_level_prefix(gb);
420
421            //first coefficient has suffix_length equal to 0 or 1
422            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
423                if(suffix_length)
424                    level_code= (prefix<<1) + get_bits1(gb); //part
425                else
426                    level_code= prefix; //part
427            }else if(prefix==14){
428                if(suffix_length)
429                    level_code= (prefix<<1) + get_bits1(gb); //part
430                else
431                    level_code= prefix + get_bits(gb, 4); //part
432            }else{
433                level_code= 30 + get_bits(gb, prefix-3); //part
434                if(prefix>=16){
435                    if(prefix > 25+3){
436                        av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
437                        return -1;
438                    }
439                    level_code += (1<<(prefix-3))-4096;
440                }
441            }
442
443            if(trailing_ones < 3) level_code += 2;
444
445            suffix_length = 2;
446            mask= -(level_code&1);
447            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
448        }else{
449            level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
450
451            suffix_length = 1 + (level_code + 3U > 6U);
452            level[trailing_ones]= level_code;
453        }
454
455        //remaining coefficients have suffix_length > 0
456        for(i=trailing_ones+1;i<total_coeff;i++) {
457            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
458            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
459            level_code= cavlc_level_tab[suffix_length][bitsi][0];
460
461            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
462            if(level_code >= 100){
463                prefix= level_code - 100;
464                if(prefix == LEVEL_TAB_BITS){
465                    prefix += get_level_prefix(gb);
466                }
467                if(prefix<15){
468                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
469                }else{
470                    level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
471                    if(prefix>=16)
472                        level_code += (1<<(prefix-3))-4096;
473                }
474                mask= -(level_code&1);
475                level_code= (((2+level_code)>>1) ^ mask) - mask;
476            }
477            level[i]= level_code;
478            suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
479        }
480    }
481
482    if(total_coeff == max_coeff)
483        zeros_left=0;
484    else{
485        if(n == CHROMA_DC_BLOCK_INDEX)
486            zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
487        else
488            zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
489    }
490
491    coeff_num = zeros_left + total_coeff - 1;
492    j = scantable[coeff_num];
493    if(n > 24){
494        block[j] = level[0];
495        for(i=1;i<total_coeff;i++) {
496            if(zeros_left <= 0)
497                run_before = 0;
498            else if(zeros_left < 7){
499                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
500            }else{
501                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
502            }
503            zeros_left -= run_before;
504            coeff_num -= 1 + run_before;
505            j= scantable[ coeff_num ];
506
507            block[j]= level[i];
508        }
509    }else{
510        block[j] = (level[0] * qmul[j] + 32)>>6;
511        for(i=1;i<total_coeff;i++) {
512            if(zeros_left <= 0)
513                run_before = 0;
514            else if(zeros_left < 7){
515                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
516            }else{
517                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
518            }
519            zeros_left -= run_before;
520            coeff_num -= 1 + run_before;
521            j= scantable[ coeff_num ];
522
523            block[j]= (level[i] * qmul[j] + 32)>>6;
524        }
525    }
526
527    if(zeros_left<0){
528        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
529        return -1;
530    }
531
532    return 0;
533}
534
535int ff_h264_decode_mb_cavlc(H264Context *h){
536    MpegEncContext * const s = &h->s;
537    int mb_xy;
538    int partition_count;
539    unsigned int mb_type, cbp;
540    int dct8x8_allowed= h->pps.transform_8x8_mode;
541
542    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
543
544    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
545    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
546                down the code */
547    if(h->slice_type_nos != FF_I_TYPE){
548        if(s->mb_skip_run==-1)
549            s->mb_skip_run= get_ue_golomb(&s->gb);
550
551        if (s->mb_skip_run--) {
552            if(FRAME_MBAFF && (s->mb_y&1) == 0){
553                if(s->mb_skip_run==0)
554                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
555            }
556            decode_mb_skip(h);
557            return 0;
558        }
559    }
560    if(FRAME_MBAFF){
561        if( (s->mb_y&1) == 0 )
562            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
563    }
564
565    h->prev_mb_skipped= 0;
566
567    mb_type= get_ue_golomb(&s->gb);
568    if(h->slice_type_nos == FF_B_TYPE){
569        if(mb_type < 23){
570            partition_count= b_mb_type_info[mb_type].partition_count;
571            mb_type=         b_mb_type_info[mb_type].type;
572        }else{
573            mb_type -= 23;
574            goto decode_intra_mb;
575        }
576    }else if(h->slice_type_nos == FF_P_TYPE){
577        if(mb_type < 5){
578            partition_count= p_mb_type_info[mb_type].partition_count;
579            mb_type=         p_mb_type_info[mb_type].type;
580        }else{
581            mb_type -= 5;
582            goto decode_intra_mb;
583        }
584    }else{
585       assert(h->slice_type_nos == FF_I_TYPE);
586        if(h->slice_type == FF_SI_TYPE && mb_type)
587            mb_type--;
588decode_intra_mb:
589        if(mb_type > 25){
590            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
591            return -1;
592        }
593        partition_count=0;
594        cbp= i_mb_type_info[mb_type].cbp;
595        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
596        mb_type= i_mb_type_info[mb_type].type;
597    }
598
599    if(MB_FIELD)
600        mb_type |= MB_TYPE_INTERLACED;
601
602    h->slice_table[ mb_xy ]= h->slice_num;
603
604    if(IS_INTRA_PCM(mb_type)){
605        unsigned int x;
606
607        // We assume these blocks are very rare so we do not optimize it.
608        align_get_bits(&s->gb);
609
610        // The pixels are stored in the same order as levels in h->mb array.
611        for(x=0; x < (CHROMA ? 384 : 256); x++){
612            ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
613        }
614
615        // In deblocking, the quantizer is 0
616        s->current_picture.qscale_table[mb_xy]= 0;
617        // All coeffs are present
618        memset(h->non_zero_count[mb_xy], 16, 32);
619
620        s->current_picture.mb_type[mb_xy]= mb_type;
621        return 0;
622    }
623
624    if(MB_MBAFF){
625        h->ref_count[0] <<= 1;
626        h->ref_count[1] <<= 1;
627    }
628
629    fill_decode_neighbors(h, mb_type);
630    fill_decode_caches(h, mb_type);
631
632    //mb_pred
633    if(IS_INTRA(mb_type)){
634        int pred_mode;
635//            init_top_left_availability(h);
636        if(IS_INTRA4x4(mb_type)){
637            int i;
638            int di = 1;
639            if(dct8x8_allowed && get_bits1(&s->gb)){
640                mb_type |= MB_TYPE_8x8DCT;
641                di = 4;
642            }
643
644//                fill_intra4x4_pred_table(h);
645            for(i=0; i<16; i+=di){
646                int mode= pred_intra_mode(h, i);
647
648                if(!get_bits1(&s->gb)){
649                    const int rem_mode= get_bits(&s->gb, 3);
650                    mode = rem_mode + (rem_mode >= mode);
651                }
652
653                if(di==4)
654                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
655                else
656                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
657            }
658            ff_h264_write_back_intra_pred_mode(h);
659            if( ff_h264_check_intra4x4_pred_mode(h) < 0)
660                return -1;
661        }else{
662            h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode(h, h->intra16x16_pred_mode);
663            if(h->intra16x16_pred_mode < 0)
664                return -1;
665        }
666        if(CHROMA){
667            pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
668            if(pred_mode < 0)
669                return -1;
670            h->chroma_pred_mode= pred_mode;
671        }
672    }else if(partition_count==4){
673        int i, j, sub_partition_count[4], list, ref[2][4];
674
675        if(h->slice_type_nos == FF_B_TYPE){
676            for(i=0; i<4; i++){
677                h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
678                if(h->sub_mb_type[i] >=13){
679                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
680                    return -1;
681                }
682                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
683                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
684            }
685            if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
686                ff_h264_pred_direct_motion(h, &mb_type);
687                h->ref_cache[0][scan8[4]] =
688                h->ref_cache[1][scan8[4]] =
689                h->ref_cache[0][scan8[12]] =
690                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
691            }
692        }else{
693            assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
694            for(i=0; i<4; i++){
695                h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
696                if(h->sub_mb_type[i] >=4){
697                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
698                    return -1;
699                }
700                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
701                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
702            }
703        }
704
705        for(list=0; list<h->list_count; list++){
706            int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
707            for(i=0; i<4; i++){
708                if(IS_DIRECT(h->sub_mb_type[i])) continue;
709                if(IS_DIR(h->sub_mb_type[i], 0, list)){
710                    unsigned int tmp;
711                    if(ref_count == 1){
712                        tmp= 0;
713                    }else if(ref_count == 2){
714                        tmp= get_bits1(&s->gb)^1;
715                    }else{
716                        tmp= get_ue_golomb_31(&s->gb);
717                        if(tmp>=ref_count){
718                            av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
719                            return -1;
720                        }
721                    }
722                    ref[list][i]= tmp;
723                }else{
724                 //FIXME
725                    ref[list][i] = -1;
726                }
727            }
728        }
729
730        if(dct8x8_allowed)
731            dct8x8_allowed = get_dct8x8_allowed(h);
732
733        for(list=0; list<h->list_count; list++){
734            for(i=0; i<4; i++){
735                if(IS_DIRECT(h->sub_mb_type[i])) {
736                    h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
737                    continue;
738                }
739                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
740                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
741
742                if(IS_DIR(h->sub_mb_type[i], 0, list)){
743                    const int sub_mb_type= h->sub_mb_type[i];
744                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
745                    for(j=0; j<sub_partition_count[i]; j++){
746                        int mx, my;
747                        const int index= 4*i + block_width*j;
748                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
749                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
750                        mx += get_se_golomb(&s->gb);
751                        my += get_se_golomb(&s->gb);
752                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
753
754                        if(IS_SUB_8X8(sub_mb_type)){
755                            mv_cache[ 1 ][0]=
756                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
757                            mv_cache[ 1 ][1]=
758                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
759                        }else if(IS_SUB_8X4(sub_mb_type)){
760                            mv_cache[ 1 ][0]= mx;
761                            mv_cache[ 1 ][1]= my;
762                        }else if(IS_SUB_4X8(sub_mb_type)){
763                            mv_cache[ 8 ][0]= mx;
764                            mv_cache[ 8 ][1]= my;
765                        }
766                        mv_cache[ 0 ][0]= mx;
767                        mv_cache[ 0 ][1]= my;
768                    }
769                }else{
770                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
771                    p[0] = p[1]=
772                    p[8] = p[9]= 0;
773                }
774            }
775        }
776    }else if(IS_DIRECT(mb_type)){
777        ff_h264_pred_direct_motion(h, &mb_type);
778        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
779    }else{
780        int list, mx, my, i;
781         //FIXME we should set ref_idx_l? to 0 if we use that later ...
782        if(IS_16X16(mb_type)){
783            for(list=0; list<h->list_count; list++){
784                    unsigned int val;
785                    if(IS_DIR(mb_type, 0, list)){
786                        if(h->ref_count[list]==1){
787                            val= 0;
788                        }else if(h->ref_count[list]==2){
789                            val= get_bits1(&s->gb)^1;
790                        }else{
791                            val= get_ue_golomb_31(&s->gb);
792                            if(val >= h->ref_count[list]){
793                                av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
794                                return -1;
795                            }
796                        }
797                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
798                    }
799            }
800            for(list=0; list<h->list_count; list++){
801                if(IS_DIR(mb_type, 0, list)){
802                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
803                    mx += get_se_golomb(&s->gb);
804                    my += get_se_golomb(&s->gb);
805                    tprintf(s->avctx, "final mv:%d %d\n", mx, my);
806
807                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
808                }
809            }
810        }
811        else if(IS_16X8(mb_type)){
812            for(list=0; list<h->list_count; list++){
813                    for(i=0; i<2; i++){
814                        unsigned int val;
815                        if(IS_DIR(mb_type, i, list)){
816                            if(h->ref_count[list] == 1){
817                                val= 0;
818                            }else if(h->ref_count[list] == 2){
819                                val= get_bits1(&s->gb)^1;
820                            }else{
821                                val= get_ue_golomb_31(&s->gb);
822                                if(val >= h->ref_count[list]){
823                                    av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
824                                    return -1;
825                                }
826                            }
827                        }else
828                            val= LIST_NOT_USED&0xFF;
829                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
830                    }
831            }
832            for(list=0; list<h->list_count; list++){
833                for(i=0; i<2; i++){
834                    unsigned int val;
835                    if(IS_DIR(mb_type, i, list)){
836                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
837                        mx += get_se_golomb(&s->gb);
838                        my += get_se_golomb(&s->gb);
839                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
840
841                        val= pack16to32(mx,my);
842                    }else
843                        val=0;
844                    fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
845                }
846            }
847        }else{
848            assert(IS_8X16(mb_type));
849            for(list=0; list<h->list_count; list++){
850                    for(i=0; i<2; i++){
851                        unsigned int val;
852                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
853                            if(h->ref_count[list]==1){
854                                val= 0;
855                            }else if(h->ref_count[list]==2){
856                                val= get_bits1(&s->gb)^1;
857                            }else{
858                                val= get_ue_golomb_31(&s->gb);
859                                if(val >= h->ref_count[list]){
860                                    av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
861                                    return -1;
862                                }
863                            }
864                        }else
865                            val= LIST_NOT_USED&0xFF;
866                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
867                    }
868            }
869            for(list=0; list<h->list_count; list++){
870                for(i=0; i<2; i++){
871                    unsigned int val;
872                    if(IS_DIR(mb_type, i, list)){
873                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
874                        mx += get_se_golomb(&s->gb);
875                        my += get_se_golomb(&s->gb);
876                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
877
878                        val= pack16to32(mx,my);
879                    }else
880                        val=0;
881                    fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
882                }
883            }
884        }
885    }
886
887    if(IS_INTER(mb_type))
888        write_back_motion(h, mb_type);
889
890    if(!IS_INTRA16x16(mb_type)){
891        cbp= get_ue_golomb(&s->gb);
892        if(cbp > 47){
893            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
894            return -1;
895        }
896
897        if(CHROMA){
898            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
899            else                     cbp= golomb_to_inter_cbp   [cbp];
900        }else{
901            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
902            else                     cbp= golomb_to_inter_cbp_gray[cbp];
903        }
904    }
905
906    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
907        mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
908    }
909    h->cbp=
910    h->cbp_table[mb_xy]= cbp;
911    s->current_picture.mb_type[mb_xy]= mb_type;
912
913    if(cbp || IS_INTRA16x16(mb_type)){
914        int i8x8, i4x4, chroma_idx;
915        int dquant;
916        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
917        const uint8_t *scan, *scan8x8, *dc_scan;
918
919        if(IS_INTERLACED(mb_type)){
920            scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
921            scan= s->qscale ? h->field_scan : h->field_scan_q0;
922            dc_scan= luma_dc_field_scan;
923        }else{
924            scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
925            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
926            dc_scan= luma_dc_zigzag_scan;
927        }
928
929        dquant= get_se_golomb(&s->gb);
930
931        s->qscale += dquant;
932
933        if(((unsigned)s->qscale) > 51){
934            if(s->qscale<0) s->qscale+= 52;
935            else            s->qscale-= 52;
936            if(((unsigned)s->qscale) > 51){
937                av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
938                return -1;
939            }
940        }
941
942        h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
943        h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
944        if(IS_INTRA16x16(mb_type)){
945            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
946                return -1; //FIXME continue if partitioned and other return -1 too
947            }
948
949            assert((cbp&15) == 0 || (cbp&15) == 15);
950
951            if(cbp&15){
952                for(i8x8=0; i8x8<4; i8x8++){
953                    for(i4x4=0; i4x4<4; i4x4++){
954                        const int index= i4x4 + 4*i8x8;
955                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
956                            return -1;
957                        }
958                    }
959                }
960            }else{
961                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
962            }
963        }else{
964            for(i8x8=0; i8x8<4; i8x8++){
965                if(cbp & (1<<i8x8)){
966                    if(IS_8x8DCT(mb_type)){
967                        DCTELEM *buf = &h->mb[64*i8x8];
968                        uint8_t *nnz;
969                        for(i4x4=0; i4x4<4; i4x4++){
970                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
971                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
972                                return -1;
973                        }
974                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
975                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
976                    }else{
977                        for(i4x4=0; i4x4<4; i4x4++){
978                            const int index= i4x4 + 4*i8x8;
979
980                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
981                                return -1;
982                            }
983                        }
984                    }
985                }else{
986                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
987                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
988                }
989            }
990        }
991
992        if(cbp&0x30){
993            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
994                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
995                    return -1;
996                }
997        }
998
999        if(cbp&0x20){
1000            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1001                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1002                for(i4x4=0; i4x4<4; i4x4++){
1003                    const int index= 16 + 4*chroma_idx + i4x4;
1004                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1005                        return -1;
1006                    }
1007                }
1008            }
1009        }else{
1010            uint8_t * const nnz= &h->non_zero_count_cache[0];
1011            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1012            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1013        }
1014    }else{
1015        uint8_t * const nnz= &h->non_zero_count_cache[0];
1016        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
1017        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
1018        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
1019    }
1020    s->current_picture.qscale_table[mb_xy]= s->qscale;
1021    write_back_non_zero_count(h);
1022
1023    if(MB_MBAFF){
1024        h->ref_count[0] >>= 1;
1025        h->ref_count[1] >>= 1;
1026    }
1027
1028    return 0;
1029}
1030
1031