1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
28#include "internal.h"
29#include "dsputil.h"
30#include "avcodec.h"
31#include "mpegvideo.h"
32#include "h264.h"
33#include "h264data.h"
34#include "h264_parser.h"
35#include "golomb.h"
36#include "mathops.h"
37#include "rectangle.h"
38#include "vdpau_internal.h"
39
40#include "cabac.h"
41#if ARCH_X86
42#include "x86/h264_i386.h"
43#endif
44
45//#undef NDEBUG
46#include <assert.h>
47
48/**
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
51 */
52#define DELAYED_PIC_REF 4
53
/*
 * Static VLC objects used by this file. Each one is declared together with a
 * fixed-size VLC_TYPE array and a constant holding that array's per-table
 * length (its first dimension, or the second one for arrays of tables).
 * NOTE(review): the arrays are presumably the backing storage handed to the
 * VLC initialization code, which is not visible in this part of the file.
 */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
/* per-VLC lengths; their sum is the first dimension of coeff_token_vlc_tables */
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC run7_vlc;
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/* Prototypes of static functions that are used before their definition. */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static Picture * remove_long(H264Context *h, int i, int ref_mask);
83
/**
 * Packs two 16-bit quantities into one 32-bit word such that, when the word
 * is stored to memory, a occupies the lower-addressed half and b the
 * higher-addressed half on both little- and big-endian hosts.
 *
 * Only the low 16 bits of each argument are used. The shift is performed on
 * an unsigned value: left-shifting a negative (or overflowing) signed int is
 * undefined behaviour, and callers do pass negative values.
 *
 * @param a value for the first (lower-addressed) 16 bits
 * @param b value for the second (higher-addressed) 16 bits
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + ((uint32_t)a<<16);
#else
   return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
91
/** Remainder lookup: rem6[i] == i % 6 for i in 0..51 (one period per row). */
static const uint8_t rem6[52]={
    0, 1, 2, 3, 4, 5, /*  0.. 5 */
    0, 1, 2, 3, 4, 5, /*  6..11 */
    0, 1, 2, 3, 4, 5, /* 12..17 */
    0, 1, 2, 3, 4, 5, /* 18..23 */
    0, 1, 2, 3, 4, 5, /* 24..29 */
    0, 1, 2, 3, 4, 5, /* 30..35 */
    0, 1, 2, 3, 4, 5, /* 36..41 */
    0, 1, 2, 3, 4, 5, /* 42..47 */
    0, 1, 2, 3,       /* 48..51 */
};
95
/** Quotient lookup: div6[i] == i / 6 for i in 0..51 (one quotient per row). */
static const uint8_t div6[52]={
    0, 0, 0, 0, 0, 0, /*  0.. 5 */
    1, 1, 1, 1, 1, 1, /*  6..11 */
    2, 2, 2, 2, 2, 2, /* 12..17 */
    3, 3, 3, 3, 3, 3, /* 18..23 */
    4, 4, 4, 4, 4, 4, /* 24..29 */
    5, 5, 5, 5, 5, 5, /* 30..35 */
    6, 6, 6, 6, 6, 6, /* 36..41 */
    7, 7, 7, 7, 7, 7, /* 42..47 */
    8, 8, 8, 8,       /* 48..51 */
};
99
/**
 * Index sets used by fill_caches() to pick entries of the left neighbour(s).
 *
 * Columns 0..3 select the entries read for the four left-edge rows (they index
 * intra4x4_pred_mode / non_zero_count and scale b_stride for motion vectors);
 * columns 4..7 select the non_zero_count entries copied into column 0 of the
 * non_zero_count cache.
 *
 * Row 0 is the default. With MBAFF and a left pair whose field flag differs
 * from the current MB: row 3 is used for a field MB, row 2 for the top frame
 * MB of a pair and row 1 for the bottom frame MB.
 */
static const uint8_t left_block_options[4][8]={
    [0] = { 0, 1, 2, 3,   7, 10, 8, 11 },
    [1] = { 2, 2, 3, 3,   8, 11, 8, 11 },
    [2] = { 0, 0, 1, 1,   7, 10, 7, 10 },
    [3] = { 0, 2, 0, 2,   7, 10, 7, 10 },
};
106
/** log2 of the number of entries in each of the cavlc_level_tab tables */
#define LEVEL_TAB_BITS 8
/**
 * 7 lookup tables of 1<<LEVEL_TAB_BITS entries, two int8_t values per entry.
 * NOTE(review): the table is filled and consumed outside this part of the
 * file; the meaning of the two values per entry is not visible here.
 */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
109
/**
 * Loads the state of the macroblocks neighbouring the current one (h->mb_xy)
 * into the per-macroblock caches of h.
 *
 * Depending on mb_type, for_deblock and the stream parameters this sets:
 *  - h->top_mb_xy and h->left_mb_xy[],
 *  - the intra *_samples_available masks and intra4x4_pred_mode_cache
 *    (intra macroblocks, not for deblocking),
 *  - the border entries of non_zero_count_cache,
 *  - top_cbp / left_cbp (CABAC only),
 *  - the border entries of mv_cache / ref_cache, and with CABAC also of
 *    mvd_cache and direct_cache (inter or direct macroblocks),
 *  - neighbor_transform_size.
 *
 * @param h           decoder context
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero when called on behalf of the loop filter; in
 *                    that case neighbours are not restricted to the current
 *                    slice, the intra part is skipped and, outside
 *                    FRAME_MBAFF, the function returns without doing anything
 *                    when slice_num is 1 or the top macroblock has the same
 *                    slice_table entry as the current one.
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const uint8_t * left_block;
    int topleft_partition= -1;
    int i;

    // macroblock above; the row distance is doubled when FIELD_PICTURE is set
    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    // note: this early exit happens before top_mb_xy / left_mb_xy are stored
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
        return;

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    // default neighbour positions; adjusted below for MBAFF
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
    if(FRAME_MBAFF){
        // pair_xy addresses the top macroblock of the current pair (mb_y with bit 0 cleared)
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);

        // a field macroblock takes its upper neighbours one row further up
        // when it is the bottom one of the pair or the neighbour pair is field coded
        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
            top_xy -= s->mb_stride;
        }
        if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
            topleft_xy -= s->mb_stride;
        } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
            topleft_xy += s->mb_stride;
            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
            topleft_partition = 0;
        }
        if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
            topright_xy -= s->mb_stride;
        }
        // left pair coded differently from the current macroblock:
        // address the left pair and select a matching left_block mapping
        if (left_mb_field_flag != curr_mb_field_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
                left_block = left_block_options[3];
            } else {
                left_block= left_block_options[2 - bottom];
            }
        }
    }

    // publish the neighbour positions in the context
    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    if(for_deblock){
        // for deblocking, a neighbour type is kept whenever its slice_table
        // entry is below 0xFFFF, regardless of which slice it belongs to
        topleft_type = 0;
        topright_type = 0;
        top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(MB_MBAFF && !IS_INTRA(mb_type)){
            int list;
            for(list=0; list<h->list_count; list++){
                //These values were changed for ease of performing MC, we need to change them back
                //FIXME maybe we can make MC and loop filter use the same values or prevent
                //the MC code from changing ref_cache and rather use a temporary array.
                if(USES_LIST(mb_type,list)){
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    // replicate each of the two stored ref indices of a row over two cache bytes, for two cache rows
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                    ref += h->b8_stride;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                }
            }
        }
    }else{
        // for decoding, only neighbours belonging to the current slice count; others get type 0
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        // with constrained_intra_pred only neighbours matching IS_INTRA(-1) count, otherwise any nonzero type does
        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
        // start from "everything available" and clear bits for missing neighbours
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!(top_type & type_mask)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
            if(IS_INTERLACED(mb_type)){
                // the two left macroblocks are checked separately, each clearing its own part of the masks
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDFFF;
                    h->left_samples_available&= 0x5FFF;
                }
                if(!(left_type[1] & type_mask)){
                    h->topleft_samples_available&= 0xFF5F;
                    h->left_samples_available&= 0xFF5F;
                }
            }else{
                // both macroblocks of the left pair must be usable
                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                assert(left_xy[0] == left_xy[1]);
                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
                }
            }
        }else{
            if(!(left_type[0] & type_mask)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }

        if(!(topleft_type & type_mask))
            h->topleft_samples_available&= 0x7FFF;

        if(!(topright_type & type_mask))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            // top border of the prediction mode cache: stored modes of an
            // intra4x4 neighbour, else -1 if unusable or 2 if usable
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                int pred;
                if(!(top_type & type_mask))
                    pred= -1;
                else{
                    pred= 2;
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            // left border: two cache rows per left neighbour, same fallback rule
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(!(left_type[i] & type_mask))
                        pred= -1;
                    else{
                        pred= 2;
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    // top border of the non-zero-count cache; a missing neighbour yields 64,
    // or 0 for non-intra macroblocks when CABAC is used
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

    }else{
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    }

    // left border of the non-zero-count cache, entries chosen through left_block
    for (i=0; i<2; i++) {
        if(left_type[i]){
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        }else{
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
        }
    }

    // CABAC: neighbouring coded block patterns; a missing neighbour gives
    // 0x1C0 for intra macroblocks and 0 otherwise
    if( h->pps.cabac ) {
        // top_cbp
        if(top_type) {
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->top_cbp = 0x1C0;
        } else {
            h->top_cbp = 0;
        }
        // left_cbp
        if (left_type[0]) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = 0x1C0;
        } else {
            h->left_cbp = 0;
        }
        // bits 1 and 3 are taken from the left neighbours' cbp, at a bit position derived from left_block
        if (left_type[0]) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        }
        if (left_type[1]) {
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
        }
    }

#if 1
    // motion vector / reference index borders, per reference list
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            // lists this macroblock does not use are skipped unless it is direct or deblocking is enabled
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue;
            }
            h->mv_cache_clean[list]= 0;

            // top border: last 4x4 row and second 8x8 row of the top macroblock
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                // one 32-bit store sets all four ref entries: LIST_NOT_USED if the neighbour exists, else PART_NOT_AVAILABLE
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            // left border: two cache rows per left neighbour, rows chosen through left_block
            for(i=0; i<2; i++){
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            // the remaining entries are not loaded for deblocking, nor for
            // non-spatial direct macroblocks outside FRAME_MBAFF
            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
                continue;

            // top-left corner; topleft_partition is -1 (both masks pass) or 0 (no extra row offset)
            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            // top-right corner
            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
                continue;

            // reset a fixed set of cache slots to "not available" with zero motion vectors
            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            if( h->pps.cabac ) {
                /* XXX beurk, Load mvd */
                // same top/left layout as for mv_cache, read from mvd_table
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                }
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                }
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                }
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                if(h->slice_type_nos == FF_B_TYPE){
                    // B slices: clear the 4x4 area of the current macroblock,
                    // then load the direct flags of the top and left neighbours
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    }else{
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                    }

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
                }
            }

            // MBAFF: rescale the border entries taken from neighbours whose
            // field/frame coding differs from the current macroblock.
            // MAP_MVS applies MAP_F2F to every loaded border slot.
            if(FRAME_MBAFF){
#define MAP_MVS\
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                if(MB_FIELD){
                    // field macroblock, frame neighbour: double the ref index, halve the vertical mv and mvd
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }else{
                    // frame macroblock, field neighbour: halve the ref index, double the vertical mv and mvd
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] <<= 1;\
                        h->mvd_cache[list][idx][1] <<= 1;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }
#endif

    // number (0..2) of the top / first left neighbours that use the 8x8 transform
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
}
548
549static inline void write_back_intra_pred_mode(H264Context *h){
550    const int mb_xy= h->mb_xy;
551
552    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
559}
560
561/**
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
563 */
564static inline int check_intra4x4_pred_mode(H264Context *h){
565    MpegEncContext * const s = &h->s;
566    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
568    int i;
569
570    if(!(h->top_samples_available&0x8000)){
571        for(i=0; i<4; i++){
572            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573            if(status<0){
574                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
575                return -1;
576            } else if(status){
577                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
578            }
579        }
580    }
581
582    if((h->left_samples_available&0x8888)!=0x8888){
583        static const int mask[4]={0x8000,0x2000,0x80,0x20};
584        for(i=0; i<4; i++){
585            if(!(h->left_samples_available&mask[i])){
586                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587                if(status<0){
588                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589                    return -1;
590                } else if(status){
591                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
592                }
593            }
594        }
595    }
596
597    return 0;
598} //FIXME cleanup like next
599
600/**
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
602 */
603static inline int check_intra_pred_mode(H264Context *h, int mode){
604    MpegEncContext * const s = &h->s;
605    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
607
608    if(mode > 6U) {
609        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610        return -1;
611    }
612
613    if(!(h->top_samples_available&0x8000)){
614        mode= top[ mode ];
615        if(mode<0){
616            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
617            return -1;
618        }
619    }
620
621    if((h->left_samples_available&0x8080) != 0x8080){
622        mode= left[ mode ];
623        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
625        }
626        if(mode<0){
627            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
628            return -1;
629        }
630    }
631
632    return mode;
633}
634
635/**
636 * gets the predicted intra4x4 prediction mode.
637 */
638static inline int pred_intra_mode(H264Context *h, int n){
639    const int index8= scan8[n];
640    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642    const int min= FFMIN(left, top);
643
644    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
645
646    if(min<0) return DC_PRED;
647    else      return min;
648}
649
650static inline void write_back_non_zero_count(H264Context *h){
651    const int mb_xy= h->mb_xy;
652
653    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
660
661    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
664
665    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
668}
669
670/**
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
673 */
674static inline int pred_non_zero_count(H264Context *h, int n){
675    const int index8= scan8[n];
676    const int left= h->non_zero_count_cache[index8 - 1];
677    const int top = h->non_zero_count_cache[index8 - 8];
678    int i= left + top;
679
680    if(i<64) i= (i+1)>>1;
681
682    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
683
684    return i&31;
685}
686
/**
 * Fetches the diagonal neighbour motion vector of a partition: the top-right
 * one if its reference is available, otherwise the top-left one.
 * In MBAFF frames some neighbours are read straight from the current picture
 * and rescaled into the scratch slot mv_cache[list][scan8[0]-2].
 * @param C receives a pointer to the selected motion vector
 * @param i position of the partition in the scan8-indexed caches
 * @param list reference list index
 * @param part_width offset added to the top neighbour position to reach the
 *                   top-right one (partition width; callers in view pass 1, 2 or 4)
 * @return the reference index belonging to *C (may be LIST_NOT_USED or
 *         PART_NOT_AVAILABLE)
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        const int16_t *mv;
        /* clear the scratch slot and point *C at it; SET_DIAG_MV fills it in */
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        /* frame MB in an odd MB row, partition in the first cache row,
         * top-right neighbour available */
        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV returns from fetch_diagonal_mv: it reads the MV at 4x4 position
 * (X4,Y4) of the current picture, applies MV_OP to its vertical component,
 * stores it in the scratch slot and returns the reference index with REF_OP
 * applied, or LIST_NOT_USED if that macroblock does not use the list. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
            }
        }
        /* top-right unavailable, partition in the (i&7)==4 cache column and the
         * left neighbour available: take the vector from the left macroblock
         * when its field/frame coding differs from the current one */
        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            if(!MB_FIELD
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
            }
            if(MB_FIELD
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
               && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
#undef SET_DIAG_MV
    }

    /* regular case: read the neighbour from the caches */
    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
        tprintf(s->avctx, "topright MV not available\n");

        /* fall back to the top-left neighbour */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }
}
743
744/**
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
750 */
751static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752    const int index8= scan8[n];
753    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
754    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
755    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757    const int16_t * C;
758    int diagonal_ref, match_count;
759
760    assert(part_width==1 || part_width==2 || part_width==4);
761
762/* mv_cache
763  B . . A T T T T
764  U . . L . . , .
765  U . . L . . . .
766  U . . L . . , .
767  . . . L . . . .
768*/
769
770    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773    if(match_count > 1){ //most common
774        *mx= mid_pred(A[0], B[0], C[0]);
775        *my= mid_pred(A[1], B[1], C[1]);
776    }else if(match_count==1){
777        if(left_ref==ref){
778            *mx= A[0];
779            *my= A[1];
780        }else if(top_ref==ref){
781            *mx= B[0];
782            *my= B[1];
783        }else{
784            *mx= C[0];
785            *my= C[1];
786        }
787    }else{
788        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
789            *mx= A[0];
790            *my= A[1];
791        }else{
792            *mx= mid_pred(A[0], B[0], C[0]);
793            *my= mid_pred(A[1], B[1], C[1]);
794        }
795    }
796
797    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
798}
799
800/**
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
805 */
806static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807    if(n==0){
808        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
809        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810
811        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
812
813        if(top_ref == ref){
814            *mx= B[0];
815            *my= B[1];
816            return;
817        }
818    }else{
819        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
820        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
821
822        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
823
824        if(left_ref == ref){
825            *mx= A[0];
826            *my= A[1];
827            return;
828        }
829    }
830
831    //RARE
832    pred_motion(h, n, 4, list, ref, mx, my);
833}
834
835/**
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
840 */
841static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842    if(n==0){
843        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
844        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
845
846        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
847
848        if(left_ref == ref){
849            *mx= A[0];
850            *my= A[1];
851            return;
852        }
853    }else{
854        const int16_t * C;
855        int diagonal_ref;
856
857        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
858
859        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
860
861        if(diagonal_ref == ref){
862            *mx= C[0];
863            *my= C[1];
864            return;
865        }
866    }
867
868    //RARE
869    pred_motion(h, n, 2, list, ref, mx, my);
870}
871
872static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875
876    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
877
878    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879       || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880       || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
881
882        *mx = *my = 0;
883        return;
884    }
885
886    pred_motion(h, 0, 4, 0, 0, mx, my);
887
888    return;
889}
890
891static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892    int poc0 = h->ref_list[0][i].poc;
893    int td = av_clip(poc1 - poc0, -128, 127);
894    if(td == 0 || h->ref_list[0][i].long_ref){
895        return 256;
896    }else{
897        int tb = av_clip(poc - poc0, -128, 127);
898        int tx = (16384 + (FFABS(td) >> 1)) / td;
899        return av_clip((tb*tx + 32) >> 6, -1024, 1023);
900    }
901}
902
903static inline void direct_dist_scale_factor(H264Context * const h){
904    MpegEncContext * const s = &h->s;
905    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906    const int poc1 = h->ref_list[1][0].poc;
907    int i, field;
908    for(field=0; field<2; field++){
909        const int poc  = h->s.current_picture_ptr->field_poc[field];
910        const int poc1 = h->ref_list[1][0].field_poc[field];
911        for(i=0; i < 2*h->ref_count[0]; i++)
912            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
913    }
914
915    for(i=0; i<h->ref_count[0]; i++){
916        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
917    }
918}
919
920static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921    MpegEncContext * const s = &h->s;
922    Picture * const ref1 = &h->ref_list[1][0];
923    int j, old_ref, rfield;
924    int start= mbafi ? 16                      : 0;
925    int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926    int interl= mbafi || s->picture_structure != PICT_FRAME;
927
928    /* bogus; fills in for missing frames */
929    memset(map[list], 0, sizeof(map[list]));
930
931    for(rfield=0; rfield<2; rfield++){
932        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933            int poc = ref1->ref_poc[colfield][list][old_ref];
934
935            if     (!interl)
936                poc |= 3;
937            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938                poc= (poc&~3) + rfield + 1;
939
940            for(j=start; j<end; j++){
941                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942                    int cur_ref= mbafi ? (j-16)^field : j;
943                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944                    if(rfield == field)
945                        map[list][old_ref] = cur_ref;
946                    break;
947                }
948            }
949        }
950    }
951}
952
953static inline void direct_ref_list_init(H264Context * const h){
954    MpegEncContext * const s = &h->s;
955    Picture * const ref1 = &h->ref_list[1][0];
956    Picture * const cur = s->current_picture_ptr;
957    int list, j, field;
958    int sidx= (s->picture_structure&1)^1;
959    int ref1sidx= (ref1->reference&1)^1;
960
961    for(list=0; list<2; list++){
962        cur->ref_count[sidx][list] = h->ref_count[list];
963        for(j=0; j<h->ref_count[list]; j++)
964            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
965    }
966
967    if(s->picture_structure == PICT_FRAME){
968        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969        memcpy(cur->ref_poc  [1], cur->ref_poc  [0], sizeof(cur->ref_poc  [0]));
970    }
971
972    cur->mbaff= FRAME_MBAFF;
973
974    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
975        return;
976
977    for(list=0; list<2; list++){
978        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979        for(field=0; field<2; field++)
980            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
981    }
982}
983
984static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985    MpegEncContext * const s = &h->s;
986    int b8_stride = h->b8_stride;
987    int b4_stride = h->b_stride;
988    int mb_xy = h->mb_xy;
989    int mb_type_col[2];
990    const int16_t (*l1mv0)[2], (*l1mv1)[2];
991    const int8_t *l1ref0, *l1ref1;
992    const int is_b8x8 = IS_8X8(*mb_type);
993    unsigned int sub_mb_type;
994    int i8, i4;
995
996#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
997
998    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
999        if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
1000            int cur_poc = s->current_picture_ptr->poc;
1001            int *col_poc = h->ref_list[1]->field_poc;
1002            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1003            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1004            b8_stride = 0;
1005        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1006            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1007            mb_xy += s->mb_stride*fieldoff;
1008        }
1009        goto single_col;
1010    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
1011        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
1012            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1013            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1014            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1015            b8_stride *= 3;
1016            b4_stride *= 6;
1017            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1018            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1019                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1020                && !is_b8x8){
1021                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1023            }else{
1024                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1025                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026            }
1027        }else{                                           //     AFR/FR    -> AFR/FR
1028single_col:
1029            mb_type_col[0] =
1030            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1031            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1032                /* FIXME save sub mb types from previous frames (or derive from MVs)
1033                * so we know exactly what block size to use */
1034                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1035                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1036            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1037                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1039            }else{
1040                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1041                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042            }
1043        }
1044    }
1045
1046    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1047    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1048    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1049    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1050    if(!b8_stride){
1051        if(s->mb_y&1){
1052            l1ref0 += h->b8_stride;
1053            l1ref1 += h->b8_stride;
1054            l1mv0  +=  2*b4_stride;
1055            l1mv1  +=  2*b4_stride;
1056        }
1057    }
1058
1059    if(h->direct_spatial_mv_pred){
1060        int ref[2];
1061        int mv[2][2];
1062        int list;
1063
1064        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1065
1066        /* ref = min(neighbors) */
1067        for(list=0; list<2; list++){
1068            int refa = h->ref_cache[list][scan8[0] - 1];
1069            int refb = h->ref_cache[list][scan8[0] - 8];
1070            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1071            if(refc == PART_NOT_AVAILABLE)
1072                refc = h->ref_cache[list][scan8[0] - 8 - 1];
1073            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1074            if(ref[list] < 0)
1075                ref[list] = -1;
1076        }
1077
1078        if(ref[0] < 0 && ref[1] < 0){
1079            ref[0] = ref[1] = 0;
1080            mv[0][0] = mv[0][1] =
1081            mv[1][0] = mv[1][1] = 0;
1082        }else{
1083            for(list=0; list<2; list++){
1084                if(ref[list] >= 0)
1085                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1086                else
1087                    mv[list][0] = mv[list][1] = 0;
1088            }
1089        }
1090
1091        if(ref[1] < 0){
1092            if(!is_b8x8)
1093                *mb_type &= ~MB_TYPE_L1;
1094            sub_mb_type &= ~MB_TYPE_L1;
1095        }else if(ref[0] < 0){
1096            if(!is_b8x8)
1097                *mb_type &= ~MB_TYPE_L0;
1098            sub_mb_type &= ~MB_TYPE_L0;
1099        }
1100
1101        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1102            for(i8=0; i8<4; i8++){
1103                int x8 = i8&1;
1104                int y8 = i8>>1;
1105                int xy8 = x8+y8*b8_stride;
1106                int xy4 = 3*x8+y8*b4_stride;
1107                int a=0, b=0;
1108
1109                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1110                    continue;
1111                h->sub_mb_type[i8] = sub_mb_type;
1112
1113                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1114                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1115                if(!IS_INTRA(mb_type_col[y8])
1116                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1117                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1118                    if(ref[0] > 0)
1119                        a= pack16to32(mv[0][0],mv[0][1]);
1120                    if(ref[1] > 0)
1121                        b= pack16to32(mv[1][0],mv[1][1]);
1122                }else{
1123                    a= pack16to32(mv[0][0],mv[0][1]);
1124                    b= pack16to32(mv[1][0],mv[1][1]);
1125                }
1126                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1127                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1128            }
1129        }else if(IS_16X16(*mb_type)){
1130            int a=0, b=0;
1131
1132            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1133            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1134            if(!IS_INTRA(mb_type_col[0])
1135               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1136                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1137                       && (h->x264_build>33 || !h->x264_build)))){
1138                if(ref[0] > 0)
1139                    a= pack16to32(mv[0][0],mv[0][1]);
1140                if(ref[1] > 0)
1141                    b= pack16to32(mv[1][0],mv[1][1]);
1142            }else{
1143                a= pack16to32(mv[0][0],mv[0][1]);
1144                b= pack16to32(mv[1][0],mv[1][1]);
1145            }
1146            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1147            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1148        }else{
1149            for(i8=0; i8<4; i8++){
1150                const int x8 = i8&1;
1151                const int y8 = i8>>1;
1152
1153                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1154                    continue;
1155                h->sub_mb_type[i8] = sub_mb_type;
1156
1157                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1158                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1159                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1160                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1161
1162                /* col_zero_flag */
1163                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1164                                              || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1165                                                  && (h->x264_build>33 || !h->x264_build)))){
1166                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1167                    if(IS_SUB_8X8(sub_mb_type)){
1168                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1169                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1170                            if(ref[0] == 0)
1171                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1172                            if(ref[1] == 0)
1173                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1174                        }
1175                    }else
1176                    for(i4=0; i4<4; i4++){
1177                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1178                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1179                            if(ref[0] == 0)
1180                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1181                            if(ref[1] == 0)
1182                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1183                        }
1184                    }
1185                }
1186            }
1187        }
1188    }else{ /* direct temporal mv pred */
1189        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1190        const int *dist_scale_factor = h->dist_scale_factor;
1191        int ref_offset= 0;
1192
1193        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1194            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1195            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1196            dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1197        }
1198        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1199            ref_offset += 16;
1200
1201        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1202            /* FIXME assumes direct_8x8_inference == 1 */
1203            int y_shift  = 2*!IS_INTERLACED(*mb_type);
1204
1205            for(i8=0; i8<4; i8++){
1206                const int x8 = i8&1;
1207                const int y8 = i8>>1;
1208                int ref0, scale;
1209                const int16_t (*l1mv)[2]= l1mv0;
1210
1211                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1212                    continue;
1213                h->sub_mb_type[i8] = sub_mb_type;
1214
1215                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1216                if(IS_INTRA(mb_type_col[y8])){
1217                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1218                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1219                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1220                    continue;
1221                }
1222
1223                ref0 = l1ref0[x8 + y8*b8_stride];
1224                if(ref0 >= 0)
1225                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
1226                else{
1227                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1228                    l1mv= l1mv1;
1229                }
1230                scale = dist_scale_factor[ref0];
1231                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1232
1233                {
1234                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1235                    int my_col = (mv_col[1]<<y_shift)/2;
1236                    int mx = (scale * mv_col[0] + 128) >> 8;
1237                    int my = (scale * my_col + 128) >> 8;
1238                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1239                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1240                }
1241            }
1242            return;
1243        }
1244
1245        /* one-to-one mv scaling */
1246
1247        if(IS_16X16(*mb_type)){
1248            int ref, mv0, mv1;
1249
1250            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1251            if(IS_INTRA(mb_type_col[0])){
1252                ref=mv0=mv1=0;
1253            }else{
1254                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1255                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
1256                const int scale = dist_scale_factor[ref0];
1257                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1258                int mv_l0[2];
1259                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1260                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1261                ref= ref0;
1262                mv0= pack16to32(mv_l0[0],mv_l0[1]);
1263                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1264            }
1265            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1266            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1267            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1268        }else{
1269            for(i8=0; i8<4; i8++){
1270                const int x8 = i8&1;
1271                const int y8 = i8>>1;
1272                int ref0, scale;
1273                const int16_t (*l1mv)[2]= l1mv0;
1274
1275                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1276                    continue;
1277                h->sub_mb_type[i8] = sub_mb_type;
1278                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1279                if(IS_INTRA(mb_type_col[0])){
1280                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1281                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1282                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1283                    continue;
1284                }
1285
1286                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1287                if(ref0 >= 0)
1288                    ref0 = map_col_to_list0[0][ref0];
1289                else{
1290                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1291                    l1mv= l1mv1;
1292                }
1293                scale = dist_scale_factor[ref0];
1294
1295                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1296                if(IS_SUB_8X8(sub_mb_type)){
1297                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1298                    int mx = (scale * mv_col[0] + 128) >> 8;
1299                    int my = (scale * mv_col[1] + 128) >> 8;
1300                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1301                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1302                }else
1303                for(i4=0; i4<4; i4++){
1304                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1305                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1306                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1307                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1308                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1309                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1310                }
1311            }
1312        }
1313    }
1314}
1315
1316static inline void write_back_motion(H264Context *h, int mb_type){
1317    MpegEncContext * const s = &h->s;
1318    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1319    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1320    int list;
1321
1322    if(!USES_LIST(mb_type, 0))
1323        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1324
1325    for(list=0; list<h->list_count; list++){
1326        int y;
1327        if(!USES_LIST(mb_type, list))
1328            continue;
1329
1330        for(y=0; y<4; y++){
1331            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1332            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1333        }
1334        if( h->pps.cabac ) {
1335            if(IS_SKIP(mb_type))
1336                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1337            else
1338            for(y=0; y<4; y++){
1339                *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1340                *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1341            }
1342        }
1343
1344        {
1345            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1346            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1347            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1348            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1349            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1350        }
1351    }
1352
1353    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1354        if(IS_8X8(mb_type)){
1355            uint8_t *direct_table = &h->direct_table[b8_xy];
1356            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1357            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1358            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1359        }
1360    }
1361}
1362
1363const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1364    int i, si, di;
1365    uint8_t *dst;
1366    int bufidx;
1367
1368//    src[0]&0x80;                //forbidden bit
1369    h->nal_ref_idc= src[0]>>5;
1370    h->nal_unit_type= src[0]&0x1F;
1371
1372    src++; length--;
1373#if 0
1374    for(i=0; i<length; i++)
1375        printf("%2X ", src[i]);
1376#endif
1377
1378#if HAVE_FAST_UNALIGNED
1379# if HAVE_FAST_64BIT
1380#   define RS 7
1381    for(i=0; i+1<length; i+=9){
1382        if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1383# else
1384#   define RS 3
1385    for(i=0; i+1<length; i+=5){
1386        if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1387# endif
1388            continue;
1389        if(i>0 && !src[i]) i--;
1390        while(src[i]) i++;
1391#else
1392#   define RS 0
1393    for(i=0; i+1<length; i+=2){
1394        if(src[i]) continue;
1395        if(i>0 && src[i-1]==0) i--;
1396#endif
1397        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1398            if(src[i+2]!=3){
1399                /* startcode, so we must be past the end */
1400                length=i;
1401            }
1402            break;
1403        }
1404        i-= RS;
1405    }
1406
1407    if(i>=length-1){ //no escaped 0
1408        *dst_length= length;
1409        *consumed= length+1; //+1 for the header
1410        return src;
1411    }
1412
1413    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1414    h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1415    dst= h->rbsp_buffer[bufidx];
1416
1417    if (dst == NULL){
1418        return NULL;
1419    }
1420
1421//printf("decoding esc\n");
1422    memcpy(dst, src, i);
1423    si=di=i;
1424    while(si+2<length){
1425        //remove escapes (very rare 1:2^22)
1426        if(src[si+2]>3){
1427            dst[di++]= src[si++];
1428            dst[di++]= src[si++];
1429        }else if(src[si]==0 && src[si+1]==0){
1430            if(src[si+2]==3){ //escape
1431                dst[di++]= 0;
1432                dst[di++]= 0;
1433                si+=3;
1434                continue;
1435            }else //next start code
1436                goto nsc;
1437        }
1438
1439        dst[di++]= src[si++];
1440    }
1441    while(si<length)
1442        dst[di++]= src[si++];
1443nsc:
1444
1445    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1446
1447    *dst_length= di;
1448    *consumed= si + 1;//+1 for the header
1449//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1450    return dst;
1451}
1452
1453int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1454    int v= *src;
1455    int r;
1456
1457    tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1458
1459    for(r=1; r<9; r++){
1460        if(v&1) return r;
1461        v>>=1;
1462    }
1463    return 0;
1464}
1465
1466/**
1467 * IDCT transforms the 16 dc values and dequantizes them.
1468 * @param qp quantization parameter
1469 */
1470static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1471#define stride 16
1472    int i;
1473    int temp[16]; //FIXME check if this is a good idea
1474    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1475    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1476
1477//memset(block, 64, 2*256);
1478//return;
1479    for(i=0; i<4; i++){
1480        const int offset= y_offset[i];
1481        const int z0= block[offset+stride*0] + block[offset+stride*4];
1482        const int z1= block[offset+stride*0] - block[offset+stride*4];
1483        const int z2= block[offset+stride*1] - block[offset+stride*5];
1484        const int z3= block[offset+stride*1] + block[offset+stride*5];
1485
1486        temp[4*i+0]= z0+z3;
1487        temp[4*i+1]= z1+z2;
1488        temp[4*i+2]= z1-z2;
1489        temp[4*i+3]= z0-z3;
1490    }
1491
1492    for(i=0; i<4; i++){
1493        const int offset= x_offset[i];
1494        const int z0= temp[4*0+i] + temp[4*2+i];
1495        const int z1= temp[4*0+i] - temp[4*2+i];
1496        const int z2= temp[4*1+i] - temp[4*3+i];
1497        const int z3= temp[4*1+i] + temp[4*3+i];
1498
1499        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1500        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1501        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1502        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1503    }
1504}
1505
#if 0
/**
 * DCT transforms the 16 luma dc values in place (disabled code).
 * Same butterfly structure as h264_luma_dc_dequant_idct_c(), but the
 * results are halved instead of being dequantized.
 * Relies on the "stride" macro defined inside h264_luma_dc_dequant_idct_c().
 * The qp parameter is commented out in the signature. FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    /* first butterfly pass, one group of 4 dc values per y_offset */
    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    /* second butterfly pass across the groups, results halved */
    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
1545
1546#undef xStride
1547#undef stride
1548
1549static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1550    const int stride= 16*2;
1551    const int xStride= 16;
1552    int a,b,c,d,e;
1553
1554    a= block[stride*0 + xStride*0];
1555    b= block[stride*0 + xStride*1];
1556    c= block[stride*1 + xStride*0];
1557    d= block[stride*1 + xStride*1];
1558
1559    e= a-b;
1560    a= a+b;
1561    b= c-d;
1562    c= c+d;
1563
1564    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1565    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1566    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1567    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1568}
1569
#if 0
/**
 * Transforms the 2x2 chroma dc values in place (disabled code).
 * Same butterfly as chroma_dc_dequant_idct_c(), without any scaling.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    /* sums and differences of each row; e keeps a-b because a is reused */
    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
1592
1593/**
1594 * gets the chroma qp.
1595 */
1596static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1597    return h->pps.chroma_qp_table[t][qscale];
1598}
1599
/**
 * Motion compensates one partition from a single reference picture.
 * Predicts luma with qpix_op and both chroma planes with chroma_op, using
 * the motion vector cached for block n of the given list.
 * @param pic           reference picture to predict from
 * @param n             block index, used through scan8[] to fetch the mv
 * @param square        if 0, a second luma call is made at offset delta
 * @param chroma_height height passed to chroma_op
 * @param delta         offset of the second luma call (dest and source)
 * @param list          reference list the motion vector is taken from
 * @param src_x_offset  horizontal position of the partition; multiplied by 8
 *                      before being added to the mv
 * @param src_y_offset  vertical position of the partition, same scale
 * @param qpix_op       luma functions, indexed by the sub-sample position
 * @param chroma_op     chroma function
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    /* absolute mv: low 2 bits select the luma sub-sample position, the
     * remaining bits (>>2) the full sample position */
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* less margin is usable when a fractional position is involved;
     * presumably because interpolation reads extra samples around the block */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    /* source block reaches beyond the picture plus the usable margin:
     * build the luma source in edge_emu_buffer instead */
    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    /* chroma: low 3 bits of the mv are the fractional part */
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    /* edge_emu_buffer is shared by both chroma planes, so cb has to be
     * predicted before the buffer is refilled for cr */
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
1657
1658static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1659                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1660                           int x_offset, int y_offset,
1661                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1662                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1663                           int list0, int list1){
1664    MpegEncContext * const s = &h->s;
1665    qpel_mc_func *qpix_op=  qpix_put;
1666    h264_chroma_mc_func chroma_op= chroma_put;
1667
1668    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1669    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1670    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1671    x_offset += 8*s->mb_x;
1672    y_offset += 8*(s->mb_y >> MB_FIELD);
1673
1674    if(list0){
1675        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1676        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1677                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1678                           qpix_op, chroma_op);
1679
1680        qpix_op=  qpix_avg;
1681        chroma_op= chroma_avg;
1682    }
1683
1684    if(list1){
1685        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1686        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1687                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688                           qpix_op, chroma_op);
1689    }
1690}
1691
1692static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1693                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1694                           int x_offset, int y_offset,
1695                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1696                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1697                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1698                           int list0, int list1){
1699    MpegEncContext * const s = &h->s;
1700
1701    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1702    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1703    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1704    x_offset += 8*s->mb_x;
1705    y_offset += 8*(s->mb_y >> MB_FIELD);
1706
1707    if(list0 && list1){
1708        /* don't optimize for luma-only case, since B-frames usually
1709         * use implicit weights => chroma too. */
1710        uint8_t *tmp_cb = s->obmc_scratchpad;
1711        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1712        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1713        int refn0 = h->ref_cache[0][ scan8[n] ];
1714        int refn1 = h->ref_cache[1][ scan8[n] ];
1715
1716        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1717                    dest_y, dest_cb, dest_cr,
1718                    x_offset, y_offset, qpix_put, chroma_put);
1719        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1720                    tmp_y, tmp_cb, tmp_cr,
1721                    x_offset, y_offset, qpix_put, chroma_put);
1722
1723        if(h->use_weight == 2){
1724            int weight0 = h->implicit_weight[refn0][refn1];
1725            int weight1 = 64 - weight0;
1726            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
1727            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1728            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1729        }else{
1730            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1731                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1732                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1733            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1734                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1735                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1736            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1737                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1738                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1739        }
1740    }else{
1741        int list = list1 ? 1 : 0;
1742        int refn = h->ref_cache[list][ scan8[n] ];
1743        Picture *ref= &h->ref_list[list][refn];
1744        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1745                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1746                    qpix_put, chroma_put);
1747
1748        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1749                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
1750        if(h->use_weight_chroma){
1751            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1752                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1753            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1755        }
1756    }
1757}
1758
1759static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1760                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1761                           int x_offset, int y_offset,
1762                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1763                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1764                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1765                           int list0, int list1){
1766    if((h->use_weight==2 && list0 && list1
1767        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1768       || h->use_weight==1)
1769        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1770                         x_offset, y_offset, qpix_put, chroma_put,
1771                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1772    else
1773        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1774                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1775}
1776
1777static inline void prefetch_motion(H264Context *h, int list){
1778    /* fetch pixels for estimated mv 4 macroblocks ahead
1779     * optimized for 64byte cache lines */
1780    MpegEncContext * const s = &h->s;
1781    const int refn = h->ref_cache[list][scan8[0]];
1782    if(refn >= 0){
1783        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1784        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1785        uint8_t **src= h->ref_list[list][refn].data;
1786        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1787        s->dsp.prefetch(src[0]+off, s->linesize, 4);
1788        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1789        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1790    }
1791}
1792
/**
 * Performs the inter prediction of the current macroblock.
 * Dispatches on the macroblock (and sub macroblock) partitioning and calls
 * mc_part() once per partition with the matching function table entries.
 * The mc_part() arguments after h are: block index n, square flag,
 * chroma height, delta, the destinations, then x/y offset of the partition.
 * @param qpix_put   luma put functions, first index selected per partition size
 * @param chroma_put chroma put functions, indexed per partition size
 * @param qpix_avg   luma avg functions, same layout as qpix_put
 * @param chroma_avg chroma avg functions, same layout as chroma_put
 * @param weight_op  weight functions, indexed per partition shape
 * @param weight_avg biweight functions, indexed per partition shape
 */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        /* one partition covering the whole macroblock */
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                &weight_op[0], &weight_avg[0],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        /* two partitions; the second starts at block 8, y offset 4 */
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        /* two partitions; the second starts at block 4, x offset 4 */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* four sub macroblocks, each with its own sub partitioning */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                /* four 4x4 blocks inside this sub macroblock */
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                }
            }
        }
    }

    prefetch_motion(h, 1);
}
1879
1880static av_cold void init_cavlc_level_tab(void){
1881    int suffix_length, mask;
1882    unsigned int i;
1883
1884    for(suffix_length=0; suffix_length<7; suffix_length++){
1885        for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1886            int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1887            int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1888
1889            mask= -(level_code&1);
1890            level_code= (((2+level_code)>>1) ^ mask) - mask;
1891            if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1892                cavlc_level_tab[suffix_length][i][0]= level_code;
1893                cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1894            }else if(prefix + 1 <= LEVEL_TAB_BITS){
1895                cavlc_level_tab[suffix_length][i][0]= prefix+100;
1896                cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1897            }else{
1898                cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1899                cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1900            }
1901        }
1902    }
1903}
1904
1905static av_cold void decode_init_vlc(void){
1906    static int done = 0;
1907
1908    if (!done) {
1909        int i;
1910        int offset;
1911        done = 1;
1912
1913        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1914        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1915        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1916                 &chroma_dc_coeff_token_len [0], 1, 1,
1917                 &chroma_dc_coeff_token_bits[0], 1, 1,
1918                 INIT_VLC_USE_NEW_STATIC);
1919
1920        offset = 0;
1921        for(i=0; i<4; i++){
1922            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1923            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1924            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1925                     &coeff_token_len [i][0], 1, 1,
1926                     &coeff_token_bits[i][0], 1, 1,
1927                     INIT_VLC_USE_NEW_STATIC);
1928            offset += coeff_token_vlc_tables_size[i];
1929        }
1930        /*
1931         * This is a one time safety check to make sure that
1932         * the packed static coeff_token_vlc table sizes
1933         * were initialized correctly.
1934         */
1935        assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1936
1937        for(i=0; i<3; i++){
1938            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1939            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1940            init_vlc(&chroma_dc_total_zeros_vlc[i],
1941                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1942                     &chroma_dc_total_zeros_len [i][0], 1, 1,
1943                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
1944                     INIT_VLC_USE_NEW_STATIC);
1945        }
1946        for(i=0; i<15; i++){
1947            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1948            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1949            init_vlc(&total_zeros_vlc[i],
1950                     TOTAL_ZEROS_VLC_BITS, 16,
1951                     &total_zeros_len [i][0], 1, 1,
1952                     &total_zeros_bits[i][0], 1, 1,
1953                     INIT_VLC_USE_NEW_STATIC);
1954        }
1955
1956        for(i=0; i<6; i++){
1957            run_vlc[i].table = run_vlc_tables[i];
1958            run_vlc[i].table_allocated = run_vlc_tables_size;
1959            init_vlc(&run_vlc[i],
1960                     RUN_VLC_BITS, 7,
1961                     &run_len [i][0], 1, 1,
1962                     &run_bits[i][0], 1, 1,
1963                     INIT_VLC_USE_NEW_STATIC);
1964        }
1965        run7_vlc.table = run7_vlc_table,
1966        run7_vlc.table_allocated = run7_vlc_table_size;
1967        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1968                 &run_len [6][0], 1, 1,
1969                 &run_bits[6][0], 1, 1,
1970                 INIT_VLC_USE_NEW_STATIC);
1971
1972        init_cavlc_level_tab();
1973    }
1974}
1975
1976static void free_tables(H264Context *h){
1977    int i;
1978    H264Context *hx;
1979    av_freep(&h->intra4x4_pred_mode);
1980    av_freep(&h->chroma_pred_mode_table);
1981    av_freep(&h->cbp_table);
1982    av_freep(&h->mvd_table[0]);
1983    av_freep(&h->mvd_table[1]);
1984    av_freep(&h->direct_table);
1985    av_freep(&h->non_zero_count);
1986    av_freep(&h->slice_table_base);
1987    h->slice_table= NULL;
1988
1989    av_freep(&h->mb2b_xy);
1990    av_freep(&h->mb2b8_xy);
1991
1992    for(i = 0; i < h->s.avctx->thread_count; i++) {
1993        hx = h->thread_context[i];
1994        if(!hx) continue;
1995        av_freep(&hx->top_borders[1]);
1996        av_freep(&hx->top_borders[0]);
1997        av_freep(&hx->s.obmc_scratchpad);
1998    }
1999}
2000
2001static void init_dequant8_coeff_table(H264Context *h){
2002    int i,q,x;
2003    const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2004    h->dequant8_coeff[0] = h->dequant8_buffer[0];
2005    h->dequant8_coeff[1] = h->dequant8_buffer[1];
2006
2007    for(i=0; i<2; i++ ){
2008        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2009            h->dequant8_coeff[1] = h->dequant8_buffer[0];
2010            break;
2011        }
2012
2013        for(q=0; q<52; q++){
2014            int shift = div6[q];
2015            int idx = rem6[q];
2016            for(x=0; x<64; x++)
2017                h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2018                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2019                    h->pps.scaling_matrix8[i][x]) << shift;
2020        }
2021    }
2022}
2023
2024static void init_dequant4_coeff_table(H264Context *h){
2025    int i,j,q,x;
2026    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2027    for(i=0; i<6; i++ ){
2028        h->dequant4_coeff[i] = h->dequant4_buffer[i];
2029        for(j=0; j<i; j++){
2030            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2031                h->dequant4_coeff[i] = h->dequant4_buffer[j];
2032                break;
2033            }
2034        }
2035        if(j<i)
2036            continue;
2037
2038        for(q=0; q<52; q++){
2039            int shift = div6[q] + 2;
2040            int idx = rem6[q];
2041            for(x=0; x<16; x++)
2042                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2043                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2044                    h->pps.scaling_matrix4[i][x]) << shift;
2045        }
2046    }
2047}
2048
2049static void init_dequant_tables(H264Context *h){
2050    int i,x;
2051    init_dequant4_coeff_table(h);
2052    if(h->pps.transform_8x8_mode)
2053        init_dequant8_coeff_table(h);
2054    if(h->sps.transform_bypass){
2055        for(i=0; i<6; i++)
2056            for(x=0; x<16; x++)
2057                h->dequant4_coeff[i][0][x] = 1<<6;
2058        if(h->pps.transform_8x8_mode)
2059            for(i=0; i<2; i++)
2060                for(x=0; x<64; x++)
2061                    h->dequant8_coeff[i][0][x] = 1<<6;
2062    }
2063}
2064
2065
2066/**
2067 * allocates tables.
2068 * needs width/height
2069 */
2070static int alloc_tables(H264Context *h){
2071    MpegEncContext * const s = &h->s;
2072    const int big_mb_num= s->mb_stride * (s->mb_height+1);
2073    int x,y;
2074
2075    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2076
2077    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2078    CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2079    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2080
2081    CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2082    CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2083    CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2084    CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2085
2086    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2087    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2088
2089    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2090    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2091    for(y=0; y<s->mb_height; y++){
2092        for(x=0; x<s->mb_width; x++){
2093            const int mb_xy= x + y*s->mb_stride;
2094            const int b_xy = 4*x + 4*y*h->b_stride;
2095            const int b8_xy= 2*x + 2*y*h->b8_stride;
2096
2097            h->mb2b_xy [mb_xy]= b_xy;
2098            h->mb2b8_xy[mb_xy]= b8_xy;
2099        }
2100    }
2101
2102    s->obmc_scratchpad = NULL;
2103
2104    if(!h->dequant4_coeff[0])
2105        init_dequant_tables(h);
2106
2107    return 0;
2108fail:
2109    free_tables(h);
2110    return -1;
2111}
2112
2113/**
2114 * Mimic alloc_tables(), but for every context thread.
2115 */
2116static void clone_tables(H264Context *dst, H264Context *src){
2117    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2118    dst->non_zero_count           = src->non_zero_count;
2119    dst->slice_table              = src->slice_table;
2120    dst->cbp_table                = src->cbp_table;
2121    dst->mb2b_xy                  = src->mb2b_xy;
2122    dst->mb2b8_xy                 = src->mb2b8_xy;
2123    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2124    dst->mvd_table[0]             = src->mvd_table[0];
2125    dst->mvd_table[1]             = src->mvd_table[1];
2126    dst->direct_table             = src->direct_table;
2127
2128    dst->s.obmc_scratchpad = NULL;
2129    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2130}
2131
2132/**
2133 * Init context
2134 * Allocate buffers which are not shared amongst multiple threads.
2135 */
2136static int context_init(H264Context *h){
2137    CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2138    CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2139
2140    return 0;
2141fail:
2142    return -1; // free_tables will clean up for us
2143}
2144
2145static av_cold void common_init(H264Context *h){
2146    MpegEncContext * const s = &h->s;
2147
2148    s->width = s->avctx->width;
2149    s->height = s->avctx->height;
2150    s->codec_id= s->avctx->codec->id;
2151
2152    ff_h264_pred_init(&h->hpc, s->codec_id);
2153
2154    h->dequant_coeff_pps= -1;
2155    s->unrestricted_mv=1;
2156    s->decode=1; //FIXME
2157
2158    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2159
2160    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2161    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2162}
2163
2164/**
2165 * Reset SEI values at the beginning of the frame.
2166 *
2167 * @param h H.264 context.
2168 */
2169static void reset_sei(H264Context *h) {
2170    h->sei_recovery_frame_cnt       = -1;
2171    h->sei_dpb_output_delay         =  0;
2172    h->sei_cpb_removal_delay        = -1;
2173    h->sei_buffering_period_present =  0;
2174}
2175
2176static av_cold int decode_init(AVCodecContext *avctx){
2177    H264Context *h= avctx->priv_data;
2178    MpegEncContext * const s = &h->s;
2179
2180    MPV_decode_defaults(s);
2181
2182    s->avctx = avctx;
2183    common_init(h);
2184
2185    s->out_format = FMT_H264;
2186    s->workaround_bugs= avctx->workaround_bugs;
2187
2188    // set defaults
2189//    s->decode_mb= ff_h263_decode_mb;
2190    s->quarter_sample = 1;
2191    if(!avctx->has_b_frames)
2192    s->low_delay= 1;
2193
2194    if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2195        avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2196    else
2197        avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2198    avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
2199
2200    decode_init_vlc();
2201
2202    if(avctx->extradata_size > 0 && avctx->extradata &&
2203       *(char *)avctx->extradata == 1){
2204        h->is_avc = 1;
2205        h->got_avcC = 0;
2206    } else {
2207        h->is_avc = 0;
2208    }
2209
2210    h->thread_context[0] = h;
2211    h->outputed_poc = INT_MIN;
2212    h->prev_poc_msb= 1<<16;
2213    reset_sei(h);
2214    if(avctx->codec_id == CODEC_ID_H264){
2215        if(avctx->ticks_per_frame == 1){
2216            s->avctx->time_base.den *=2;
2217        }
2218        avctx->ticks_per_frame = 2;
2219    }
2220    return 0;
2221}
2222
2223static int frame_start(H264Context *h){
2224    MpegEncContext * const s = &h->s;
2225    int i;
2226
2227    if(MPV_frame_start(s, s->avctx) < 0)
2228        return -1;
2229    ff_er_frame_start(s);
2230    /*
2231     * MPV_frame_start uses pict_type to derive key_frame.
2232     * This is incorrect for H.264; IDR markings must be used.
2233     * Zero here; IDR markings per slice in frame or fields are ORed in later.
2234     * See decode_nal_units().
2235     */
2236    s->current_picture_ptr->key_frame= 0;
2237
2238    assert(s->linesize && s->uvlinesize);
2239
2240    for(i=0; i<16; i++){
2241        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2242        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2243    }
2244    for(i=0; i<4; i++){
2245        h->block_offset[16+i]=
2246        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247        h->block_offset[24+16+i]=
2248        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2249    }
2250
2251    /* can't be in alloc_tables because linesize isn't known there.
2252     * FIXME: redo bipred weight to not require extra buffer? */
2253    for(i = 0; i < s->avctx->thread_count; i++)
2254        if(!h->thread_context[i]->s.obmc_scratchpad)
2255            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2256
2257    /* some macroblocks will be accessed before they're available */
2258    if(FRAME_MBAFF || s->avctx->thread_count > 1)
2259        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2260
2261//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2262
2263    // We mark the current picture as non-reference after allocating it, so
2264    // that if we break out due to an error it can be released automatically
2265    // in the next MPV_frame_start().
2266    // SVQ3 as well as most other codecs have only last/next/current and thus
2267    // get released even with set reference, besides SVQ3 and others do not
2268    // mark frames as reference later "naturally".
2269    if(s->codec_id != CODEC_ID_SVQ3)
2270        s->current_picture_ptr->reference= 0;
2271
2272    s->current_picture_ptr->field_poc[0]=
2273    s->current_picture_ptr->field_poc[1]= INT_MAX;
2274    assert(s->current_picture_ptr->long_ref==0);
2275
2276    return 0;
2277}
2278
/**
 * Save border pixels of the current macroblock into h->left_border and
 * h->top_borders: the right-most luma/chroma column goes to left_border and
 * the bottom luma/chroma row goes to top_borders[top_idx][mb_x].
 * The first left_border entry of each plane is taken from the previously
 * stored top border (its last pixel) before that border is overwritten.
 *
 * @param src_y      first luma pixel of the macroblock
 * @param src_cb     first Cb pixel of the macroblock
 * @param src_cr     first Cr pixel of the macroblock
 * @param linesize   luma line size used for this macroblock
 * @param uvlinesize chroma line size used for this macroblock
 * @param simple     when non-zero the FRAME_MBAFF handling is skipped and
 *                   chroma is always processed
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
    MpegEncContext * const s = &h->s;
    int i;
    int step    = 1; // distance between consecutive left_border entries
    int offset  = 1; // first luma index in left_border
    int uvoffset= 1; // first chroma index, relative to 34 (Cb) / 34+18 (Cr)
    int top_idx = 1; // which of the two top border buffers is used
    int skiplast= 0; // when set, the last row is not stored in left_border

    // step back one line, so that row i of the macroblock is at (i+1)*linesize
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            // odd macroblock row
            offset  = MB_MBAFF ? 1 : 17;
            uvoffset= MB_MBAFF ? 1 : 9;
            if(!MB_MBAFF){
                // additionally store the row above the bottom row in top_borders[0]
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
                }
            }
        }else{
            // even macroblock row
            if(!MB_MBAFF){
                h->left_border[0]= h->top_borders[0][s->mb_x][15];
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
                    h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
                }
                skiplast= 1;
            }
            offset  =
            uvoffset=
            top_idx = MB_MBAFF ? 0 : 1;
        }
        step= MB_MBAFF ? 2 : 1;
    }

    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
    // right-most luma column (x == 15) of the macroblock rows
    for(i=1; i<17 - skiplast; i++){
        h->left_border[offset+i*step]= src_y[15+i*  linesize];
    }

    // bottom luma row of the macroblock, 16 bytes as two 64 bit copies
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
        h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
        // right-most chroma column (x == 7) of both chroma planes
        for(i=1; i<9 - skiplast; i++){
            h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
            h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
        }
        // bottom chroma rows, 8 bytes each
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
    }
}
2341
/**
 * Exchange (or copy) pixels between the saved borders (h->left_border,
 * h->top_borders) and the picture pixels directly left of / above the
 * current macroblock.
 *
 * @param src_y      first luma pixel of the macroblock
 * @param src_cb     first Cb pixel of the macroblock
 * @param src_cr     first Cr pixel of the macroblock
 * @param linesize   luma line size used for this macroblock
 * @param uvlinesize chroma line size used for this macroblock
 * @param xchg       non-zero: swap border and picture pixels;
 *                   zero: only write the stored border into the picture
 *                   (some positions are always swapped, see the XCHG calls
 *                   with a literal 1)
 * @param simple     when non-zero the FRAME_MBAFF handling is skipped and
 *                   chroma is always processed
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
    MpegEncContext * const s = &h->s;
    int temp8, i;
    uint64_t temp64;
    int deblock_left;
    int deblock_top;
    int mb_xy;
    int step    = 1; // distance between consecutive left_border entries
    int offset  = 1; // first luma index in left_border
    int uvoffset= 1; // first chroma index, relative to 34 (Cb) / 34+18 (Cr)
    int top_idx = 1; // which of the two top border buffers is used

    // same index selection as in backup_mb_border()
    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            offset  = MB_MBAFF ? 1 : 17;
            uvoffset= MB_MBAFF ? 1 : 9;
        }else{
            offset  =
            uvoffset=
            top_idx = MB_MBAFF ? 0 : 1;
        }
        step= MB_MBAFF ? 2 : 1;
    }

    if(h->deblocking_filter == 2) {
        // only neighbours with the same slice table entry are handled
        mb_xy = h->mb_xy;
        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
        deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
    } else {
        // otherwise every neighbour inside the picture is handled
        deblock_left = (s->mb_x > 0);
        deblock_top =  (s->mb_y > !!MB_FIELD);
    }

    // move to the pixel above-left of the macroblock
    src_y  -=   linesize + 1;
    src_cb -= uvlinesize + 1;
    src_cr -= uvlinesize + 1;

// b always receives the old value of a; a receives the old value of b only
// when xchg is non-zero. t is a temporary of a suitable type.
#define XCHG(a,b,t,xchg)\
t= a;\
if(xchg)\
    a= b;\
b= t;

    if(deblock_left){
        // left luma column; starts at the top-left corner only if the top is handled too
        for(i = !deblock_top; i<16; i++){
            XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
        }
        // i == 16 here: the last entry is always swapped
        XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
    }

    if(deblock_top){
        // luma row above the macroblock, plus 8 pixels of the next macroblock's top border
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
        }
    }

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(deblock_left){
            // left chroma columns of both planes
            for(i = !deblock_top; i<8; i++){
                XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
                XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
            }
            // i == 8 here: the last entries are always swapped
            XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
        }
        if(deblock_top){
            // chroma rows above the macroblock
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
        }
    }
}
2415
/**
 * Reconstruct the current macroblock (s->mb_x, s->mb_y) into the current
 * picture: intra prediction or motion compensation, followed by adding the
 * residual, and finally deblocking if h->deblocking_filter is set.
 *
 * @param simple when non-zero, the paths guarded by "!simple" (field /
 *               MBAFF macroblocks, intra PCM, transform bypass) are not
 *               taken, the gray flag is not checked and the codec is assumed
 *               to be H.264. The two callers pass a constant, so this
 *               always-inline function is specialized per value.
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);

    /* top-left pixel of this macroblock in each plane (16x16 luma, 8x8 chroma) */
    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);

    if (!simple && MB_FIELD) {
        /* field macroblock: doubled line sizes and the second half of block_offset */
        linesize   = h->mb_linesize   = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[24];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            /* rewrite the cached reference indices of the used lists */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* PCM: the samples stored in h->mb are copied straight into the picture */
        for (i=0; i<16; i++) {
            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
        }
        for (i=0; i<8; i++) {
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* swap the saved borders in for the duration of intra prediction */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);

            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            if(IS_INTRA4x4(mb_type)){
                if(simple || !s->encoding){
                    if(IS_8x8DCT(mb_type)){
                        /* four 8x8 luma blocks: predict, then add the residual */
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels8;
                        }else{
                            idct_dc_add = s->dsp.h264_idct8_dc_add;
                            idct_add    = s->dsp.h264_idct8_add;
                        }
                        for(i=0; i<16; i+=4){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
                                if(nnz){
                                    if(nnz == 1 && h->mb[i*16])
                                        idct_dc_add(ptr, h->mb + i*16, linesize);
                                    else
                                        idct_add   (ptr, h->mb + i*16, linesize);
                                }
                            }
                        }
                    }else{
                        /* sixteen 4x4 luma blocks: predict, then add the residual */
                        if(transform_bypass){
                            idct_dc_add =
                            idct_add    = s->dsp.add_pixels4;
                        }else{
                            idct_dc_add = s->dsp.h264_idct_dc_add;
                            idct_add    = s->dsp.h264_idct_add;
                        }
                        for(i=0; i<16; i++){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                            }else{
                                uint8_t *topright;
                                int nnz, tr;
                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                                    assert(mb_y || linesize <= block_offset[i]);
                                    if(!topright_avail){
                                        /* no top-right samples: replicate the last top pixel 4 times */
                                        tr= ptr[3 - linesize]*0x01010101;
                                        topright= (uint8_t*) &tr;
                                    }else
                                        topright= ptr + 4 - linesize;
                                }else
                                    topright= NULL;

                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                                nnz = h->non_zero_count_cache[ scan8[i] ];
                                if(nnz){
                                    if(is_h264){
                                        if(nnz == 1 && h->mb[i*16])
                                            idct_dc_add(ptr, h->mb + i*16, linesize);
                                        else
                                            idct_add   (ptr, h->mb + i*16, linesize);
                                    }else
                                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                                }
                            }
                        }
                    }
                }
            }else{
                /* intra 16x16: predict the whole luma block, then process the luma DC */
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                if(is_h264){
                    if(!transform_bypass)
                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
                }else
                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
            }
            /* write the stored borders back into the picture */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
        }else if(is_h264){
            /* inter macroblock: motion compensation */
            hl_motion(h, dest_y, dest_cb, dest_cr,
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
        }


        /* luma residual for everything except intra 4x4, which was handled above */
        if(!IS_INTRA4x4(mb_type)){
            if(is_h264){
                if(IS_INTRA16x16(mb_type)){
                    if(transform_bypass){
                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
                        }else{
                            for(i=0; i<16; i++){
                                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                         s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                    }
                }else if(h->cbp&15){
                    /* the low 4 bits of cbp gate the luma residual */
                    if(transform_bypass){
                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
                        for(i=0; i<16; i+=di){
                            if(h->non_zero_count_cache[ scan8[i] ]){
                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                            }
                        }
                    }else{
                        if(IS_8x8DCT(mb_type)){
                            s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }else{
                            s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
                        }
                    }
                }
            }else{
                for(i=0; i<16; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                        uint8_t * const ptr= dest_y + block_offset[i];
                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                    }
                }
            }
        }

        /* chroma residual, gated by bits 0x30 of cbp; blocks 16..19 go to Cb, 20..23 to Cr */
        if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
                }else{
                    idct_add = s->dsp.add_pixels4;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }
            }else{
                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                if(is_h264){
                    idct_add = s->dsp.h264_idct_add;
                    idct_dc_add = s->dsp.h264_idct_dc_add;
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ])
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                        else if(h->mb[i*16])
                            idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    }
                }else{
                    for(i=16; i<16+8; i++){
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                            uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
                            svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                        }
                    }
                }
            }
        }
    }
    /* the coefficient buffer is cleared whenever it may have been written */
    if(h->cbp || IS_INTRA(mb_type))
        s->dsp.clear_blocks(h->mb);

    if(h->deblocking_filter) {
        /* save the unfiltered borders first, then filter this macroblock */
        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
        fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
        h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
        h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
        if (!simple && FRAME_MBAFF) {
            filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        } else {
            filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        }
    }
}
2670
/**
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 * Calls hl_decode_mb_internal() with simple = 1, which disables every path
 * guarded by "!simple" in that function.
 */
static void hl_decode_mb_simple(H264Context *h){
    hl_decode_mb_internal(h, 1);
}
2677
/**
 * Process a macroblock; this handles edge cases, such as interlacing.
 * Calls hl_decode_mb_internal() with simple = 0, so all paths of that
 * function stay enabled. Marked av_noinline, so this variant is kept as a
 * separate function.
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0);
}
2684
2685static void hl_decode_mb(H264Context *h){
2686    MpegEncContext * const s = &h->s;
2687    const int mb_xy= h->mb_xy;
2688    const int mb_type= s->current_picture.mb_type[mb_xy];
2689    int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2690
2691    if (is_complex)
2692        hl_decode_mb_complex(h);
2693    else hl_decode_mb_simple(h);
2694}
2695
2696static void pic_as_field(Picture *pic, const int parity){
2697    int i;
2698    for (i = 0; i < 4; ++i) {
2699        if (parity == PICT_BOTTOM_FIELD)
2700            pic->data[i] += pic->linesize[i];
2701        pic->reference = parity;
2702        pic->linesize[i] *= 2;
2703    }
2704    pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2705}
2706
2707static int split_field_copy(Picture *dest, Picture *src,
2708                            int parity, int id_add){
2709    int match = !!(src->reference & parity);
2710
2711    if (match) {
2712        *dest = *src;
2713        if(parity != PICT_FRAME){
2714            pic_as_field(dest, parity);
2715            dest->pic_id *= 2;
2716            dest->pic_id += id_add;
2717        }
2718    }
2719
2720    return match;
2721}
2722
2723static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2724    int i[2]={0};
2725    int index=0;
2726
2727    while(i[0]<len || i[1]<len){
2728        while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2729            i[0]++;
2730        while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2731            i[1]++;
2732        if(i[0] < len){
2733            in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2734            split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2735        }
2736        if(i[1] < len){
2737            in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2738            split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2739        }
2740    }
2741
2742    return index;
2743}
2744
2745static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2746    int i, best_poc;
2747    int out_i= 0;
2748
2749    for(;;){
2750        best_poc= dir ? INT_MIN : INT_MAX;
2751
2752        for(i=0; i<len; i++){
2753            const int poc= src[i]->poc;
2754            if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2755                best_poc= poc;
2756                sorted[out_i]= src[i];
2757            }
2758        }
2759        if(best_poc == (dir ? INT_MIN : INT_MAX))
2760            break;
2761        limit= sorted[out_i++]->poc - dir;
2762    }
2763    return out_i;
2764}
2765
2766/**
2767 * fills the default_ref_list.
2768 */
2769static int fill_default_ref_list(H264Context *h){
2770    MpegEncContext * const s = &h->s;
2771    int i, len;
2772
2773    if(h->slice_type_nos==FF_B_TYPE){
2774        Picture *sorted[32];
2775        int cur_poc, list;
2776        int lens[2];
2777
2778        if(FIELD_PICTURE)
2779            cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2780        else
2781            cur_poc= s->current_picture_ptr->poc;
2782
2783        for(list= 0; list<2; list++){
2784            len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2785            len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2786            assert(len<=32);
2787            len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2788            len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2789            assert(len<=32);
2790
2791            if(len < h->ref_count[list])
2792                memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2793            lens[list]= len;
2794        }
2795
2796        if(lens[0] == lens[1] && lens[1] > 1){
2797            for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2798            if(i == lens[0])
2799                FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2800        }
2801    }else{
2802        len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2803        len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2804        assert(len <= 32);
2805        if(len < h->ref_count[0])
2806            memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2807    }
2808#ifdef TRACE
2809    for (i=0; i<h->ref_count[0]; i++) {
2810        tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2811    }
2812    if(h->slice_type_nos==FF_B_TYPE){
2813        for (i=0; i<h->ref_count[1]; i++) {
2814            tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2815        }
2816    }
2817#endif
2818    return 0;
2819}
2820
2821static void print_short_term(H264Context *h);
2822static void print_long_term(H264Context *h);
2823
2824/**
2825 * Extract structure information about the picture described by pic_num in
2826 * the current decoding context (frame or field). Note that pic_num is
2827 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2828 * @param pic_num picture number for which to extract structure information
2829 * @param structure one of PICT_XXX describing structure of picture
2830 *                      with pic_num
2831 * @return frame number (short term) or long term index of picture
2832 *         described by pic_num
2833 */
2834static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2835    MpegEncContext * const s = &h->s;
2836
2837    *structure = s->picture_structure;
2838    if(FIELD_PICTURE){
2839        if (!(pic_num & 1))
2840            /* opposite field */
2841            *structure ^= PICT_FRAME;
2842        pic_num >>= 1;
2843    }
2844
2845    return pic_num;
2846}
2847
2848static int decode_ref_pic_list_reordering(H264Context *h){
2849    MpegEncContext * const s = &h->s;
2850    int list, index, pic_structure;
2851
2852    print_short_term(h);
2853    print_long_term(h);
2854
2855    for(list=0; list<h->list_count; list++){
2856        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2857
2858        if(get_bits1(&s->gb)){
2859            int pred= h->curr_pic_num;
2860
2861            for(index=0; ; index++){
2862                unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2863                unsigned int pic_id;
2864                int i;
2865                Picture *ref = NULL;
2866
2867                if(reordering_of_pic_nums_idc==3)
2868                    break;
2869
2870                if(index >= h->ref_count[list]){
2871                    av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2872                    return -1;
2873                }
2874
2875                if(reordering_of_pic_nums_idc<3){
2876                    if(reordering_of_pic_nums_idc<2){
2877                        const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2878                        int frame_num;
2879
2880                        if(abs_diff_pic_num > h->max_pic_num){
2881                            av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2882                            return -1;
2883                        }
2884
2885                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2886                        else                                pred+= abs_diff_pic_num;
2887                        pred &= h->max_pic_num - 1;
2888
2889                        frame_num = pic_num_extract(h, pred, &pic_structure);
2890
2891                        for(i= h->short_ref_count-1; i>=0; i--){
2892                            ref = h->short_ref[i];
2893                            assert(ref->reference);
2894                            assert(!ref->long_ref);
2895                            if(
2896                                   ref->frame_num == frame_num &&
2897                                   (ref->reference & pic_structure)
2898                              )
2899                                break;
2900                        }
2901                        if(i>=0)
2902                            ref->pic_id= pred;
2903                    }else{
2904                        int long_idx;
2905                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2906
2907                        long_idx= pic_num_extract(h, pic_id, &pic_structure);
2908
2909                        if(long_idx>31){
2910                            av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2911                            return -1;
2912                        }
2913                        ref = h->long_ref[long_idx];
2914                        assert(!(ref && !ref->reference));
2915                        if(ref && (ref->reference & pic_structure)){
2916                            ref->pic_id= pic_id;
2917                            assert(ref->long_ref);
2918                            i=0;
2919                        }else{
2920                            i=-1;
2921                        }
2922                    }
2923
2924                    if (i < 0) {
2925                        av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2926                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2927                    } else {
2928                        for(i=index; i+1<h->ref_count[list]; i++){
2929                            if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2930                                break;
2931                        }
2932                        for(; i > index; i--){
2933                            h->ref_list[list][i]= h->ref_list[list][i-1];
2934                        }
2935                        h->ref_list[list][index]= *ref;
2936                        if (FIELD_PICTURE){
2937                            pic_as_field(&h->ref_list[list][index], pic_structure);
2938                        }
2939                    }
2940                }else{
2941                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2942                    return -1;
2943                }
2944            }
2945        }
2946    }
2947    for(list=0; list<h->list_count; list++){
2948        for(index= 0; index < h->ref_count[list]; index++){
2949            if(!h->ref_list[list][index].data[0]){
2950                av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2951                h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2952            }
2953        }
2954    }
2955
2956    return 0;
2957}
2958
2959static void fill_mbaff_ref_list(H264Context *h){
2960    int list, i, j;
2961    for(list=0; list<2; list++){ //FIXME try list_count
2962        for(i=0; i<h->ref_count[list]; i++){
2963            Picture *frame = &h->ref_list[list][i];
2964            Picture *field = &h->ref_list[list][16+2*i];
2965            field[0] = *frame;
2966            for(j=0; j<3; j++)
2967                field[0].linesize[j] <<= 1;
2968            field[0].reference = PICT_TOP_FIELD;
2969            field[0].poc= field[0].field_poc[0];
2970            field[1] = field[0];
2971            for(j=0; j<3; j++)
2972                field[1].data[j] += frame->linesize[j];
2973            field[1].reference = PICT_BOTTOM_FIELD;
2974            field[1].poc= field[1].field_poc[1];
2975
2976            h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2977            h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2978            for(j=0; j<2; j++){
2979                h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2980                h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2981            }
2982        }
2983    }
2984    for(j=0; j<h->ref_count[1]; j++){
2985        for(i=0; i<h->ref_count[0]; i++)
2986            h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2987        memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2988        memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2989    }
2990}
2991
2992static int pred_weight_table(H264Context *h){
2993    MpegEncContext * const s = &h->s;
2994    int list, i;
2995    int luma_def, chroma_def;
2996
2997    h->use_weight= 0;
2998    h->use_weight_chroma= 0;
2999    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3000    h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3001    luma_def = 1<<h->luma_log2_weight_denom;
3002    chroma_def = 1<<h->chroma_log2_weight_denom;
3003
3004    for(list=0; list<2; list++){
3005        h->luma_weight_flag[list]   = 0;
3006        h->chroma_weight_flag[list] = 0;
3007        for(i=0; i<h->ref_count[list]; i++){
3008            int luma_weight_flag, chroma_weight_flag;
3009
3010            luma_weight_flag= get_bits1(&s->gb);
3011            if(luma_weight_flag){
3012                h->luma_weight[list][i]= get_se_golomb(&s->gb);
3013                h->luma_offset[list][i]= get_se_golomb(&s->gb);
3014                if(   h->luma_weight[list][i] != luma_def
3015                   || h->luma_offset[list][i] != 0) {
3016                    h->use_weight= 1;
3017                    h->luma_weight_flag[list]= 1;
3018                }
3019            }else{
3020                h->luma_weight[list][i]= luma_def;
3021                h->luma_offset[list][i]= 0;
3022            }
3023
3024            if(CHROMA){
3025                chroma_weight_flag= get_bits1(&s->gb);
3026                if(chroma_weight_flag){
3027                    int j;
3028                    for(j=0; j<2; j++){
3029                        h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3030                        h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3031                        if(   h->chroma_weight[list][i][j] != chroma_def
3032                           || h->chroma_offset[list][i][j] != 0) {
3033                            h->use_weight_chroma= 1;
3034                            h->chroma_weight_flag[list]= 1;
3035                        }
3036                    }
3037                }else{
3038                    int j;
3039                    for(j=0; j<2; j++){
3040                        h->chroma_weight[list][i][j]= chroma_def;
3041                        h->chroma_offset[list][i][j]= 0;
3042                    }
3043                }
3044            }
3045        }
3046        if(h->slice_type_nos != FF_B_TYPE) break;
3047    }
3048    h->use_weight= h->use_weight || h->use_weight_chroma;
3049    return 0;
3050}
3051
3052static void implicit_weight_table(H264Context *h){
3053    MpegEncContext * const s = &h->s;
3054    int ref0, ref1, i;
3055    int cur_poc = s->current_picture_ptr->poc;
3056
3057    for (i = 0; i < 2; i++) {
3058        h->luma_weight_flag[i]   = 0;
3059        h->chroma_weight_flag[i] = 0;
3060    }
3061
3062    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
3063       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3064        h->use_weight= 0;
3065        h->use_weight_chroma= 0;
3066        return;
3067    }
3068
3069    h->use_weight= 2;
3070    h->use_weight_chroma= 2;
3071    h->luma_log2_weight_denom= 5;
3072    h->chroma_log2_weight_denom= 5;
3073
3074    for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3075        int poc0 = h->ref_list[0][ref0].poc;
3076        for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3077            int poc1 = h->ref_list[1][ref1].poc;
3078            int td = av_clip(poc1 - poc0, -128, 127);
3079            if(td){
3080                int tb = av_clip(cur_poc - poc0, -128, 127);
3081                int tx = (16384 + (FFABS(td) >> 1)) / td;
3082                int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3083                if(dist_scale_factor < -64 || dist_scale_factor > 128)
3084                    h->implicit_weight[ref0][ref1] = 32;
3085                else
3086                    h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3087            }else
3088                h->implicit_weight[ref0][ref1] = 32;
3089        }
3090    }
3091}
3092
3093/**
3094 * Mark a picture as no longer needed for reference. The refmask
3095 * argument allows unreferencing of individual fields or the whole frame.
3096 * If the picture becomes entirely unreferenced, but is being held for
3097 * display purposes, it is marked as such.
3098 * @param refmask mask of fields to unreference; the mask is bitwise
3099 *                anded with the reference marking of pic
3100 * @return non-zero if pic becomes entirely unreferenced (except possibly
3101 *         for display purposes) zero if one of the fields remains in
3102 *         reference
3103 */
3104static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3105    int i;
3106    if (pic->reference &= refmask) {
3107        return 0;
3108    } else {
3109        for(i = 0; h->delayed_pic[i]; i++)
3110            if(pic == h->delayed_pic[i]){
3111                pic->reference=DELAYED_PIC_REF;
3112                break;
3113            }
3114        return 1;
3115    }
3116}
3117
3118/**
3119 * instantaneous decoder refresh.
3120 */
3121static void idr(H264Context *h){
3122    int i;
3123
3124    for(i=0; i<16; i++){
3125        remove_long(h, i, 0);
3126    }
3127    assert(h->long_ref_count==0);
3128
3129    for(i=0; i<h->short_ref_count; i++){
3130        unreference_pic(h, h->short_ref[i], 0);
3131        h->short_ref[i]= NULL;
3132    }
3133    h->short_ref_count=0;
3134    h->prev_frame_num= 0;
3135    h->prev_frame_num_offset= 0;
3136    h->prev_poc_msb=
3137    h->prev_poc_lsb= 0;
3138}
3139
3140/* forget old pics after a seek */
3141static void flush_dpb(AVCodecContext *avctx){
3142    H264Context *h= avctx->priv_data;
3143    int i;
3144    for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3145        if(h->delayed_pic[i])
3146            h->delayed_pic[i]->reference= 0;
3147        h->delayed_pic[i]= NULL;
3148    }
3149    h->outputed_poc= INT_MIN;
3150    idr(h);
3151    if(h->s.current_picture_ptr)
3152        h->s.current_picture_ptr->reference= 0;
3153    h->s.first_field= 0;
3154    reset_sei(h);
3155    ff_mpeg_flush(avctx);
3156}
3157
3158/**
3159 * Find a Picture in the short term reference list by frame number.
3160 * @param frame_num frame number to search for
3161 * @param idx the index into h->short_ref where returned picture is found
3162 *            undefined if no picture found.
3163 * @return pointer to the found picture, or NULL if no pic with the provided
3164 *                 frame number is found
3165 */
3166static Picture * find_short(H264Context *h, int frame_num, int *idx){
3167    MpegEncContext * const s = &h->s;
3168    int i;
3169
3170    for(i=0; i<h->short_ref_count; i++){
3171        Picture *pic= h->short_ref[i];
3172        if(s->avctx->debug&FF_DEBUG_MMCO)
3173            av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3174        if(pic->frame_num == frame_num) {
3175            *idx = i;
3176            return pic;
3177        }
3178    }
3179    return NULL;
3180}
3181
3182/**
3183 * Remove a picture from the short term reference list by its index in
3184 * that list.  This does no checking on the provided index; it is assumed
3185 * to be valid. Other list entries are shifted down.
3186 * @param i index into h->short_ref of picture to remove.
3187 */
3188static void remove_short_at_index(H264Context *h, int i){
3189    assert(i >= 0 && i < h->short_ref_count);
3190    h->short_ref[i]= NULL;
3191    if (--h->short_ref_count)
3192        memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3193}
3194
3195/**
3196 *
3197 * @return the removed picture or NULL if an error occurs
3198 */
3199static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3200    MpegEncContext * const s = &h->s;
3201    Picture *pic;
3202    int i;
3203
3204    if(s->avctx->debug&FF_DEBUG_MMCO)
3205        av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3206
3207    pic = find_short(h, frame_num, &i);
3208    if (pic){
3209        if(unreference_pic(h, pic, ref_mask))
3210        remove_short_at_index(h, i);
3211    }
3212
3213    return pic;
3214}
3215
3216/**
3217 * Remove a picture from the long term reference list by its index in
3218 * that list.
3219 * @return the removed picture or NULL if an error occurs
3220 */
3221static Picture * remove_long(H264Context *h, int i, int ref_mask){
3222    Picture *pic;
3223
3224    pic= h->long_ref[i];
3225    if (pic){
3226        if(unreference_pic(h, pic, ref_mask)){
3227            assert(h->long_ref[i]->long_ref == 1);
3228            h->long_ref[i]->long_ref= 0;
3229            h->long_ref[i]= NULL;
3230            h->long_ref_count--;
3231        }
3232    }
3233
3234    return pic;
3235}
3236
3237/**
3238 * print short term list
3239 */
3240static void print_short_term(H264Context *h) {
3241    uint32_t i;
3242    if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3243        av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3244        for(i=0; i<h->short_ref_count; i++){
3245            Picture *pic= h->short_ref[i];
3246            av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3247        }
3248    }
3249}
3250
3251/**
3252 * print long term list
3253 */
3254static void print_long_term(H264Context *h) {
3255    uint32_t i;
3256    if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3257        av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3258        for(i = 0; i < 16; i++){
3259            Picture *pic= h->long_ref[i];
3260            if (pic) {
3261                av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3262            }
3263        }
3264    }
3265}
3266
/**
 * Executes the reference picture marking (memory management control operations).
 *
 * The operations mmco[0..mmco_count-1] are applied in order to the short
 * term and long term reference lists. Afterwards, unless an MMCO_LONG
 * operation already assigned the current picture, the current picture is
 * put at the head of the short term list (or, if it already is the head,
 * simply marked as a full frame reference). Finally, if the two lists
 * together hold more pictures than h->sps.ref_frame_count, one picture is
 * discarded.
 *
 * @param mmco       operations to execute
 * @param mmco_count number of entries in mmco
 * @return always 0
 */
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
    MpegEncContext * const s = &h->s;
    int i, j;
    int current_ref_assigned=0;
    Picture *av_uninit(pic);

    if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
        av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");

    for(i=0; i<mmco_count; i++){
        int structure, av_uninit(frame_num);
        /* NOTE: the debug output reads h->mmco[i], not the mmco argument */
        if(s->avctx->debug&FF_DEBUG_MMCO)
            av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);

        /* Both short term operations first resolve their picture; pic, j,
         * frame_num and structure are only valid for these two opcodes. */
        if(   mmco[i].opcode == MMCO_SHORT2UNUSED
           || mmco[i].opcode == MMCO_SHORT2LONG){
            frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
            pic = find_short(h, frame_num, &j);
            if(!pic){
                /* The operation is skipped. No error is reported when a
                 * SHORT2LONG finds the target long term slot already
                 * holding a picture with this frame number. */
                if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
                   || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
                av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
                continue;
            }
        }

        switch(mmco[i].opcode){
        case MMCO_SHORT2UNUSED:
            if(s->avctx->debug&FF_DEBUG_MMCO)
                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
            /* structure ^ PICT_FRAME is the mask that ends up anded into
             * the picture's reference marking by unreference_pic() */
            remove_short(h, frame_num, structure ^ PICT_FRAME);
            break;
        case MMCO_SHORT2LONG:
                /* evict whatever else occupies the target long term slot */
                if (h->long_ref[mmco[i].long_arg] != pic)
                    remove_long(h, mmco[i].long_arg, 0);

                /* move pic from the short term list to the long term slot */
                remove_short_at_index(h, j);
                h->long_ref[ mmco[i].long_arg ]= pic;
                if (h->long_ref[ mmco[i].long_arg ]){
                    h->long_ref[ mmco[i].long_arg ]->long_ref=1;
                    h->long_ref_count++;
                }
            break;
        case MMCO_LONG2UNUSED:
            j = pic_num_extract(h, mmco[i].long_arg, &structure);
            pic = h->long_ref[j];
            if (pic) {
                remove_long(h, j, structure ^ PICT_FRAME);
            } else if(s->avctx->debug&FF_DEBUG_MMCO)
                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
            break;
        case MMCO_LONG:
                    // Comment below left from previous code as it is an interesting note.
                    /* First field in pair is in short term list or
                     * at a different long term index.
                     * This is not allowed; see 7.4.3.3, notes 2 and 3.
                     * Report the problem and keep the pair where it is,
                     * and mark this field valid.
                     */

            /* store the current picture in the long term slot unless it is
             * already there */
            if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
                remove_long(h, mmco[i].long_arg, 0);

                h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
                h->long_ref[ mmco[i].long_arg ]->long_ref=1;
                h->long_ref_count++;
            }

            s->current_picture_ptr->reference |= s->picture_structure;
            /* suppresses the short term insertion after the loop */
            current_ref_assigned=1;
            break;
        case MMCO_SET_MAX_LONG:
            assert(mmco[i].long_arg <= 16);
            // just remove the long term which index is greater than new max
            for(j = mmco[i].long_arg; j<16; j++){
                remove_long(h, j, 0);
            }
            break;
        case MMCO_RESET:
            /* empty both reference lists ... */
            while(h->short_ref_count){
                remove_short(h, h->short_ref[0]->frame_num, 0);
            }
            for(j = 0; j < 16; j++) {
                remove_long(h, j, 0);
            }
            /* ... and zero the POC and frame_num of the current picture
             * and of the context */
            s->current_picture_ptr->poc=
            s->current_picture_ptr->field_poc[0]=
            s->current_picture_ptr->field_poc[1]=
            h->poc_lsb=
            h->poc_msb=
            h->frame_num=
            s->current_picture_ptr->frame_num= 0;
            break;
        default: assert(0);
        }
    }

    if (!current_ref_assigned) {
        /* Second field of complementary field pair; the first field of
         * which is already referenced. If short referenced, it
         * should be first entry in short_ref. If not, it must exist
         * in long_ref; trying to put it on the short list here is an
         * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
         */
        if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
            /* Just mark the second field valid */
            s->current_picture_ptr->reference = PICT_FRAME;
        } else if (s->current_picture_ptr->long_ref) {
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
                                             "assignment for second field "
                                             "in complementary field pair "
                                             "(first field is long term)\n");
        } else {
            /* a short term picture with the same frame_num must not exist
             * yet; if one does, it is removed and the fact reported */
            pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
            if(pic){
                av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
            }

            /* insert the current picture at the head of the short list */
            if(h->short_ref_count)
                memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));

            h->short_ref[0]= s->current_picture_ptr;
            h->short_ref_count++;
            s->current_picture_ptr->reference |= s->picture_structure;
        }
    }

    if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){

        /* We have too many reference frames, probably due to corrupted
         * stream. Need to discard one frame. Prevents overrun of the
         * short_ref and long_ref buffers.
         */
        av_log(h->s.avctx, AV_LOG_ERROR,
               "number of reference frames exceeds max (probably "
               "corrupt input), discarding one\n");

        if (h->long_ref_count && !h->short_ref_count) {
            /* only long term pictures left: drop the one with the lowest
             * index */
            for (i = 0; i < 16; ++i)
                if (h->long_ref[i])
                    break;

            assert(i < 16);
            remove_long(h, i, 0);
        } else {
            /* otherwise drop the last entry of the short term list */
            pic = h->short_ref[h->short_ref_count - 1];
            remove_short(h, pic->frame_num, 0);
        }
    }

    print_short_term(h);
    print_long_term(h);
    return 0;
}
3424
3425static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3426    MpegEncContext * const s = &h->s;
3427    int i;
3428
3429    h->mmco_index= 0;
3430    if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3431        s->broken_link= get_bits1(gb) -1;
3432        if(get_bits1(gb)){
3433            h->mmco[0].opcode= MMCO_LONG;
3434            h->mmco[0].long_arg= 0;
3435            h->mmco_index= 1;
3436        }
3437    }else{
3438        if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3439            for(i= 0; i<MAX_MMCO_COUNT; i++) {
3440                MMCOOpcode opcode= get_ue_golomb_31(gb);
3441
3442                h->mmco[i].opcode= opcode;
3443                if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3444                    h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3445/*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3446                        av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3447                        return -1;
3448                    }*/
3449                }
3450                if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3451                    unsigned int long_arg= get_ue_golomb_31(gb);
3452                    if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3453                        av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3454                        return -1;
3455                    }
3456                    h->mmco[i].long_arg= long_arg;
3457                }
3458
3459                if(opcode > (unsigned)MMCO_LONG){
3460                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3461                    return -1;
3462                }
3463                if(opcode == MMCO_END)
3464                    break;
3465            }
3466            h->mmco_index= i;
3467        }else{
3468            assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3469
3470            if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3471                    !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3472                h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3473                h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3474                h->mmco_index= 1;
3475                if (FIELD_PICTURE) {
3476                    h->mmco[0].short_pic_num *= 2;
3477                    h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3478                    h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3479                    h->mmco_index= 2;
3480                }
3481            }
3482        }
3483    }
3484
3485    return 0;
3486}
3487
3488static int init_poc(H264Context *h){
3489    MpegEncContext * const s = &h->s;
3490    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3491    int field_poc[2];
3492    Picture *cur = s->current_picture_ptr;
3493
3494    h->frame_num_offset= h->prev_frame_num_offset;
3495    if(h->frame_num < h->prev_frame_num)
3496        h->frame_num_offset += max_frame_num;
3497
3498    if(h->sps.poc_type==0){
3499        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3500
3501        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3502            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3503        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3504            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3505        else
3506            h->poc_msb = h->prev_poc_msb;
3507//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3508        field_poc[0] =
3509        field_poc[1] = h->poc_msb + h->poc_lsb;
3510        if(s->picture_structure == PICT_FRAME)
3511            field_poc[1] += h->delta_poc_bottom;
3512    }else if(h->sps.poc_type==1){
3513        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3514        int i;
3515
3516        if(h->sps.poc_cycle_length != 0)
3517            abs_frame_num = h->frame_num_offset + h->frame_num;
3518        else
3519            abs_frame_num = 0;
3520
3521        if(h->nal_ref_idc==0 && abs_frame_num > 0)
3522            abs_frame_num--;
3523
3524        expected_delta_per_poc_cycle = 0;
3525        for(i=0; i < h->sps.poc_cycle_length; i++)
3526            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3527
3528        if(abs_frame_num > 0){
3529            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3530            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3531
3532            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3533            for(i = 0; i <= frame_num_in_poc_cycle; i++)
3534                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3535        } else
3536            expectedpoc = 0;
3537
3538        if(h->nal_ref_idc == 0)
3539            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3540
3541        field_poc[0] = expectedpoc + h->delta_poc[0];
3542        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3543
3544        if(s->picture_structure == PICT_FRAME)
3545            field_poc[1] += h->delta_poc[1];
3546    }else{
3547        int poc= 2*(h->frame_num_offset + h->frame_num);
3548
3549        if(!h->nal_ref_idc)
3550            poc--;
3551
3552        field_poc[0]= poc;
3553        field_poc[1]= poc;
3554    }
3555
3556    if(s->picture_structure != PICT_BOTTOM_FIELD)
3557        s->current_picture_ptr->field_poc[0]= field_poc[0];
3558    if(s->picture_structure != PICT_TOP_FIELD)
3559        s->current_picture_ptr->field_poc[1]= field_poc[1];
3560    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3561
3562    return 0;
3563}
3564
3565
3566/**
3567 * initialize scan tables
3568 */
3569static void init_scan_tables(H264Context *h){
3570    MpegEncContext * const s = &h->s;
3571    int i;
3572    if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3573        memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3574        memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
3575    }else{
3576        for(i=0; i<16; i++){
3577#define T(x) (x>>2) | ((x<<2) & 0xF)
3578            h->zigzag_scan[i] = T(zigzag_scan[i]);
3579            h-> field_scan[i] = T( field_scan[i]);
3580#undef T
3581        }
3582    }
3583    if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3584        memcpy(h->zigzag_scan8x8,       ff_zigzag_direct,     64*sizeof(uint8_t));
3585        memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3586        memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
3587        memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
3588    }else{
3589        for(i=0; i<64; i++){
3590#define T(x) (x>>3) | ((x&7)<<3)
3591            h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
3592            h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3593            h->field_scan8x8[i]        = T(field_scan8x8[i]);
3594            h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
3595#undef T
3596        }
3597    }
3598    if(h->sps.transform_bypass){ //FIXME same ugly
3599        h->zigzag_scan_q0          = zigzag_scan;
3600        h->zigzag_scan8x8_q0       = ff_zigzag_direct;
3601        h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3602        h->field_scan_q0           = field_scan;
3603        h->field_scan8x8_q0        = field_scan8x8;
3604        h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
3605    }else{
3606        h->zigzag_scan_q0          = h->zigzag_scan;
3607        h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
3608        h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3609        h->field_scan_q0           = h->field_scan;
3610        h->field_scan8x8_q0        = h->field_scan8x8;
3611        h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
3612    }
3613}
3614
3615/**
3616 * Replicates H264 "master" context to thread contexts.
3617 */
3618static void clone_slice(H264Context *dst, H264Context *src)
3619{
3620    memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
3621    dst->s.current_picture_ptr  = src->s.current_picture_ptr;
3622    dst->s.current_picture      = src->s.current_picture;
3623    dst->s.linesize             = src->s.linesize;
3624    dst->s.uvlinesize           = src->s.uvlinesize;
3625    dst->s.first_field          = src->s.first_field;
3626
3627    dst->prev_poc_msb           = src->prev_poc_msb;
3628    dst->prev_poc_lsb           = src->prev_poc_lsb;
3629    dst->prev_frame_num_offset  = src->prev_frame_num_offset;
3630    dst->prev_frame_num         = src->prev_frame_num;
3631    dst->short_ref_count        = src->short_ref_count;
3632
3633    memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
3634    memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
3635    memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3636    memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
3637
3638    memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
3639    memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
3640}
3641
3642/**
3643 * decodes a slice header.
3644 * This will also call MPV_common_init() and frame_start() as needed.
3645 *
3646 * @param h h264context
3647 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3648 *
3649 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3650 */
3651static int decode_slice_header(H264Context *h, H264Context *h0){
3652    MpegEncContext * const s = &h->s;
3653    MpegEncContext * const s0 = &h0->s;
3654    unsigned int first_mb_in_slice;
3655    unsigned int pps_id;
3656    int num_ref_idx_active_override_flag;
3657    unsigned int slice_type, tmp, i, j;
3658    int default_ref_list_done = 0;
3659    int last_pic_structure;
3660
3661    s->dropable= h->nal_ref_idc == 0;
3662
3663    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3664        s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3665        s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3666    }else{
3667        s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3668        s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3669    }
3670
3671    first_mb_in_slice= get_ue_golomb(&s->gb);
3672
3673    if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3674        h0->current_slice = 0;
3675        if (!s0->first_field)
3676            s->current_picture_ptr= NULL;
3677    }
3678
3679    slice_type= get_ue_golomb_31(&s->gb);
3680    if(slice_type > 9){
3681        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3682        return -1;
3683    }
3684    if(slice_type > 4){
3685        slice_type -= 5;
3686        h->slice_type_fixed=1;
3687    }else
3688        h->slice_type_fixed=0;
3689
3690    slice_type= golomb_to_pict_type[ slice_type ];
3691    if (slice_type == FF_I_TYPE
3692        || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3693        default_ref_list_done = 1;
3694    }
3695    h->slice_type= slice_type;
3696    h->slice_type_nos= slice_type & 3;
3697
3698    s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3699    if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3700        av_log(h->s.avctx, AV_LOG_ERROR,
3701               "B picture before any references, skipping\n");
3702        return -1;
3703    }
3704
3705    pps_id= get_ue_golomb(&s->gb);
3706    if(pps_id>=MAX_PPS_COUNT){
3707        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3708        return -1;
3709    }
3710    if(!h0->pps_buffers[pps_id]) {
3711        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3712        return -1;
3713    }
3714    h->pps= *h0->pps_buffers[pps_id];
3715
3716    if(!h0->sps_buffers[h->pps.sps_id]) {
3717        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3718        return -1;
3719    }
3720    h->sps = *h0->sps_buffers[h->pps.sps_id];
3721
3722    if(h == h0 && h->dequant_coeff_pps != pps_id){
3723        h->dequant_coeff_pps = pps_id;
3724        init_dequant_tables(h);
3725    }
3726
3727    s->mb_width= h->sps.mb_width;
3728    s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3729
3730    h->b_stride=  s->mb_width*4;
3731    h->b8_stride= s->mb_width*2;
3732
3733    s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3734    if(h->sps.frame_mbs_only_flag)
3735        s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3736    else
3737        s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3738
3739    if (s->context_initialized
3740        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
3741        if(h != h0)
3742            return -1;   // width / height changed during parallelized decoding
3743        free_tables(h);
3744        flush_dpb(s->avctx);
3745        MPV_common_end(s);
3746    }
3747    if (!s->context_initialized) {
3748        if(h != h0)
3749            return -1;  // we cant (re-)initialize context during parallel decoding
3750        if (MPV_common_init(s) < 0)
3751            return -1;
3752        s->first_field = 0;
3753
3754        init_scan_tables(h);
3755        alloc_tables(h);
3756
3757        for(i = 1; i < s->avctx->thread_count; i++) {
3758            H264Context *c;
3759            c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3760            memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3761            memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3762            c->sps = h->sps;
3763            c->pps = h->pps;
3764            init_scan_tables(c);
3765            clone_tables(c, h);
3766        }
3767
3768        for(i = 0; i < s->avctx->thread_count; i++)
3769            if(context_init(h->thread_context[i]) < 0)
3770                return -1;
3771
3772        s->avctx->width = s->width;
3773        s->avctx->height = s->height;
3774        s->avctx->sample_aspect_ratio= h->sps.sar;
3775        if(!s->avctx->sample_aspect_ratio.den)
3776            s->avctx->sample_aspect_ratio.den = 1;
3777
3778        if(h->sps.timing_info_present_flag){
3779            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
3780            if(h->x264_build > 0 && h->x264_build < 44)
3781                s->avctx->time_base.den *= 2;
3782            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3783                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3784        }
3785    }
3786
3787    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3788
3789    h->mb_mbaff = 0;
3790    h->mb_aff_frame = 0;
3791    last_pic_structure = s0->picture_structure;
3792    if(h->sps.frame_mbs_only_flag){
3793        s->picture_structure= PICT_FRAME;
3794    }else{
3795        if(get_bits1(&s->gb)) { //field_pic_flag
3796            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3797        } else {
3798            s->picture_structure= PICT_FRAME;
3799            h->mb_aff_frame = h->sps.mb_aff;
3800        }
3801    }
3802    h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3803
3804    if(h0->current_slice == 0){
3805        while(h->frame_num !=  h->prev_frame_num &&
3806              h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3807            av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3808            if (frame_start(h) < 0)
3809                return -1;
3810            h->prev_frame_num++;
3811            h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3812            s->current_picture_ptr->frame_num= h->prev_frame_num;
3813            execute_ref_pic_marking(h, NULL, 0);
3814        }
3815
3816        /* See if we have a decoded first field looking for a pair... */
3817        if (s0->first_field) {
3818            assert(s0->current_picture_ptr);
3819            assert(s0->current_picture_ptr->data[0]);
3820            assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3821
3822            /* figure out if we have a complementary field pair */
3823            if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3824                /*
3825                 * Previous field is unmatched. Don't display it, but let it
3826                 * remain for reference if marked as such.
3827                 */
3828                s0->current_picture_ptr = NULL;
3829                s0->first_field = FIELD_PICTURE;
3830
3831            } else {
3832                if (h->nal_ref_idc &&
3833                        s0->current_picture_ptr->reference &&
3834                        s0->current_picture_ptr->frame_num != h->frame_num) {
3835                    /*
3836                     * This and previous field were reference, but had
3837                     * different frame_nums. Consider this field first in
3838                     * pair. Throw away previous field except for reference
3839                     * purposes.
3840                     */
3841                    s0->first_field = 1;
3842                    s0->current_picture_ptr = NULL;
3843
3844                } else {
3845                    /* Second field in complementary pair */
3846                    s0->first_field = 0;
3847                }
3848            }
3849
3850        } else {
3851            /* Frame or first field in a potentially complementary pair */
3852            assert(!s0->current_picture_ptr);
3853            s0->first_field = FIELD_PICTURE;
3854        }
3855
3856        if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3857            s0->first_field = 0;
3858            return -1;
3859        }
3860    }
3861    if(h != h0)
3862        clone_slice(h, h0);
3863
3864    s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3865
3866    assert(s->mb_num == s->mb_width * s->mb_height);
3867    if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3868       first_mb_in_slice                    >= s->mb_num){
3869        av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3870        return -1;
3871    }
3872    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3873    s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3874    if (s->picture_structure == PICT_BOTTOM_FIELD)
3875        s->resync_mb_y = s->mb_y = s->mb_y + 1;
3876    assert(s->mb_y < s->mb_height);
3877
3878    if(s->picture_structure==PICT_FRAME){
3879        h->curr_pic_num=   h->frame_num;
3880        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3881    }else{
3882        h->curr_pic_num= 2*h->frame_num + 1;
3883        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3884    }
3885
3886    if(h->nal_unit_type == NAL_IDR_SLICE){
3887        get_ue_golomb(&s->gb); /* idr_pic_id */
3888    }
3889
3890    if(h->sps.poc_type==0){
3891        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3892
3893        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3894            h->delta_poc_bottom= get_se_golomb(&s->gb);
3895        }
3896    }
3897
3898    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3899        h->delta_poc[0]= get_se_golomb(&s->gb);
3900
3901        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3902            h->delta_poc[1]= get_se_golomb(&s->gb);
3903    }
3904
3905    init_poc(h);
3906
3907    if(h->pps.redundant_pic_cnt_present){
3908        h->redundant_pic_count= get_ue_golomb(&s->gb);
3909    }
3910
3911    //set defaults, might be overridden a few lines later
3912    h->ref_count[0]= h->pps.ref_count[0];
3913    h->ref_count[1]= h->pps.ref_count[1];
3914
3915    if(h->slice_type_nos != FF_I_TYPE){
3916        if(h->slice_type_nos == FF_B_TYPE){
3917            h->direct_spatial_mv_pred= get_bits1(&s->gb);
3918        }
3919        num_ref_idx_active_override_flag= get_bits1(&s->gb);
3920
3921        if(num_ref_idx_active_override_flag){
3922            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3923            if(h->slice_type_nos==FF_B_TYPE)
3924                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3925
3926            if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3927                av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3928                h->ref_count[0]= h->ref_count[1]= 1;
3929                return -1;
3930            }
3931        }
3932        if(h->slice_type_nos == FF_B_TYPE)
3933            h->list_count= 2;
3934        else
3935            h->list_count= 1;
3936    }else
3937        h->list_count= 0;
3938
3939    if(!default_ref_list_done){
3940        fill_default_ref_list(h);
3941    }
3942
3943    if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3944        return -1;
3945
3946    if(h->slice_type_nos!=FF_I_TYPE){
3947        s->last_picture_ptr= &h->ref_list[0][0];
3948        ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3949    }
3950    if(h->slice_type_nos==FF_B_TYPE){
3951        s->next_picture_ptr= &h->ref_list[1][0];
3952        ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3953    }
3954
3955    if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
3956       ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3957        pred_weight_table(h);
3958    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3959        implicit_weight_table(h);
3960    else {
3961        h->use_weight = 0;
3962        for (i = 0; i < 2; i++) {
3963            h->luma_weight_flag[i]   = 0;
3964            h->chroma_weight_flag[i] = 0;
3965        }
3966    }
3967
3968    if(h->nal_ref_idc)
3969        decode_ref_pic_marking(h0, &s->gb);
3970
3971    if(FRAME_MBAFF)
3972        fill_mbaff_ref_list(h);
3973
3974    if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3975        direct_dist_scale_factor(h);
3976    direct_ref_list_init(h);
3977
3978    if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3979        tmp = get_ue_golomb_31(&s->gb);
3980        if(tmp > 2){
3981            av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3982            return -1;
3983        }
3984        h->cabac_init_idc= tmp;
3985    }
3986
3987    h->last_qscale_diff = 0;
3988    tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3989    if(tmp>51){
3990        av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3991        return -1;
3992    }
3993    s->qscale= tmp;
3994    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3995    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3996    //FIXME qscale / qp ... stuff
3997    if(h->slice_type == FF_SP_TYPE){
3998        get_bits1(&s->gb); /* sp_for_switch_flag */
3999    }
4000    if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4001        get_se_golomb(&s->gb); /* slice_qs_delta */
4002    }
4003
4004    h->deblocking_filter = 1;
4005    h->slice_alpha_c0_offset = 0;
4006    h->slice_beta_offset = 0;
4007    if( h->pps.deblocking_filter_parameters_present ) {
4008        tmp= get_ue_golomb_31(&s->gb);
4009        if(tmp > 2){
4010            av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4011            return -1;
4012        }
4013        h->deblocking_filter= tmp;
4014        if(h->deblocking_filter < 2)
4015            h->deblocking_filter^= 1; // 1<->0
4016
4017        if( h->deblocking_filter ) {
4018            h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4019            h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4020        }
4021    }
4022
4023    if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
4024       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4025       ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
4026       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4027        h->deblocking_filter= 0;
4028
4029    if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4030        if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4031            /* Cheat slightly for speed:
4032               Do not bother to deblock across slices. */
4033            h->deblocking_filter = 2;
4034        } else {
4035            h0->max_contexts = 1;
4036            if(!h0->single_decode_warning) {
4037                av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4038                h0->single_decode_warning = 1;
4039            }
4040            if(h != h0)
4041                return 1; // deblocking switched inside frame
4042        }
4043    }
4044
4045#if 0 //FMO
4046    if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4047        slice_group_change_cycle= get_bits(&s->gb, ?);
4048#endif
4049
4050    h0->last_slice_type = slice_type;
4051    h->slice_num = ++h0->current_slice;
4052    if(h->slice_num >= MAX_SLICES){
4053        av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4054    }
4055
4056    for(j=0; j<2; j++){
4057        int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4058        ref2frm[0]=
4059        ref2frm[1]= -1;
4060        for(i=0; i<16; i++)
4061            ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4062                          +(h->ref_list[j][i].reference&3);
4063        ref2frm[18+0]=
4064        ref2frm[18+1]= -1;
4065        for(i=16; i<48; i++)
4066            ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4067                          +(h->ref_list[j][i].reference&3);
4068    }
4069
4070    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4071    h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4072
4073    s->avctx->refs= h->sps.ref_frame_count;
4074
4075    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4076        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4077               h->slice_num,
4078               (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4079               first_mb_in_slice,
4080               av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4081               pps_id, h->frame_num,
4082               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4083               h->ref_count[0], h->ref_count[1],
4084               s->qscale,
4085               h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4086               h->use_weight,
4087               h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4088               h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4089               );
4090    }
4091
4092    return 0;
4093}
4094
4095/**
4096 *
4097 */
4098static inline int get_level_prefix(GetBitContext *gb){
4099    unsigned int buf;
4100    int log;
4101
4102    OPEN_READER(re, gb);
4103    UPDATE_CACHE(re, gb);
4104    buf=GET_CACHE(re, gb);
4105
4106    log= 32 - av_log2(buf);
4107#ifdef TRACE
4108    print_bin(buf>>(32-log), log);
4109    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4110#endif
4111
4112    LAST_SKIP_BITS(re, gb, log);
4113    CLOSE_READER(re, gb);
4114
4115    return log-1;
4116}
4117
4118static inline int get_dct8x8_allowed(H264Context *h){
4119    if(h->sps.direct_8x8_inference_flag)
4120        return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
4121    else
4122        return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4123}
4124
4125/**
4126 * decodes a residual block.
4127 * @param n block index
4128 * @param scantable scantable
4129 * @param max_coeff number of coefficients in the block
4130 * @return <0 if an error occurred
4131 */
4132static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4133    MpegEncContext * const s = &h->s;
4134    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4135    int level[16];
4136    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4137
4138    //FIXME put trailing_onex into the context
4139
4140    if(n == CHROMA_DC_BLOCK_INDEX){
4141        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4142        total_coeff= coeff_token>>2;
4143    }else{
4144        if(n == LUMA_DC_BLOCK_INDEX){
4145            total_coeff= pred_non_zero_count(h, 0);
4146            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4147            total_coeff= coeff_token>>2;
4148        }else{
4149            total_coeff= pred_non_zero_count(h, n);
4150            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4151            total_coeff= coeff_token>>2;
4152            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4153        }
4154    }
4155
4156    //FIXME set last_non_zero?
4157
4158    if(total_coeff==0)
4159        return 0;
4160    if(total_coeff > (unsigned)max_coeff) {
4161        av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4162        return -1;
4163    }
4164
4165    trailing_ones= coeff_token&3;
4166    tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4167    assert(total_coeff<=16);
4168
4169    i = show_bits(gb, 3);
4170    skip_bits(gb, trailing_ones);
4171    level[0] = 1-((i&4)>>1);
4172    level[1] = 1-((i&2)   );
4173    level[2] = 1-((i&1)<<1);
4174
4175    if(trailing_ones<total_coeff) {
4176        int mask, prefix;
4177        int suffix_length = total_coeff > 10 && trailing_ones < 3;
4178        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4179        int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4180
4181        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4182        if(level_code >= 100){
4183            prefix= level_code - 100;
4184            if(prefix == LEVEL_TAB_BITS)
4185                prefix += get_level_prefix(gb);
4186
4187            //first coefficient has suffix_length equal to 0 or 1
4188            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4189                if(suffix_length)
4190                    level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4191                else
4192                    level_code= (prefix<<suffix_length); //part
4193            }else if(prefix==14){
4194                if(suffix_length)
4195                    level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4196                else
4197                    level_code= prefix + get_bits(gb, 4); //part
4198            }else{
4199                level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4200                if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4201                if(prefix>=16)
4202                    level_code += (1<<(prefix-3))-4096;
4203            }
4204
4205            if(trailing_ones < 3) level_code += 2;
4206
4207            suffix_length = 2;
4208            mask= -(level_code&1);
4209            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4210        }else{
4211            if(trailing_ones < 3) level_code += (level_code>>31)|1;
4212
4213            suffix_length = 1;
4214            if(level_code + 3U > 6U)
4215                suffix_length++;
4216            level[trailing_ones]= level_code;
4217        }
4218
4219        //remaining coefficients have suffix_length > 0
4220        for(i=trailing_ones+1;i<total_coeff;i++) {
4221            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4222            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4223            level_code= cavlc_level_tab[suffix_length][bitsi][0];
4224
4225            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4226            if(level_code >= 100){
4227                prefix= level_code - 100;
4228                if(prefix == LEVEL_TAB_BITS){
4229                    prefix += get_level_prefix(gb);
4230                }
4231                if(prefix<15){
4232                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4233                }else{
4234                    level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4235                    if(prefix>=16)
4236                        level_code += (1<<(prefix-3))-4096;
4237                }
4238                mask= -(level_code&1);
4239                level_code= (((2+level_code)>>1) ^ mask) - mask;
4240            }
4241            level[i]= level_code;
4242
4243            if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
4244                suffix_length++;
4245        }
4246    }
4247
4248    if(total_coeff == max_coeff)
4249        zeros_left=0;
4250    else{
4251        if(n == CHROMA_DC_BLOCK_INDEX)
4252            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4253        else
4254            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4255    }
4256
4257    coeff_num = zeros_left + total_coeff - 1;
4258    j = scantable[coeff_num];
4259    if(n > 24){
4260        block[j] = level[0];
4261        for(i=1;i<total_coeff;i++) {
4262            if(zeros_left <= 0)
4263                run_before = 0;
4264            else if(zeros_left < 7){
4265                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4266            }else{
4267                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4268            }
4269            zeros_left -= run_before;
4270            coeff_num -= 1 + run_before;
4271            j= scantable[ coeff_num ];
4272
4273            block[j]= level[i];
4274        }
4275    }else{
4276        block[j] = (level[0] * qmul[j] + 32)>>6;
4277        for(i=1;i<total_coeff;i++) {
4278            if(zeros_left <= 0)
4279                run_before = 0;
4280            else if(zeros_left < 7){
4281                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4282            }else{
4283                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4284            }
4285            zeros_left -= run_before;
4286            coeff_num -= 1 + run_before;
4287            j= scantable[ coeff_num ];
4288
4289            block[j]= (level[i] * qmul[j] + 32)>>6;
4290        }
4291    }
4292
4293    if(zeros_left<0){
4294        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4295        return -1;
4296    }
4297
4298    return 0;
4299}
4300
4301static void predict_field_decoding_flag(H264Context *h){
4302    MpegEncContext * const s = &h->s;
4303    const int mb_xy= h->mb_xy;
4304    int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4305                ? s->current_picture.mb_type[mb_xy-1]
4306                : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4307                ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4308                : 0;
4309    h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4310}
4311
4312/**
4313 * decodes a P_SKIP or B_SKIP macroblock
4314 */
4315static void decode_mb_skip(H264Context *h){
4316    MpegEncContext * const s = &h->s;
4317    const int mb_xy= h->mb_xy;
4318    int mb_type=0;
4319
4320    memset(h->non_zero_count[mb_xy], 0, 16);
4321    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4322
4323    if(MB_FIELD)
4324        mb_type|= MB_TYPE_INTERLACED;
4325
4326    if( h->slice_type_nos == FF_B_TYPE )
4327    {
4328        // just for fill_caches. pred_direct_motion will set the real mb_type
4329        mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4330
4331        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4332        pred_direct_motion(h, &mb_type);
4333        mb_type|= MB_TYPE_SKIP;
4334    }
4335    else
4336    {
4337        int mx, my;
4338        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4339
4340        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4341        pred_pskip_motion(h, &mx, &my);
4342        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4343        fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4344    }
4345
4346    write_back_motion(h, mb_type);
4347    s->current_picture.mb_type[mb_xy]= mb_type;
4348    s->current_picture.qscale_table[mb_xy]= s->qscale;
4349    h->slice_table[ mb_xy ]= h->slice_num;
4350    h->prev_mb_skipped= 1;
4351}
4352
4353/**
4354 * decodes a macroblock
4355 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4356 */
4357static int decode_mb_cavlc(H264Context *h){
4358    MpegEncContext * const s = &h->s;
4359    int mb_xy;
4360    int partition_count;
4361    unsigned int mb_type, cbp;
4362    int dct8x8_allowed= h->pps.transform_8x8_mode;
4363
4364    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4365
4366    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4367    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4368                down the code */
4369    if(h->slice_type_nos != FF_I_TYPE){
4370        if(s->mb_skip_run==-1)
4371            s->mb_skip_run= get_ue_golomb(&s->gb);
4372
4373        if (s->mb_skip_run--) {
4374            if(FRAME_MBAFF && (s->mb_y&1) == 0){
4375                if(s->mb_skip_run==0)
4376                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4377                else
4378                    predict_field_decoding_flag(h);
4379            }
4380            decode_mb_skip(h);
4381            return 0;
4382        }
4383    }
4384    if(FRAME_MBAFF){
4385        if( (s->mb_y&1) == 0 )
4386            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4387    }
4388
4389    h->prev_mb_skipped= 0;
4390
4391    mb_type= get_ue_golomb(&s->gb);
4392    if(h->slice_type_nos == FF_B_TYPE){
4393        if(mb_type < 23){
4394            partition_count= b_mb_type_info[mb_type].partition_count;
4395            mb_type=         b_mb_type_info[mb_type].type;
4396        }else{
4397            mb_type -= 23;
4398            goto decode_intra_mb;
4399        }
4400    }else if(h->slice_type_nos == FF_P_TYPE){
4401        if(mb_type < 5){
4402            partition_count= p_mb_type_info[mb_type].partition_count;
4403            mb_type=         p_mb_type_info[mb_type].type;
4404        }else{
4405            mb_type -= 5;
4406            goto decode_intra_mb;
4407        }
4408    }else{
4409       assert(h->slice_type_nos == FF_I_TYPE);
4410        if(h->slice_type == FF_SI_TYPE && mb_type)
4411            mb_type--;
4412decode_intra_mb:
4413        if(mb_type > 25){
4414            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4415            return -1;
4416        }
4417        partition_count=0;
4418        cbp= i_mb_type_info[mb_type].cbp;
4419        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4420        mb_type= i_mb_type_info[mb_type].type;
4421    }
4422
4423    if(MB_FIELD)
4424        mb_type |= MB_TYPE_INTERLACED;
4425
4426    h->slice_table[ mb_xy ]= h->slice_num;
4427
4428    if(IS_INTRA_PCM(mb_type)){
4429        unsigned int x;
4430
4431        // We assume these blocks are very rare so we do not optimize it.
4432        align_get_bits(&s->gb);
4433
4434        // The pixels are stored in the same order as levels in h->mb array.
4435        for(x=0; x < (CHROMA ? 384 : 256); x++){
4436            ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4437        }
4438
4439        // In deblocking, the quantizer is 0
4440        s->current_picture.qscale_table[mb_xy]= 0;
4441        // All coeffs are present
4442        memset(h->non_zero_count[mb_xy], 16, 16);
4443
4444        s->current_picture.mb_type[mb_xy]= mb_type;
4445        return 0;
4446    }
4447
4448    if(MB_MBAFF){
4449        h->ref_count[0] <<= 1;
4450        h->ref_count[1] <<= 1;
4451    }
4452
4453    fill_caches(h, mb_type, 0);
4454
4455    //mb_pred
4456    if(IS_INTRA(mb_type)){
4457        int pred_mode;
4458//            init_top_left_availability(h);
4459        if(IS_INTRA4x4(mb_type)){
4460            int i;
4461            int di = 1;
4462            if(dct8x8_allowed && get_bits1(&s->gb)){
4463                mb_type |= MB_TYPE_8x8DCT;
4464                di = 4;
4465            }
4466
4467//                fill_intra4x4_pred_table(h);
4468            for(i=0; i<16; i+=di){
4469                int mode= pred_intra_mode(h, i);
4470
4471                if(!get_bits1(&s->gb)){
4472                    const int rem_mode= get_bits(&s->gb, 3);
4473                    mode = rem_mode + (rem_mode >= mode);
4474                }
4475
4476                if(di==4)
4477                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4478                else
4479                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4480            }
4481            write_back_intra_pred_mode(h);
4482            if( check_intra4x4_pred_mode(h) < 0)
4483                return -1;
4484        }else{
4485            h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4486            if(h->intra16x16_pred_mode < 0)
4487                return -1;
4488        }
4489        if(CHROMA){
4490            pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4491            if(pred_mode < 0)
4492                return -1;
4493            h->chroma_pred_mode= pred_mode;
4494        }
4495    }else if(partition_count==4){
4496        int i, j, sub_partition_count[4], list, ref[2][4];
4497
4498        if(h->slice_type_nos == FF_B_TYPE){
4499            for(i=0; i<4; i++){
4500                h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4501                if(h->sub_mb_type[i] >=13){
4502                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4503                    return -1;
4504                }
4505                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4506                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4507            }
4508            if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4509               || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4510                pred_direct_motion(h, &mb_type);
4511                h->ref_cache[0][scan8[4]] =
4512                h->ref_cache[1][scan8[4]] =
4513                h->ref_cache[0][scan8[12]] =
4514                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4515            }
4516        }else{
4517            assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4518            for(i=0; i<4; i++){
4519                h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4520                if(h->sub_mb_type[i] >=4){
4521                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4522                    return -1;
4523                }
4524                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4525                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4526            }
4527        }
4528
4529        for(list=0; list<h->list_count; list++){
4530            int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4531            for(i=0; i<4; i++){
4532                if(IS_DIRECT(h->sub_mb_type[i])) continue;
4533                if(IS_DIR(h->sub_mb_type[i], 0, list)){
4534                    unsigned int tmp;
4535                    if(ref_count == 1){
4536                        tmp= 0;
4537                    }else if(ref_count == 2){
4538                        tmp= get_bits1(&s->gb)^1;
4539                    }else{
4540                        tmp= get_ue_golomb_31(&s->gb);
4541                        if(tmp>=ref_count){
4542                            av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4543                            return -1;
4544                        }
4545                    }
4546                    ref[list][i]= tmp;
4547                }else{
4548                 //FIXME
4549                    ref[list][i] = -1;
4550                }
4551            }
4552        }
4553
4554        if(dct8x8_allowed)
4555            dct8x8_allowed = get_dct8x8_allowed(h);
4556
4557        for(list=0; list<h->list_count; list++){
4558            for(i=0; i<4; i++){
4559                if(IS_DIRECT(h->sub_mb_type[i])) {
4560                    h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4561                    continue;
4562                }
4563                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4564                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4565
4566                if(IS_DIR(h->sub_mb_type[i], 0, list)){
4567                    const int sub_mb_type= h->sub_mb_type[i];
4568                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4569                    for(j=0; j<sub_partition_count[i]; j++){
4570                        int mx, my;
4571                        const int index= 4*i + block_width*j;
4572                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4573                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4574                        mx += get_se_golomb(&s->gb);
4575                        my += get_se_golomb(&s->gb);
4576                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4577
4578                        if(IS_SUB_8X8(sub_mb_type)){
4579                            mv_cache[ 1 ][0]=
4580                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4581                            mv_cache[ 1 ][1]=
4582                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4583                        }else if(IS_SUB_8X4(sub_mb_type)){
4584                            mv_cache[ 1 ][0]= mx;
4585                            mv_cache[ 1 ][1]= my;
4586                        }else if(IS_SUB_4X8(sub_mb_type)){
4587                            mv_cache[ 8 ][0]= mx;
4588                            mv_cache[ 8 ][1]= my;
4589                        }
4590                        mv_cache[ 0 ][0]= mx;
4591                        mv_cache[ 0 ][1]= my;
4592                    }
4593                }else{
4594                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4595                    p[0] = p[1]=
4596                    p[8] = p[9]= 0;
4597                }
4598            }
4599        }
4600    }else if(IS_DIRECT(mb_type)){
4601        pred_direct_motion(h, &mb_type);
4602        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4603    }else{
4604        int list, mx, my, i;
4605         //FIXME we should set ref_idx_l? to 0 if we use that later ...
4606        if(IS_16X16(mb_type)){
4607            for(list=0; list<h->list_count; list++){
4608                    unsigned int val;
4609                    if(IS_DIR(mb_type, 0, list)){
4610                        if(h->ref_count[list]==1){
4611                            val= 0;
4612                        }else if(h->ref_count[list]==2){
4613                            val= get_bits1(&s->gb)^1;
4614                        }else{
4615                            val= get_ue_golomb_31(&s->gb);
4616                            if(val >= h->ref_count[list]){
4617                                av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4618                                return -1;
4619                            }
4620                        }
4621                    }else
4622                        val= LIST_NOT_USED&0xFF;
4623                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4624            }
4625            for(list=0; list<h->list_count; list++){
4626                unsigned int val;
4627                if(IS_DIR(mb_type, 0, list)){
4628                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4629                    mx += get_se_golomb(&s->gb);
4630                    my += get_se_golomb(&s->gb);
4631                    tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4632
4633                    val= pack16to32(mx,my);
4634                }else
4635                    val=0;
4636                fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4637            }
4638        }
4639        else if(IS_16X8(mb_type)){
4640            for(list=0; list<h->list_count; list++){
4641                    for(i=0; i<2; i++){
4642                        unsigned int val;
4643                        if(IS_DIR(mb_type, i, list)){
4644                            if(h->ref_count[list] == 1){
4645                                val= 0;
4646                            }else if(h->ref_count[list] == 2){
4647                                val= get_bits1(&s->gb)^1;
4648                            }else{
4649                                val= get_ue_golomb_31(&s->gb);
4650                                if(val >= h->ref_count[list]){
4651                                    av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4652                                    return -1;
4653                                }
4654                            }
4655                        }else
4656                            val= LIST_NOT_USED&0xFF;
4657                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4658                    }
4659            }
4660            for(list=0; list<h->list_count; list++){
4661                for(i=0; i<2; i++){
4662                    unsigned int val;
4663                    if(IS_DIR(mb_type, i, list)){
4664                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4665                        mx += get_se_golomb(&s->gb);
4666                        my += get_se_golomb(&s->gb);
4667                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4668
4669                        val= pack16to32(mx,my);
4670                    }else
4671                        val=0;
4672                    fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4673                }
4674            }
4675        }else{
4676            assert(IS_8X16(mb_type));
4677            for(list=0; list<h->list_count; list++){
4678                    for(i=0; i<2; i++){
4679                        unsigned int val;
4680                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4681                            if(h->ref_count[list]==1){
4682                                val= 0;
4683                            }else if(h->ref_count[list]==2){
4684                                val= get_bits1(&s->gb)^1;
4685                            }else{
4686                                val= get_ue_golomb_31(&s->gb);
4687                                if(val >= h->ref_count[list]){
4688                                    av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4689                                    return -1;
4690                                }
4691                            }
4692                        }else
4693                            val= LIST_NOT_USED&0xFF;
4694                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4695                    }
4696            }
4697            for(list=0; list<h->list_count; list++){
4698                for(i=0; i<2; i++){
4699                    unsigned int val;
4700                    if(IS_DIR(mb_type, i, list)){
4701                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4702                        mx += get_se_golomb(&s->gb);
4703                        my += get_se_golomb(&s->gb);
4704                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4705
4706                        val= pack16to32(mx,my);
4707                    }else
4708                        val=0;
4709                    fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4710                }
4711            }
4712        }
4713    }
4714
4715    if(IS_INTER(mb_type))
4716        write_back_motion(h, mb_type);
4717
4718    if(!IS_INTRA16x16(mb_type)){
4719        cbp= get_ue_golomb(&s->gb);
4720        if(cbp > 47){
4721            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4722            return -1;
4723        }
4724
4725        if(CHROMA){
4726            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4727            else                     cbp= golomb_to_inter_cbp   [cbp];
4728        }else{
4729            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4730            else                     cbp= golomb_to_inter_cbp_gray[cbp];
4731        }
4732    }
4733    h->cbp = cbp;
4734
4735    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4736        if(get_bits1(&s->gb)){
4737            mb_type |= MB_TYPE_8x8DCT;
4738            h->cbp_table[mb_xy]= cbp;
4739        }
4740    }
4741    s->current_picture.mb_type[mb_xy]= mb_type;
4742
4743    if(cbp || IS_INTRA16x16(mb_type)){
4744        int i8x8, i4x4, chroma_idx;
4745        int dquant;
4746        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4747        const uint8_t *scan, *scan8x8, *dc_scan;
4748
4749//        fill_non_zero_count_cache(h);
4750
4751        if(IS_INTERLACED(mb_type)){
4752            scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4753            scan= s->qscale ? h->field_scan : h->field_scan_q0;
4754            dc_scan= luma_dc_field_scan;
4755        }else{
4756            scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4757            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4758            dc_scan= luma_dc_zigzag_scan;
4759        }
4760
4761        dquant= get_se_golomb(&s->gb);
4762
4763        if( dquant > 25 || dquant < -26 ){
4764            av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4765            return -1;
4766        }
4767
4768        s->qscale += dquant;
4769        if(((unsigned)s->qscale) > 51){
4770            if(s->qscale<0) s->qscale+= 52;
4771            else            s->qscale-= 52;
4772        }
4773
4774        h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4775        h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4776        if(IS_INTRA16x16(mb_type)){
4777            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4778                return -1; //FIXME continue if partitioned and other return -1 too
4779            }
4780
4781            assert((cbp&15) == 0 || (cbp&15) == 15);
4782
4783            if(cbp&15){
4784                for(i8x8=0; i8x8<4; i8x8++){
4785                    for(i4x4=0; i4x4<4; i4x4++){
4786                        const int index= i4x4 + 4*i8x8;
4787                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4788                            return -1;
4789                        }
4790                    }
4791                }
4792            }else{
4793                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4794            }
4795        }else{
4796            for(i8x8=0; i8x8<4; i8x8++){
4797                if(cbp & (1<<i8x8)){
4798                    if(IS_8x8DCT(mb_type)){
4799                        DCTELEM *buf = &h->mb[64*i8x8];
4800                        uint8_t *nnz;
4801                        for(i4x4=0; i4x4<4; i4x4++){
4802                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4803                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4804                                return -1;
4805                        }
4806                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4807                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
4808                    }else{
4809                        for(i4x4=0; i4x4<4; i4x4++){
4810                            const int index= i4x4 + 4*i8x8;
4811
4812                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4813                                return -1;
4814                            }
4815                        }
4816                    }
4817                }else{
4818                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4819                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4820                }
4821            }
4822        }
4823
4824        if(cbp&0x30){
4825            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4826                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4827                    return -1;
4828                }
4829        }
4830
4831        if(cbp&0x20){
4832            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4833                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4834                for(i4x4=0; i4x4<4; i4x4++){
4835                    const int index= 16 + 4*chroma_idx + i4x4;
4836                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4837                        return -1;
4838                    }
4839                }
4840            }
4841        }else{
4842            uint8_t * const nnz= &h->non_zero_count_cache[0];
4843            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4844            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4845        }
4846    }else{
4847        uint8_t * const nnz= &h->non_zero_count_cache[0];
4848        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4849        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4850        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4851    }
4852    s->current_picture.qscale_table[mb_xy]= s->qscale;
4853    write_back_non_zero_count(h);
4854
4855    if(MB_MBAFF){
4856        h->ref_count[0] >>= 1;
4857        h->ref_count[1] >>= 1;
4858    }
4859
4860    return 0;
4861}
4862
4863static int decode_cabac_field_decoding_flag(H264Context *h) {
4864    MpegEncContext * const s = &h->s;
4865    const int mb_x = s->mb_x;
4866    const int mb_y = s->mb_y & ~1;
4867    const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
4868    const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
4869
4870    unsigned int ctx = 0;
4871
4872    if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4873        ctx += 1;
4874    }
4875    if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4876        ctx += 1;
4877    }
4878
4879    return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4880}
4881
4882static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4883    uint8_t *state= &h->cabac_state[ctx_base];
4884    int mb_type;
4885
4886    if(intra_slice){
4887        MpegEncContext * const s = &h->s;
4888        const int mba_xy = h->left_mb_xy[0];
4889        const int mbb_xy = h->top_mb_xy;
4890        int ctx=0;
4891        if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4892            ctx++;
4893        if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4894            ctx++;
4895        if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4896            return 0;   /* I4x4 */
4897        state += 2;
4898    }else{
4899        if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4900            return 0;   /* I4x4 */
4901    }
4902
4903    if( get_cabac_terminate( &h->cabac ) )
4904        return 25;  /* PCM */
4905
4906    mb_type = 1; /* I16x16 */
4907    mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4908    if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4909        mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4910    mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4911    mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4912    return mb_type;
4913}
4914
4915static int decode_cabac_mb_type_b( H264Context *h ) {
4916    MpegEncContext * const s = &h->s;
4917
4918        const int mba_xy = h->left_mb_xy[0];
4919        const int mbb_xy = h->top_mb_xy;
4920        int ctx = 0;
4921        int bits;
4922        assert(h->slice_type_nos == FF_B_TYPE);
4923
4924        if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4925            ctx++;
4926        if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4927            ctx++;
4928
4929        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4930            return 0; /* B_Direct_16x16 */
4931
4932        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4933            return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4934        }
4935
4936        bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4937        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4938        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4939        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4940        if( bits < 8 )
4941            return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4942        else if( bits == 13 ) {
4943            return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4944        } else if( bits == 14 )
4945            return 11; /* B_L1_L0_8x16 */
4946        else if( bits == 15 )
4947            return 22; /* B_8x8 */
4948
4949        bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4950        return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4951}
4952
4953static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4954    MpegEncContext * const s = &h->s;
4955    int mba_xy, mbb_xy;
4956    int ctx = 0;
4957
4958    if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4959        int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4960        mba_xy = mb_xy - 1;
4961        if( (mb_y&1)
4962            && h->slice_table[mba_xy] == h->slice_num
4963            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4964            mba_xy += s->mb_stride;
4965        if( MB_FIELD ){
4966            mbb_xy = mb_xy - s->mb_stride;
4967            if( !(mb_y&1)
4968                && h->slice_table[mbb_xy] == h->slice_num
4969                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4970                mbb_xy -= s->mb_stride;
4971        }else
4972            mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4973    }else{
4974        int mb_xy = h->mb_xy;
4975        mba_xy = mb_xy - 1;
4976        mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4977    }
4978
4979    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4980        ctx++;
4981    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4982        ctx++;
4983
4984    if( h->slice_type_nos == FF_B_TYPE )
4985        ctx += 13;
4986    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4987}
4988
4989static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4990    int mode = 0;
4991
4992    if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4993        return pred_mode;
4994
4995    mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4996    mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4997    mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4998
4999    if( mode >= pred_mode )
5000        return mode + 1;
5001    else
5002        return mode;
5003}
5004
5005static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5006    const int mba_xy = h->left_mb_xy[0];
5007    const int mbb_xy = h->top_mb_xy;
5008
5009    int ctx = 0;
5010
5011    /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5012    if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5013        ctx++;
5014
5015    if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5016        ctx++;
5017
5018    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5019        return 0;
5020
5021    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5022        return 1;
5023    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5024        return 2;
5025    else
5026        return 3;
5027}
5028
5029static int decode_cabac_mb_cbp_luma( H264Context *h) {
5030    int cbp_b, cbp_a, ctx, cbp = 0;
5031
5032    cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5033    cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
5034
5035    ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5036    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5037    ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
5038    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5039    ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
5040    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5041    ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
5042    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5043    return cbp;
5044}
5045static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5046    int ctx;
5047    int cbp_a, cbp_b;
5048
5049    cbp_a = (h->left_cbp>>4)&0x03;
5050    cbp_b = (h-> top_cbp>>4)&0x03;
5051
5052    ctx = 0;
5053    if( cbp_a > 0 ) ctx++;
5054    if( cbp_b > 0 ) ctx += 2;
5055    if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5056        return 0;
5057
5058    ctx = 4;
5059    if( cbp_a == 2 ) ctx++;
5060    if( cbp_b == 2 ) ctx += 2;
5061    return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5062}
5063static int decode_cabac_mb_dqp( H264Context *h) {
5064    int   ctx= h->last_qscale_diff != 0;
5065    int   val = 0;
5066
5067    while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5068        ctx= 2+(ctx>>1);
5069        val++;
5070        if(val > 102) //prevent infinite loop
5071            return INT_MIN;
5072    }
5073
5074    if( val&0x01 )
5075        return   (val + 1)>>1 ;
5076    else
5077        return -((val + 1)>>1);
5078}
5079static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5080    if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5081        return 0;   /* 8x8 */
5082    if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5083        return 1;   /* 8x4 */
5084    if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5085        return 2;   /* 4x8 */
5086    return 3;       /* 4x4 */
5087}
5088static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5089    int type;
5090    if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5091        return 0;   /* B_Direct_8x8 */
5092    if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5093        return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5094    type = 3;
5095    if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5096        if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5097            return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5098        type += 4;
5099    }
5100    type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5101    type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
5102    return type;
5103}
5104
5105static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5106    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5107}
5108
5109static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5110    int refa = h->ref_cache[list][scan8[n] - 1];
5111    int refb = h->ref_cache[list][scan8[n] - 8];
5112    int ref  = 0;
5113    int ctx  = 0;
5114
5115    if( h->slice_type_nos == FF_B_TYPE) {
5116        if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5117            ctx++;
5118        if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5119            ctx += 2;
5120    } else {
5121        if( refa > 0 )
5122            ctx++;
5123        if( refb > 0 )
5124            ctx += 2;
5125    }
5126
5127    while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5128        ref++;
5129        ctx = (ctx>>2)+4;
5130        if(ref >= 32 /*h->ref_list[list]*/){
5131            return -1;
5132        }
5133    }
5134    return ref;
5135}
5136
5137static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5138    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5139               abs( h->mvd_cache[list][scan8[n] - 8][l] );
5140    int ctxbase = (l == 0) ? 40 : 47;
5141    int mvd;
5142    int ctx = (amvd>2) + (amvd>32);
5143
5144    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5145        return 0;
5146
5147    mvd= 1;
5148    ctx= 3;
5149    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5150        mvd++;
5151        if( ctx < 6 )
5152            ctx++;
5153    }
5154
5155    if( mvd >= 9 ) {
5156        int k = 3;
5157        while( get_cabac_bypass( &h->cabac ) ) {
5158            mvd += 1 << k;
5159            k++;
5160            if(k>24){
5161                av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5162                return INT_MIN;
5163            }
5164        }
5165        while( k-- ) {
5166            if( get_cabac_bypass( &h->cabac ) )
5167                mvd += 1 << k;
5168        }
5169    }
5170    return get_cabac_bypass_sign( &h->cabac, -mvd );
5171}
5172
5173static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5174    int nza, nzb;
5175    int ctx = 0;
5176
5177    if( is_dc ) {
5178        if( cat == 0 ) {
5179            nza = h->left_cbp&0x100;
5180            nzb = h-> top_cbp&0x100;
5181        } else {
5182            nza = (h->left_cbp>>(6+idx))&0x01;
5183            nzb = (h-> top_cbp>>(6+idx))&0x01;
5184        }
5185    } else {
5186        assert(cat == 1 || cat == 2 || cat == 4);
5187        nza = h->non_zero_count_cache[scan8[idx] - 1];
5188        nzb = h->non_zero_count_cache[scan8[idx] - 8];
5189    }
5190
5191    if( nza > 0 )
5192        ctx++;
5193
5194    if( nzb > 0 )
5195        ctx += 2;
5196
5197    return ctx + 4 * cat;
5198}
5199
5200DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5201    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5202    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5203    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5204    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5205};
5206
5207static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5208    static const int significant_coeff_flag_offset[2][6] = {
5209      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5210      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5211    };
5212    static const int last_coeff_flag_offset[2][6] = {
5213      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5214      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5215    };
5216    static const int coeff_abs_level_m1_offset[6] = {
5217        227+0, 227+10, 227+20, 227+30, 227+39, 426
5218    };
5219    static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5220      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5221        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5222        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5223       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5224      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5225        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5226        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5227        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5228    };
5229    /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5230     * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5231     * map node ctx => cabac ctx for level=1 */
5232    static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5233    /* map node ctx => cabac ctx for level>1 */
5234    static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5235    static const uint8_t coeff_abs_level_transition[2][8] = {
5236    /* update node ctx after decoding a level=1 */
5237        { 1, 2, 3, 3, 4, 5, 6, 7 },
5238    /* update node ctx after decoding a level>1 */
5239        { 4, 4, 4, 4, 5, 6, 7, 7 }
5240    };
5241
5242    int index[64];
5243
5244    int av_unused last;
5245    int coeff_count = 0;
5246    int node_ctx = 0;
5247
5248    uint8_t *significant_coeff_ctx_base;
5249    uint8_t *last_coeff_ctx_base;
5250    uint8_t *abs_level_m1_ctx_base;
5251
5252#if !ARCH_X86
5253#define CABAC_ON_STACK
5254#endif
5255#ifdef CABAC_ON_STACK
5256#define CC &cc
5257    CABACContext cc;
5258    cc.range     = h->cabac.range;
5259    cc.low       = h->cabac.low;
5260    cc.bytestream= h->cabac.bytestream;
5261#else
5262#define CC &h->cabac
5263#endif
5264
5265
5266    /* cat: 0-> DC 16x16  n = 0
5267     *      1-> AC 16x16  n = luma4x4idx
5268     *      2-> Luma4x4   n = luma4x4idx
5269     *      3-> DC Chroma n = iCbCr
5270     *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5271     *      5-> Luma8x8   n = 4 * luma8x8idx
5272     */
5273
5274    /* read coded block flag */
5275    if( is_dc || cat != 5 ) {
5276        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5277            if( !is_dc )
5278                h->non_zero_count_cache[scan8[n]] = 0;
5279
5280#ifdef CABAC_ON_STACK
5281            h->cabac.range     = cc.range     ;
5282            h->cabac.low       = cc.low       ;
5283            h->cabac.bytestream= cc.bytestream;
5284#endif
5285            return;
5286        }
5287    }
5288
5289    significant_coeff_ctx_base = h->cabac_state
5290        + significant_coeff_flag_offset[MB_FIELD][cat];
5291    last_coeff_ctx_base = h->cabac_state
5292        + last_coeff_flag_offset[MB_FIELD][cat];
5293    abs_level_m1_ctx_base = h->cabac_state
5294        + coeff_abs_level_m1_offset[cat];
5295
5296    if( !is_dc && cat == 5 ) {
5297#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5298        for(last= 0; last < coefs; last++) { \
5299            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5300            if( get_cabac( CC, sig_ctx )) { \
5301                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5302                index[coeff_count++] = last; \
5303                if( get_cabac( CC, last_ctx ) ) { \
5304                    last= max_coeff; \
5305                    break; \
5306                } \
5307            } \
5308        }\
5309        if( last == max_coeff -1 ) {\
5310            index[coeff_count++] = last;\
5311        }
5312        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5313#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5314        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5315    } else {
5316        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5317#else
5318        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5319    } else {
5320        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5321#endif
5322    }
5323    assert(coeff_count > 0);
5324
5325    if( is_dc ) {
5326        if( cat == 0 )
5327            h->cbp_table[h->mb_xy] |= 0x100;
5328        else
5329            h->cbp_table[h->mb_xy] |= 0x40 << n;
5330    } else {
5331        if( cat == 5 )
5332            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5333        else {
5334            assert( cat == 1 || cat == 2 || cat == 4 );
5335            h->non_zero_count_cache[scan8[n]] = coeff_count;
5336        }
5337    }
5338
5339    do {
5340        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5341
5342        int j= scantable[index[--coeff_count]];
5343
5344        if( get_cabac( CC, ctx ) == 0 ) {
5345            node_ctx = coeff_abs_level_transition[0][node_ctx];
5346            if( is_dc ) {
5347                block[j] = get_cabac_bypass_sign( CC, -1);
5348            }else{
5349                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5350            }
5351        } else {
5352            int coeff_abs = 2;
5353            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5354            node_ctx = coeff_abs_level_transition[1][node_ctx];
5355
5356            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5357                coeff_abs++;
5358            }
5359
5360            if( coeff_abs >= 15 ) {
5361                int j = 0;
5362                while( get_cabac_bypass( CC ) ) {
5363                    j++;
5364                }
5365
5366                coeff_abs=1;
5367                while( j-- ) {
5368                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
5369                }
5370                coeff_abs+= 14;
5371            }
5372
5373            if( is_dc ) {
5374                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5375            }else{
5376                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5377            }
5378        }
5379    } while( coeff_count );
5380#ifdef CABAC_ON_STACK
5381            h->cabac.range     = cc.range     ;
5382            h->cabac.low       = cc.low       ;
5383            h->cabac.bytestream= cc.bytestream;
5384#endif
5385
5386}
5387
5388#if !CONFIG_SMALL
5389static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5390    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5391}
5392
5393static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5394    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5395}
5396#endif
5397
5398static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5399#if CONFIG_SMALL
5400    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5401#else
5402    if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5403    else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5404#endif
5405}
5406
5407static inline void compute_mb_neighbors(H264Context *h)
5408{
5409    MpegEncContext * const s = &h->s;
5410    const int mb_xy  = h->mb_xy;
5411    h->top_mb_xy     = mb_xy - s->mb_stride;
5412    h->left_mb_xy[0] = mb_xy - 1;
5413    if(FRAME_MBAFF){
5414        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5415        const int top_pair_xy      = pair_xy     - s->mb_stride;
5416        const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5417        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5418        const int curr_mb_field_flag = MB_FIELD;
5419        const int bottom = (s->mb_y & 1);
5420
5421        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5422            h->top_mb_xy -= s->mb_stride;
5423        }
5424        if (!left_mb_field_flag == curr_mb_field_flag) {
5425            h->left_mb_xy[0] = pair_xy - 1;
5426        }
5427    } else if (FIELD_PICTURE) {
5428        h->top_mb_xy -= s->mb_stride;
5429    }
5430    return;
5431}
5432
5433/**
5434 * decodes a macroblock
5435 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5436 */
5437static int decode_mb_cabac(H264Context *h) {
5438    MpegEncContext * const s = &h->s;
5439    int mb_xy;
5440    int mb_type, partition_count, cbp = 0;
5441    int dct8x8_allowed= h->pps.transform_8x8_mode;
5442
5443    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5444
5445    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5446    if( h->slice_type_nos != FF_I_TYPE ) {
5447        int skip;
5448        /* a skipped mb needs the aff flag from the following mb */
5449        if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5450            predict_field_decoding_flag(h);
5451        if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5452            skip = h->next_mb_skipped;
5453        else
5454            skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5455        /* read skip flags */
5456        if( skip ) {
5457            if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5458                s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5459                h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5460                if(!h->next_mb_skipped)
5461                    h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5462            }
5463
5464            decode_mb_skip(h);
5465
5466            h->cbp_table[mb_xy] = 0;
5467            h->chroma_pred_mode_table[mb_xy] = 0;
5468            h->last_qscale_diff = 0;
5469
5470            return 0;
5471
5472        }
5473    }
5474    if(FRAME_MBAFF){
5475        if( (s->mb_y&1) == 0 )
5476            h->mb_mbaff =
5477            h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5478    }
5479
5480    h->prev_mb_skipped = 0;
5481
5482    compute_mb_neighbors(h);
5483
5484    if( h->slice_type_nos == FF_B_TYPE ) {
5485        mb_type = decode_cabac_mb_type_b( h );
5486        if( mb_type < 23 ){
5487            partition_count= b_mb_type_info[mb_type].partition_count;
5488            mb_type=         b_mb_type_info[mb_type].type;
5489        }else{
5490            mb_type -= 23;
5491            goto decode_intra_mb;
5492        }
5493    } else if( h->slice_type_nos == FF_P_TYPE ) {
5494        if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5495            /* P-type */
5496            if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5497                /* P_L0_D16x16, P_8x8 */
5498                mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5499            } else {
5500                /* P_L0_D8x16, P_L0_D16x8 */
5501                mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5502            }
5503            partition_count= p_mb_type_info[mb_type].partition_count;
5504            mb_type=         p_mb_type_info[mb_type].type;
5505        } else {
5506            mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5507            goto decode_intra_mb;
5508        }
5509    } else {
5510        mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5511        if(h->slice_type == FF_SI_TYPE && mb_type)
5512            mb_type--;
5513        assert(h->slice_type_nos == FF_I_TYPE);
5514decode_intra_mb:
5515        partition_count = 0;
5516        cbp= i_mb_type_info[mb_type].cbp;
5517        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5518        mb_type= i_mb_type_info[mb_type].type;
5519    }
5520    if(MB_FIELD)
5521        mb_type |= MB_TYPE_INTERLACED;
5522
5523    h->slice_table[ mb_xy ]= h->slice_num;
5524
5525    if(IS_INTRA_PCM(mb_type)) {
5526        const uint8_t *ptr;
5527
5528        // We assume these blocks are very rare so we do not optimize it.
5529        // FIXME The two following lines get the bitstream position in the cabac
5530        // decode, I think it should be done by a function in cabac.h (or cabac.c).
5531        ptr= h->cabac.bytestream;
5532        if(h->cabac.low&0x1) ptr--;
5533        if(CABAC_BITS==16){
5534            if(h->cabac.low&0x1FF) ptr--;
5535        }
5536
5537        // The pixels are stored in the same order as levels in h->mb array.
5538        memcpy(h->mb, ptr, 256); ptr+=256;
5539        if(CHROMA){
5540            memcpy(h->mb+128, ptr, 128); ptr+=128;
5541        }
5542
5543        ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5544
5545        // All blocks are present
5546        h->cbp_table[mb_xy] = 0x1ef;
5547        h->chroma_pred_mode_table[mb_xy] = 0;
5548        // In deblocking, the quantizer is 0
5549        s->current_picture.qscale_table[mb_xy]= 0;
5550        // All coeffs are present
5551        memset(h->non_zero_count[mb_xy], 16, 16);
5552        s->current_picture.mb_type[mb_xy]= mb_type;
5553        h->last_qscale_diff = 0;
5554        return 0;
5555    }
5556
5557    if(MB_MBAFF){
5558        h->ref_count[0] <<= 1;
5559        h->ref_count[1] <<= 1;
5560    }
5561
5562    fill_caches(h, mb_type, 0);
5563
5564    if( IS_INTRA( mb_type ) ) {
5565        int i, pred_mode;
5566        if( IS_INTRA4x4( mb_type ) ) {
5567            if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5568                mb_type |= MB_TYPE_8x8DCT;
5569                for( i = 0; i < 16; i+=4 ) {
5570                    int pred = pred_intra_mode( h, i );
5571                    int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5572                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5573                }
5574            } else {
5575                for( i = 0; i < 16; i++ ) {
5576                    int pred = pred_intra_mode( h, i );
5577                    h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5578
5579                //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5580                }
5581            }
5582            write_back_intra_pred_mode(h);
5583            if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5584        } else {
5585            h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5586            if( h->intra16x16_pred_mode < 0 ) return -1;
5587        }
5588        if(CHROMA){
5589            h->chroma_pred_mode_table[mb_xy] =
5590            pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5591
5592            pred_mode= check_intra_pred_mode( h, pred_mode );
5593            if( pred_mode < 0 ) return -1;
5594            h->chroma_pred_mode= pred_mode;
5595        }
5596    } else if( partition_count == 4 ) {
5597        int i, j, sub_partition_count[4], list, ref[2][4];
5598
5599        if( h->slice_type_nos == FF_B_TYPE ) {
5600            for( i = 0; i < 4; i++ ) {
5601                h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5602                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5603                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5604            }
5605            if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5606                          h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5607                pred_direct_motion(h, &mb_type);
5608                h->ref_cache[0][scan8[4]] =
5609                h->ref_cache[1][scan8[4]] =
5610                h->ref_cache[0][scan8[12]] =
5611                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5612                if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5613                    for( i = 0; i < 4; i++ )
5614                        if( IS_DIRECT(h->sub_mb_type[i]) )
5615                            fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5616                }
5617            }
5618        } else {
5619            for( i = 0; i < 4; i++ ) {
5620                h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5621                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5622                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5623            }
5624        }
5625
5626        for( list = 0; list < h->list_count; list++ ) {
5627                for( i = 0; i < 4; i++ ) {
5628                    if(IS_DIRECT(h->sub_mb_type[i])) continue;
5629                    if(IS_DIR(h->sub_mb_type[i], 0, list)){
5630                        if( h->ref_count[list] > 1 ){
5631                            ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5632                            if(ref[list][i] >= (unsigned)h->ref_count[list]){
5633                                av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5634                                return -1;
5635                            }
5636                        }else
5637                            ref[list][i] = 0;
5638                    } else {
5639                        ref[list][i] = -1;
5640                    }
5641                                                       h->ref_cache[list][ scan8[4*i]+1 ]=
5642                    h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5643                }
5644        }
5645
5646        if(dct8x8_allowed)
5647            dct8x8_allowed = get_dct8x8_allowed(h);
5648
5649        for(list=0; list<h->list_count; list++){
5650            for(i=0; i<4; i++){
5651                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
5652                if(IS_DIRECT(h->sub_mb_type[i])){
5653                    fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5654                    continue;
5655                }
5656
5657                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5658                    const int sub_mb_type= h->sub_mb_type[i];
5659                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5660                    for(j=0; j<sub_partition_count[i]; j++){
5661                        int mpx, mpy;
5662                        int mx, my;
5663                        const int index= 4*i + block_width*j;
5664                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5665                        int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5666                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5667
5668                        mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5669                        my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5670                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5671
5672                        if(IS_SUB_8X8(sub_mb_type)){
5673                            mv_cache[ 1 ][0]=
5674                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5675                            mv_cache[ 1 ][1]=
5676                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5677
5678                            mvd_cache[ 1 ][0]=
5679                            mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5680                            mvd_cache[ 1 ][1]=
5681                            mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5682                        }else if(IS_SUB_8X4(sub_mb_type)){
5683                            mv_cache[ 1 ][0]= mx;
5684                            mv_cache[ 1 ][1]= my;
5685
5686                            mvd_cache[ 1 ][0]= mx - mpx;
5687                            mvd_cache[ 1 ][1]= my - mpy;
5688                        }else if(IS_SUB_4X8(sub_mb_type)){
5689                            mv_cache[ 8 ][0]= mx;
5690                            mv_cache[ 8 ][1]= my;
5691
5692                            mvd_cache[ 8 ][0]= mx - mpx;
5693                            mvd_cache[ 8 ][1]= my - mpy;
5694                        }
5695                        mv_cache[ 0 ][0]= mx;
5696                        mv_cache[ 0 ][1]= my;
5697
5698                        mvd_cache[ 0 ][0]= mx - mpx;
5699                        mvd_cache[ 0 ][1]= my - mpy;
5700                    }
5701                }else{
5702                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5703                    uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5704                    p[0] = p[1] = p[8] = p[9] = 0;
5705                    pd[0]= pd[1]= pd[8]= pd[9]= 0;
5706                }
5707            }
5708        }
5709    } else if( IS_DIRECT(mb_type) ) {
5710        pred_direct_motion(h, &mb_type);
5711        fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5712        fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5713        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5714    } else {
5715        int list, mx, my, i, mpx, mpy;
5716        if(IS_16X16(mb_type)){
5717            for(list=0; list<h->list_count; list++){
5718                if(IS_DIR(mb_type, 0, list)){
5719                    int ref;
5720                    if(h->ref_count[list] > 1){
5721                        ref= decode_cabac_mb_ref(h, list, 0);
5722                        if(ref >= (unsigned)h->ref_count[list]){
5723                            av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5724                            return -1;
5725                        }
5726                    }else
5727                        ref=0;
5728                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5729                }else
5730                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5731            }
5732            for(list=0; list<h->list_count; list++){
5733                if(IS_DIR(mb_type, 0, list)){
5734                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5735
5736                    mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5737                    my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5738                    tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5739
5740                    fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5741                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5742                }else
5743                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5744            }
5745        }
5746        else if(IS_16X8(mb_type)){
5747            for(list=0; list<h->list_count; list++){
5748                    for(i=0; i<2; i++){
5749                        if(IS_DIR(mb_type, i, list)){
5750                            int ref;
5751                            if(h->ref_count[list] > 1){
5752                                ref= decode_cabac_mb_ref( h, list, 8*i );
5753                                if(ref >= (unsigned)h->ref_count[list]){
5754                                    av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5755                                    return -1;
5756                                }
5757                            }else
5758                                ref=0;
5759                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5760                        }else
5761                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5762                    }
5763            }
5764            for(list=0; list<h->list_count; list++){
5765                for(i=0; i<2; i++){
5766                    if(IS_DIR(mb_type, i, list)){
5767                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5768                        mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5769                        my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5770                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5771
5772                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5773                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5774                    }else{
5775                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5776                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5777                    }
5778                }
5779            }
5780        }else{
5781            assert(IS_8X16(mb_type));
5782            for(list=0; list<h->list_count; list++){
5783                    for(i=0; i<2; i++){
5784                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5785                            int ref;
5786                            if(h->ref_count[list] > 1){
5787                                ref= decode_cabac_mb_ref( h, list, 4*i );
5788                                if(ref >= (unsigned)h->ref_count[list]){
5789                                    av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5790                                    return -1;
5791                                }
5792                            }else
5793                                ref=0;
5794                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5795                        }else
5796                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5797                    }
5798            }
5799            for(list=0; list<h->list_count; list++){
5800                for(i=0; i<2; i++){
5801                    if(IS_DIR(mb_type, i, list)){
5802                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5803                        mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5804                        my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5805
5806                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5807                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5808                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5809                    }else{
5810                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5811                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5812                    }
5813                }
5814            }
5815        }
5816    }
5817
5818   if( IS_INTER( mb_type ) ) {
5819        h->chroma_pred_mode_table[mb_xy] = 0;
5820        write_back_motion( h, mb_type );
5821   }
5822
5823    if( !IS_INTRA16x16( mb_type ) ) {
5824        cbp  = decode_cabac_mb_cbp_luma( h );
5825        if(CHROMA)
5826            cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5827    }
5828
5829    h->cbp_table[mb_xy] = h->cbp = cbp;
5830
5831    if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5832        if( decode_cabac_mb_transform_size( h ) )
5833            mb_type |= MB_TYPE_8x8DCT;
5834    }
5835    s->current_picture.mb_type[mb_xy]= mb_type;
5836
5837    if( cbp || IS_INTRA16x16( mb_type ) ) {
5838        const uint8_t *scan, *scan8x8, *dc_scan;
5839        const uint32_t *qmul;
5840        int dqp;
5841
5842        if(IS_INTERLACED(mb_type)){
5843            scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5844            scan= s->qscale ? h->field_scan : h->field_scan_q0;
5845            dc_scan= luma_dc_field_scan;
5846        }else{
5847            scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5848            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5849            dc_scan= luma_dc_zigzag_scan;
5850        }
5851
5852        h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5853        if( dqp == INT_MIN ){
5854            av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5855            return -1;
5856        }
5857        s->qscale += dqp;
5858        if(((unsigned)s->qscale) > 51){
5859            if(s->qscale<0) s->qscale+= 52;
5860            else            s->qscale-= 52;
5861        }
5862        h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5863        h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5864
5865        if( IS_INTRA16x16( mb_type ) ) {
5866            int i;
5867            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5868            decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5869
5870            if( cbp&15 ) {
5871                qmul = h->dequant4_coeff[0][s->qscale];
5872                for( i = 0; i < 16; i++ ) {
5873                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5874                    decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5875                }
5876            } else {
5877                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5878            }
5879        } else {
5880            int i8x8, i4x4;
5881            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5882                if( cbp & (1<<i8x8) ) {
5883                    if( IS_8x8DCT(mb_type) ) {
5884                        decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5885                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5886                    } else {
5887                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5888                        for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5889                            const int index = 4*i8x8 + i4x4;
5890                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5891//START_TIMER
5892                            decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5893//STOP_TIMER("decode_residual")
5894                        }
5895                    }
5896                } else {
5897                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5898                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5899                }
5900            }
5901        }
5902
5903        if( cbp&0x30 ){
5904            int c;
5905            for( c = 0; c < 2; c++ ) {
5906                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5907                decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5908            }
5909        }
5910
5911        if( cbp&0x20 ) {
5912            int c, i;
5913            for( c = 0; c < 2; c++ ) {
5914                qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5915                for( i = 0; i < 4; i++ ) {
5916                    const int index = 16 + 4 * c + i;
5917                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5918                    decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5919                }
5920            }
5921        } else {
5922            uint8_t * const nnz= &h->non_zero_count_cache[0];
5923            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5924            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5925        }
5926    } else {
5927        uint8_t * const nnz= &h->non_zero_count_cache[0];
5928        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5929        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5930        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5931        h->last_qscale_diff = 0;
5932    }
5933
5934    s->current_picture.qscale_table[mb_xy]= s->qscale;
5935    write_back_non_zero_count(h);
5936
5937    if(MB_MBAFF){
5938        h->ref_count[0] >>= 1;
5939        h->ref_count[1] >>= 1;
5940    }
5941
5942    return 0;
5943}
5944
5945
5946static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5947    const int index_a = qp + h->slice_alpha_c0_offset;
5948    const int alpha = (alpha_table+52)[index_a];
5949    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5950
5951    if( bS[0] < 4 ) {
5952        int8_t tc[4];
5953        tc[0] = (tc0_table+52)[index_a][bS[0]];
5954        tc[1] = (tc0_table+52)[index_a][bS[1]];
5955        tc[2] = (tc0_table+52)[index_a][bS[2]];
5956        tc[3] = (tc0_table+52)[index_a][bS[3]];
5957        h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5958    } else {
5959        h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5960    }
5961}
5962static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5963    const int index_a = qp + h->slice_alpha_c0_offset;
5964    const int alpha = (alpha_table+52)[index_a];
5965    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5966
5967    if( bS[0] < 4 ) {
5968        int8_t tc[4];
5969        tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5970        tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5971        tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5972        tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5973        h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5974    } else {
5975        h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5976    }
5977}
5978
5979static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5980    int i;
5981    for( i = 0; i < 16; i++, pix += stride) {
5982        int index_a;
5983        int alpha;
5984        int beta;
5985
5986        int qp_index;
5987        int bS_index = (i >> 1);
5988        if (!MB_FIELD) {
5989            bS_index &= ~1;
5990            bS_index |= (i & 1);
5991        }
5992
5993        if( bS[bS_index] == 0 ) {
5994            continue;
5995        }
5996
5997        qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5998        index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5999        alpha = (alpha_table+52)[index_a];
6000        beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6001
6002        if( bS[bS_index] < 4 ) {
6003            const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6004            const int p0 = pix[-1];
6005            const int p1 = pix[-2];
6006            const int p2 = pix[-3];
6007            const int q0 = pix[0];
6008            const int q1 = pix[1];
6009            const int q2 = pix[2];
6010
6011            if( FFABS( p0 - q0 ) < alpha &&
6012                FFABS( p1 - p0 ) < beta &&
6013                FFABS( q1 - q0 ) < beta ) {
6014                int tc = tc0;
6015                int i_delta;
6016
6017                if( FFABS( p2 - p0 ) < beta ) {
6018                    pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6019                    tc++;
6020                }
6021                if( FFABS( q2 - q0 ) < beta ) {
6022                    pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6023                    tc++;
6024                }
6025
6026                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6027                pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6028                pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6029                tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6030            }
6031        }else{
6032            const int p0 = pix[-1];
6033            const int p1 = pix[-2];
6034            const int p2 = pix[-3];
6035
6036            const int q0 = pix[0];
6037            const int q1 = pix[1];
6038            const int q2 = pix[2];
6039
6040            if( FFABS( p0 - q0 ) < alpha &&
6041                FFABS( p1 - p0 ) < beta &&
6042                FFABS( q1 - q0 ) < beta ) {
6043
6044                if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6045                    if( FFABS( p2 - p0 ) < beta)
6046                    {
6047                        const int p3 = pix[-4];
6048                        /* p0', p1', p2' */
6049                        pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6050                        pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6051                        pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6052                    } else {
6053                        /* p0' */
6054                        pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6055                    }
6056                    if( FFABS( q2 - q0 ) < beta)
6057                    {
6058                        const int q3 = pix[3];
6059                        /* q0', q1', q2' */
6060                        pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6061                        pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6062                        pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6063                    } else {
6064                        /* q0' */
6065                        pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6066                    }
6067                }else{
6068                    /* p0', q0' */
6069                    pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6070                    pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6071                }
6072                tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6073            }
6074        }
6075    }
6076}
6077static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6078    int i;
6079    for( i = 0; i < 8; i++, pix += stride) {
6080        int index_a;
6081        int alpha;
6082        int beta;
6083
6084        int qp_index;
6085        int bS_index = i;
6086
6087        if( bS[bS_index] == 0 ) {
6088            continue;
6089        }
6090
6091        qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6092        index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6093        alpha = (alpha_table+52)[index_a];
6094        beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6095
6096        if( bS[bS_index] < 4 ) {
6097            const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6098            const int p0 = pix[-1];
6099            const int p1 = pix[-2];
6100            const int q0 = pix[0];
6101            const int q1 = pix[1];
6102
6103            if( FFABS( p0 - q0 ) < alpha &&
6104                FFABS( p1 - p0 ) < beta &&
6105                FFABS( q1 - q0 ) < beta ) {
6106                const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6107
6108                pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6109                pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6110                tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6111            }
6112        }else{
6113            const int p0 = pix[-1];
6114            const int p1 = pix[-2];
6115            const int q0 = pix[0];
6116            const int q1 = pix[1];
6117
6118            if( FFABS( p0 - q0 ) < alpha &&
6119                FFABS( p1 - p0 ) < beta &&
6120                FFABS( q1 - q0 ) < beta ) {
6121
6122                pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6123                pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6124                tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6125            }
6126        }
6127    }
6128}
6129
6130static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6131    const int index_a = qp + h->slice_alpha_c0_offset;
6132    const int alpha = (alpha_table+52)[index_a];
6133    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6134
6135    if( bS[0] < 4 ) {
6136        int8_t tc[4];
6137        tc[0] = (tc0_table+52)[index_a][bS[0]];
6138        tc[1] = (tc0_table+52)[index_a][bS[1]];
6139        tc[2] = (tc0_table+52)[index_a][bS[2]];
6140        tc[3] = (tc0_table+52)[index_a][bS[3]];
6141        h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6142    } else {
6143        h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6144    }
6145}
6146
6147static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6148    const int index_a = qp + h->slice_alpha_c0_offset;
6149    const int alpha = (alpha_table+52)[index_a];
6150    const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6151
6152    if( bS[0] < 4 ) {
6153        int8_t tc[4];
6154        tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6155        tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6156        tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6157        tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6158        h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6159    } else {
6160        h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6161    }
6162}
6163
/**
 * Deblock one macroblock using a shortcut path, falling back to filter_mb()
 * whenever one of the preconditions checked at the top does not hold.
 *
 * The shortcut path uses a single quantizer per edge (the MB's own qp for
 * inner edges, the rounded average with the left/top neighbour for edge 0)
 * and either fixed boundary strengths (intra MBs) or strengths produced by
 * the DSP h264_loop_filter_strength hook (all other MBs).
 *
 * @param h          decoder context; h->mb_xy and h->top_mb_xy identify the
 *                   current MB and its top neighbour
 * @param mb_x       MB column; 0 forces the filter_mb() fallback
 * @param mb_y       MB row; the first row forces the filter_mb() fallback
 * @param img_y      luma pixels of the MB
 * @param img_cb     Cb pixels of the MB
 * @param img_cr     Cr pixels of the MB
 * @param linesize   luma line stride
 * @param uvlinesize chroma line stride
 */
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    // row index treated as "first row": 1 for bottom-field pictures, 0 otherwise
    int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
    int mb_xy, mb_type;
    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;

    mb_xy = h->mb_xy;

    // Use the generic filter when there is no left/top neighbour inside the
    // picture, no DSP strength routine, a non-zero pps.chroma_qp_diff, or a
    // slice boundary on the left/top while deblocking_filter == 2.
    if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, so it has to be bypassed; it should only be used under CODEC_FLAG2_FAST
       (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
                                      h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
        filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
        return;
    }
    assert(!FRAME_MBAFF);

    mb_type = s->current_picture.mb_type[mb_xy];
    // qp: this MB, qp0: left neighbour, qp1: top neighbour (qpc*: same for chroma, table 0 only)
    qp = s->current_picture.qscale_table[mb_xy];
    qp0 = s->current_picture.qscale_table[mb_xy-1];
    qp1 = s->current_picture.qscale_table[h->top_mb_xy];
    qpc = get_chroma_qp( h, 0, qp );
    qpc0 = get_chroma_qp( h, 0, qp0 );
    qpc1 = get_chroma_qp( h, 0, qp1 );
    // from here on qp0/qp1/qpc0/qpc1 hold the rounded average with the current MB
    qp0 = (qp + qp0 + 1) >> 1;
    qp1 = (qp + qp1 + 1) >> 1;
    qpc0 = (qpc + qpc0 + 1) >> 1;
    qpc1 = (qpc + qpc1 + 1) >> 1;
    // skip the whole MB when every quantizer involved is at or below the threshold
    qp_thresh = 15 - h->slice_alpha_c0_offset;
    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
       qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
        return;

    if( IS_INTRA(mb_type) ) {
        // Intra MB: strengths are constants. Edge 0 gets 4 (3 for the top
        // edge in field pictures, via bSH); every inner edge gets 3.
        int16_t bS4[4] = {4,4,4,4};
        int16_t bS3[4] = {3,3,3,3};
        int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
        if( IS_8x8DCT(mb_type) ) {
            // 8x8 transform: only luma edges 0 and 2 are filtered
            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
        } else {
            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
            filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
            filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
        }
        // chroma: edges 0 and 2 of both planes, in both directions
        filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
        filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
        filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
        filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
        filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
        filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
        filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
        filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
        return;
    } else {
        // bS[dir][edge][i]: dir 0 is used with the edgev filters, dir 1 with edgeh
        DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
        // 64-bit view of bS so that the four strengths of one edge can be
        // written or tested for "any non-zero" with a single access
        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
        int edges;
        if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
            // Only edges 0 and 2 are set here; edges 1 and 3 stay unset and
            // are never read because the 8x8DCT branch below skips them.
            edges = 4;
            bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
        } else {
            int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
                             (mb_type & MB_TYPE_16x8) ? 1 : 0;
            int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
                             && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
                             ? 3 : 0;
            int step = IS_8x8DCT(mb_type) ? 2 : 1;
            // a 16x16 MB without coded luma only needs its outer edges examined
            edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
            s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
                                              (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
        }
        // an intra neighbour overrides whatever was computed for edge 0
        if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
            bSv[0][0] = 0x0004000400040004ULL;
        if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
            bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;

        // FILTER(hv,dir,edge): if any of the four strengths of the edge is
        // non-zero, filter luma and, for even edges only, both chroma planes.
        // Edge 0 uses the neighbour-averaged quantizer (qp0/qp1, qpc0/qpc1),
        // inner edges use the MB's own qp/qpc.
#define FILTER(hv,dir,edge)\
        if(bSv[dir][edge]) {\
            filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
            if(!(edge&1)) {\
                filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
                filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
            }\
        }
        if( edges == 1 ) {
            FILTER(v,0,0);
            FILTER(h,1,0);
        } else if( IS_8x8DCT(mb_type) ) {
            FILTER(v,0,0);
            FILTER(v,0,2);
            FILTER(h,1,0);
            FILTER(h,1,2);
        } else {
            FILTER(v,0,0);
            FILTER(v,0,1);
            FILTER(v,0,2);
            FILTER(v,0,3);
            FILTER(h,1,0);
            FILTER(h,1,1);
            FILTER(h,1,2);
            FILTER(h,1,3);
        }
#undef FILTER
    }
}
6277
6278
6279static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6280    MpegEncContext * const s = &h->s;
6281    int edge;
6282    const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6283    const int mbm_type = s->current_picture.mb_type[mbm_xy];
6284    int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6285    int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6286    int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6287
6288    const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6289                              == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6290    // how often to recheck mv-based bS when iterating between edges
6291    const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6292                          (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6293    // how often to recheck mv-based bS when iterating along each edge
6294    const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6295
6296    if (first_vertical_edge_done) {
6297        start = 1;
6298    }
6299
6300    if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6301        start = 1;
6302
6303    if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6304        && !IS_INTERLACED(mb_type)
6305        && IS_INTERLACED(mbm_type)
6306        ) {
6307        // This is a special case in the norm where the filtering must
6308        // be done twice (one each of the field) even if we are in a
6309        // frame macroblock.
6310        //
6311        static const int nnz_idx[4] = {4,5,6,3};
6312        unsigned int tmp_linesize   = 2 *   linesize;
6313        unsigned int tmp_uvlinesize = 2 * uvlinesize;
6314        int mbn_xy = mb_xy - 2 * s->mb_stride;
6315        int qp;
6316        int i, j;
6317        int16_t bS[4];
6318
6319        for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6320            if( IS_INTRA(mb_type) ||
6321                IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6322                bS[0] = bS[1] = bS[2] = bS[3] = 3;
6323            } else {
6324                const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6325                for( i = 0; i < 4; i++ ) {
6326                    if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6327                        mbn_nnz[nnz_idx[i]] != 0 )
6328                        bS[i] = 2;
6329                    else
6330                        bS[i] = 1;
6331                }
6332            }
6333            // Do not use s->qscale as luma quantizer because it has not the same
6334            // value in IPCM macroblocks.
6335            qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6336            tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6337            { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6338            filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6339            filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6340                              ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6341            filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6342                              ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6343        }
6344
6345        start = 1;
6346    }
6347
6348    /* Calculate bS */
6349    for( edge = start; edge < edges; edge++ ) {
6350        /* mbn_xy: neighbor macroblock */
6351        const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6352        const int mbn_type = s->current_picture.mb_type[mbn_xy];
6353        int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6354        int16_t bS[4];
6355        int qp;
6356
6357        if( (edge&1) && IS_8x8DCT(mb_type) )
6358            continue;
6359
6360        if( IS_INTRA(mb_type) ||
6361            IS_INTRA(mbn_type) ) {
6362            int value;
6363            if (edge == 0) {
6364                if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6365                    || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6366                ) {
6367                    value = 4;
6368                } else {
6369                    value = 3;
6370                }
6371            } else {
6372                value = 3;
6373            }
6374            bS[0] = bS[1] = bS[2] = bS[3] = value;
6375        } else {
6376            int i, l;
6377            int mv_done;
6378
6379            if( edge & mask_edge ) {
6380                bS[0] = bS[1] = bS[2] = bS[3] = 0;
6381                mv_done = 1;
6382            }
6383            else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6384                bS[0] = bS[1] = bS[2] = bS[3] = 1;
6385                mv_done = 1;
6386            }
6387            else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6388                int b_idx= 8 + 4 + edge * (dir ? 8:1);
6389                int bn_idx= b_idx - (dir ? 8:1);
6390                int v = 0;
6391
6392                for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6393                    v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6394                         FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6395                         FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6396                }
6397
6398                if(h->slice_type_nos == FF_B_TYPE && v){
6399                    v=0;
6400                    for( l = 0; !v && l < 2; l++ ) {
6401                        int ln= 1-l;
6402                        v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6403                            FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6404                            FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6405                    }
6406                }
6407
6408                bS[0] = bS[1] = bS[2] = bS[3] = v;
6409                mv_done = 1;
6410            }
6411            else
6412                mv_done = 0;
6413
6414            for( i = 0; i < 4; i++ ) {
6415                int x = dir == 0 ? edge : i;
6416                int y = dir == 0 ? i    : edge;
6417                int b_idx= 8 + 4 + x + 8*y;
6418                int bn_idx= b_idx - (dir ? 8:1);
6419
6420                if( h->non_zero_count_cache[b_idx] |
6421                    h->non_zero_count_cache[bn_idx] ) {
6422                    bS[i] = 2;
6423                }
6424                else if(!mv_done)
6425                {
6426                    bS[i] = 0;
6427                    for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6428                        if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6429                            FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6430                            FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6431                            bS[i] = 1;
6432                            break;
6433                        }
6434                    }
6435
6436                    if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6437                        bS[i] = 0;
6438                        for( l = 0; l < 2; l++ ) {
6439                            int ln= 1-l;
6440                            if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6441                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6442                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6443                                bS[i] = 1;
6444                                break;
6445                            }
6446                        }
6447                    }
6448                }
6449            }
6450
6451            if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6452                continue;
6453        }
6454
6455        /* Filter edge */
6456        // Do not use s->qscale as luma quantizer because it has not the same
6457        // value in IPCM macroblocks.
6458        qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6459        //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6460        tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6461        { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6462        if( dir == 0 ) {
6463            filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6464            if( (edge&1) == 0 ) {
6465                filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6466                                  ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6467                filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6468                                  ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6469            }
6470        } else {
6471            filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6472            if( (edge&1) == 0 ) {
6473                filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6474                                  ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6475                filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6476                                  ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6477            }
6478        }
6479    }
6480}
6481
6482static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6483    MpegEncContext * const s = &h->s;
6484    const int mb_xy= mb_x + mb_y*s->mb_stride;
6485    const int mb_type = s->current_picture.mb_type[mb_xy];
6486    const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6487    int first_vertical_edge_done = 0;
6488    av_unused int dir;
6489
6490    //for sufficiently low qp, filtering wouldn't do anything
6491    //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6492    if(!FRAME_MBAFF){
6493        int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6494        int qp = s->current_picture.qscale_table[mb_xy];
6495        if(qp <= qp_thresh
6496           && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6497           && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6498            return;
6499        }
6500    }
6501
6502    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6503    if(!h->pps.cabac && h->pps.transform_8x8_mode){
6504        int top_type, left_type[2];
6505        top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
6506        left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6507        left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6508
6509        if(IS_8x8DCT(top_type)){
6510            h->non_zero_count_cache[4+8*0]=
6511            h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6512            h->non_zero_count_cache[6+8*0]=
6513            h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6514        }
6515        if(IS_8x8DCT(left_type[0])){
6516            h->non_zero_count_cache[3+8*1]=
6517            h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6518        }
6519        if(IS_8x8DCT(left_type[1])){
6520            h->non_zero_count_cache[3+8*3]=
6521            h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6522        }
6523
6524        if(IS_8x8DCT(mb_type)){
6525            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
6526            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
6527
6528            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6529            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6530
6531            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6532            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6533
6534            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6535            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6536        }
6537    }
6538
6539    if (FRAME_MBAFF
6540            // left mb is in picture
6541            && h->slice_table[mb_xy-1] != 0xFFFF
6542            // and current and left pair do not have the same interlaced type
6543            && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6544            // and left mb is in the same slice if deblocking_filter == 2
6545            && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6546        /* First vertical edge is different in MBAFF frames
6547         * There are 8 different bS to compute and 2 different Qp
6548         */
6549        const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6550        const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6551        int16_t bS[8];
6552        int qp[2];
6553        int bqp[2];
6554        int rqp[2];
6555        int mb_qp, mbn0_qp, mbn1_qp;
6556        int i;
6557        first_vertical_edge_done = 1;
6558
6559        if( IS_INTRA(mb_type) )
6560            bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6561        else {
6562            for( i = 0; i < 8; i++ ) {
6563                int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6564
6565                if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6566                    bS[i] = 4;
6567                else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6568                         ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6569                            (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6570                                                                       :
6571                            h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6572                    bS[i] = 2;
6573                else
6574                    bS[i] = 1;
6575            }
6576        }
6577
6578        mb_qp = s->current_picture.qscale_table[mb_xy];
6579        mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6580        mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6581        qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6582        bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6583                   get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6584        rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6585                   get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6586        qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6587        bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6588                   get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6589        rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6590                   get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6591
6592        /* Filter edge */
6593        tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6594        { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6595        filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
6596        filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6597        filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6598    }
6599
6600#if CONFIG_SMALL
6601    for( dir = 0; dir < 2; dir++ )
6602        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6603#else
6604    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6605    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
6606#endif
6607}
6608
6609static int decode_slice(struct AVCodecContext *avctx, void *arg){
6610    H264Context *h = *(void**)arg;
6611    MpegEncContext * const s = &h->s;
6612    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6613
6614    s->mb_skip_run= -1;
6615
6616    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6617                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6618
6619    if( h->pps.cabac ) {
6620        int i;
6621
6622        /* realign */
6623        align_get_bits( &s->gb );
6624
6625        /* init cabac */
6626        ff_init_cabac_states( &h->cabac);
6627        ff_init_cabac_decoder( &h->cabac,
6628                               s->gb.buffer + get_bits_count(&s->gb)/8,
6629                               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6630        /* calculate pre-state */
6631        for( i= 0; i < 460; i++ ) {
6632            int pre;
6633            if( h->slice_type_nos == FF_I_TYPE )
6634                pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6635            else
6636                pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6637
6638            if( pre <= 63 )
6639                h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6640            else
6641                h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6642        }
6643
6644        for(;;){
6645//START_TIMER
6646            int ret = decode_mb_cabac(h);
6647            int eos;
6648//STOP_TIMER("decode_mb_cabac")
6649
6650            if(ret>=0) hl_decode_mb(h);
6651
6652            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6653                s->mb_y++;
6654
6655                ret = decode_mb_cabac(h);
6656
6657                if(ret>=0) hl_decode_mb(h);
6658                s->mb_y--;
6659            }
6660            eos = get_cabac_terminate( &h->cabac );
6661
6662            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6663                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6664                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6665                return -1;
6666            }
6667
6668            if( ++s->mb_x >= s->mb_width ) {
6669                s->mb_x = 0;
6670                ff_draw_horiz_band(s, 16*s->mb_y, 16);
6671                ++s->mb_y;
6672                if(FIELD_OR_MBAFF_PICTURE) {
6673                    ++s->mb_y;
6674                }
6675            }
6676
6677            if( eos || s->mb_y >= s->mb_height ) {
6678                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6679                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6680                return 0;
6681            }
6682        }
6683
6684    } else {
6685        for(;;){
6686            int ret = decode_mb_cavlc(h);
6687
6688            if(ret>=0) hl_decode_mb(h);
6689
6690            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6691                s->mb_y++;
6692                ret = decode_mb_cavlc(h);
6693
6694                if(ret>=0) hl_decode_mb(h);
6695                s->mb_y--;
6696            }
6697
6698            if(ret<0){
6699                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6700                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6701
6702                return -1;
6703            }
6704
6705            if(++s->mb_x >= s->mb_width){
6706                s->mb_x=0;
6707                ff_draw_horiz_band(s, 16*s->mb_y, 16);
6708                ++s->mb_y;
6709                if(FIELD_OR_MBAFF_PICTURE) {
6710                    ++s->mb_y;
6711                }
6712                if(s->mb_y >= s->mb_height){
6713                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6714
6715                    if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6716                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6717
6718                        return 0;
6719                    }else{
6720                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6721
6722                        return -1;
6723                    }
6724                }
6725            }
6726
6727            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6728                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6729                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6730                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6731
6732                    return 0;
6733                }else{
6734                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6735
6736                    return -1;
6737                }
6738            }
6739        }
6740    }
6741
6742#if 0
6743    for(;s->mb_y < s->mb_height; s->mb_y++){
6744        for(;s->mb_x < s->mb_width; s->mb_x++){
6745            int ret= decode_mb(h);
6746
6747            hl_decode_mb(h);
6748
6749            if(ret<0){
6750                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6751                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6752
6753                return -1;
6754            }
6755
6756            if(++s->mb_x >= s->mb_width){
6757                s->mb_x=0;
6758                if(++s->mb_y >= s->mb_height){
6759                    if(get_bits_count(s->gb) == s->gb.size_in_bits){
6760                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6761
6762                        return 0;
6763                    }else{
6764                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6765
6766                        return -1;
6767                    }
6768                }
6769            }
6770
6771            if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6772                if(get_bits_count(s->gb) == s->gb.size_in_bits){
6773                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6774
6775                    return 0;
6776                }else{
6777                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6778
6779                    return -1;
6780                }
6781            }
6782        }
6783        s->mb_x=0;
6784        ff_draw_horiz_band(s, 16*s->mb_y, 16);
6785    }
6786#endif
6787    return -1; //not reached
6788}
6789
6790static int decode_picture_timing(H264Context *h){
6791    MpegEncContext * const s = &h->s;
6792    if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6793        h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6794        h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6795    }
6796    if(h->sps.pic_struct_present_flag){
6797        unsigned int i, num_clock_ts;
6798        h->sei_pic_struct = get_bits(&s->gb, 4);
6799
6800        if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6801            return -1;
6802
6803        num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6804
6805        for (i = 0 ; i < num_clock_ts ; i++){
6806            if(get_bits(&s->gb, 1)){                  /* clock_timestamp_flag */
6807                unsigned int full_timestamp_flag;
6808                skip_bits(&s->gb, 2);                 /* ct_type */
6809                skip_bits(&s->gb, 1);                 /* nuit_field_based_flag */
6810                skip_bits(&s->gb, 5);                 /* counting_type */
6811                full_timestamp_flag = get_bits(&s->gb, 1);
6812                skip_bits(&s->gb, 1);                 /* discontinuity_flag */
6813                skip_bits(&s->gb, 1);                 /* cnt_dropped_flag */
6814                skip_bits(&s->gb, 8);                 /* n_frames */
6815                if(full_timestamp_flag){
6816                    skip_bits(&s->gb, 6);             /* seconds_value 0..59 */
6817                    skip_bits(&s->gb, 6);             /* minutes_value 0..59 */
6818                    skip_bits(&s->gb, 5);             /* hours_value 0..23 */
6819                }else{
6820                    if(get_bits(&s->gb, 1)){          /* seconds_flag */
6821                        skip_bits(&s->gb, 6);         /* seconds_value range 0..59 */
6822                        if(get_bits(&s->gb, 1)){      /* minutes_flag */
6823                            skip_bits(&s->gb, 6);     /* minutes_value 0..59 */
6824                            if(get_bits(&s->gb, 1))   /* hours_flag */
6825                                skip_bits(&s->gb, 5); /* hours_value 0..23 */
6826                        }
6827                    }
6828                }
6829                if(h->sps.time_offset_length > 0)
6830                    skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6831            }
6832        }
6833    }
6834    return 0;
6835}
6836
6837static int decode_unregistered_user_data(H264Context *h, int size){
6838    MpegEncContext * const s = &h->s;
6839    uint8_t user_data[16+256];
6840    int e, build, i;
6841
6842    if(size<16)
6843        return -1;
6844
6845    for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6846        user_data[i]= get_bits(&s->gb, 8);
6847    }
6848
6849    user_data[i]= 0;
6850    e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6851    if(e==1 && build>=0)
6852        h->x264_build= build;
6853
6854    if(s->avctx->debug & FF_DEBUG_BUGS)
6855        av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6856
6857    for(; i<size; i++)
6858        skip_bits(&s->gb, 8);
6859
6860    return 0;
6861}
6862
6863static int decode_recovery_point(H264Context *h){
6864    MpegEncContext * const s = &h->s;
6865
6866    h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6867    skip_bits(&s->gb, 4);       /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
6868
6869    return 0;
6870}
6871
6872static int decode_buffering_period(H264Context *h){
6873    MpegEncContext * const s = &h->s;
6874    unsigned int sps_id;
6875    int sched_sel_idx;
6876    SPS *sps;
6877
6878    sps_id = get_ue_golomb_31(&s->gb);
6879    if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6880        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6881        return -1;
6882    }
6883    sps = h->sps_buffers[sps_id];
6884
6885    // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6886    if (sps->nal_hrd_parameters_present_flag) {
6887        for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6888            h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6889            skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6890        }
6891    }
6892    if (sps->vcl_hrd_parameters_present_flag) {
6893        for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6894            h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6895            skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6896        }
6897    }
6898
6899    h->sei_buffering_period_present = 1;
6900    return 0;
6901}
6902
6903int ff_h264_decode_sei(H264Context *h){
6904    MpegEncContext * const s = &h->s;
6905
6906    while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6907        int size, type;
6908
6909        type=0;
6910        do{
6911            type+= show_bits(&s->gb, 8);
6912        }while(get_bits(&s->gb, 8) == 255);
6913
6914        size=0;
6915        do{
6916            size+= show_bits(&s->gb, 8);
6917        }while(get_bits(&s->gb, 8) == 255);
6918
6919        switch(type){
6920        case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6921            if(decode_picture_timing(h) < 0)
6922                return -1;
6923            break;
6924        case SEI_TYPE_USER_DATA_UNREGISTERED:
6925            if(decode_unregistered_user_data(h, size) < 0)
6926                return -1;
6927            break;
6928        case SEI_TYPE_RECOVERY_POINT:
6929            if(decode_recovery_point(h) < 0)
6930                return -1;
6931            break;
6932        case SEI_BUFFERING_PERIOD:
6933            if(decode_buffering_period(h) < 0)
6934                return -1;
6935            break;
6936        default:
6937            skip_bits(&s->gb, 8*size);
6938        }
6939
6940        //FIXME check bits here
6941        align_get_bits(&s->gb);
6942    }
6943
6944    return 0;
6945}
6946
6947static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6948    MpegEncContext * const s = &h->s;
6949    int cpb_count, i;
6950    cpb_count = get_ue_golomb_31(&s->gb) + 1;
6951
6952    if(cpb_count > 32U){
6953        av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6954        return -1;
6955    }
6956
6957    get_bits(&s->gb, 4); /* bit_rate_scale */
6958    get_bits(&s->gb, 4); /* cpb_size_scale */
6959    for(i=0; i<cpb_count; i++){
6960        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6961        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6962        get_bits1(&s->gb);     /* cbr_flag */
6963    }
6964    sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6965    sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6966    sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6967    sps->time_offset_length = get_bits(&s->gb, 5);
6968    sps->cpb_cnt = cpb_count;
6969    return 0;
6970}
6971
6972static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6973    MpegEncContext * const s = &h->s;
6974    int aspect_ratio_info_present_flag;
6975    unsigned int aspect_ratio_idc;
6976
6977    aspect_ratio_info_present_flag= get_bits1(&s->gb);
6978
6979    if( aspect_ratio_info_present_flag ) {
6980        aspect_ratio_idc= get_bits(&s->gb, 8);
6981        if( aspect_ratio_idc == EXTENDED_SAR ) {
6982            sps->sar.num= get_bits(&s->gb, 16);
6983            sps->sar.den= get_bits(&s->gb, 16);
6984        }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6985            sps->sar=  pixel_aspect[aspect_ratio_idc];
6986        }else{
6987            av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6988            return -1;
6989        }
6990    }else{
6991        sps->sar.num=
6992        sps->sar.den= 0;
6993    }
6994//            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6995
6996    if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
6997        get_bits1(&s->gb);      /* overscan_appropriate_flag */
6998    }
6999
7000    if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
7001        get_bits(&s->gb, 3);    /* video_format */
7002        get_bits1(&s->gb);      /* video_full_range_flag */
7003        if(get_bits1(&s->gb)){  /* colour_description_present_flag */
7004            get_bits(&s->gb, 8); /* colour_primaries */
7005            get_bits(&s->gb, 8); /* transfer_characteristics */
7006            get_bits(&s->gb, 8); /* matrix_coefficients */
7007        }
7008    }
7009
7010    if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
7011        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
7012        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
7013    }
7014
7015    sps->timing_info_present_flag = get_bits1(&s->gb);
7016    if(sps->timing_info_present_flag){
7017        sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7018        sps->time_scale = get_bits_long(&s->gb, 32);
7019        if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){
7020            av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick inavlid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick);
7021            return -1;
7022        }
7023        sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7024    }
7025
7026    sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7027    if(sps->nal_hrd_parameters_present_flag)
7028        if(decode_hrd_parameters(h, sps) < 0)
7029            return -1;
7030    sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7031    if(sps->vcl_hrd_parameters_present_flag)
7032        if(decode_hrd_parameters(h, sps) < 0)
7033            return -1;
7034    if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7035        get_bits1(&s->gb);     /* low_delay_hrd_flag */
7036    sps->pic_struct_present_flag = get_bits1(&s->gb);
7037
7038    sps->bitstream_restriction_flag = get_bits1(&s->gb);
7039    if(sps->bitstream_restriction_flag){
7040        get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
7041        get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7042        get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7043        get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7044        get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7045        sps->num_reorder_frames= get_ue_golomb(&s->gb);
7046        get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7047
7048        if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7049            av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
7050            return -1;
7051        }
7052    }
7053
7054    return 0;
7055}
7056
7057static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7058                                const uint8_t *jvt_list, const uint8_t *fallback_list){
7059    MpegEncContext * const s = &h->s;
7060    int i, last = 8, next = 8;
7061    const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7062    if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7063        memcpy(factors, fallback_list, size*sizeof(uint8_t));
7064    else
7065    for(i=0;i<size;i++){
7066        if(next)
7067            next = (last + get_se_golomb(&s->gb)) & 0xff;
7068        if(!i && !next){ /* matrix not written, we use the preset one */
7069            memcpy(factors, jvt_list, size*sizeof(uint8_t));
7070            break;
7071        }
7072        last = factors[scan[i]] = next ? next : last;
7073    }
7074}
7075
7076static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7077                                   uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7078    MpegEncContext * const s = &h->s;
7079    int fallback_sps = !is_sps && sps->scaling_matrix_present;
7080    const uint8_t *fallback[4] = {
7081        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7082        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7083        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7084        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7085    };
7086    if(get_bits1(&s->gb)){
7087        sps->scaling_matrix_present |= is_sps;
7088        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7089        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7090        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7091        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7092        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7093        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7094        if(is_sps || pps->transform_8x8_mode){
7095            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
7096            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
7097        }
7098    }
7099}
7100
7101int ff_h264_decode_seq_parameter_set(H264Context *h){
7102    MpegEncContext * const s = &h->s;
7103    int profile_idc, level_idc;
7104    unsigned int sps_id;
7105    int i;
7106    SPS *sps;
7107
7108    profile_idc= get_bits(&s->gb, 8);
7109    get_bits1(&s->gb);   //constraint_set0_flag
7110    get_bits1(&s->gb);   //constraint_set1_flag
7111    get_bits1(&s->gb);   //constraint_set2_flag
7112    get_bits1(&s->gb);   //constraint_set3_flag
7113    get_bits(&s->gb, 4); // reserved
7114    level_idc= get_bits(&s->gb, 8);
7115    sps_id= get_ue_golomb_31(&s->gb);
7116
7117    if(sps_id >= MAX_SPS_COUNT) {
7118        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7119        return -1;
7120    }
7121    sps= av_mallocz(sizeof(SPS));
7122    if(sps == NULL)
7123        return -1;
7124
7125    sps->profile_idc= profile_idc;
7126    sps->level_idc= level_idc;
7127
7128    memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7129    memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7130    sps->scaling_matrix_present = 0;
7131
7132    if(sps->profile_idc >= 100){ //high profile
7133        sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7134        if(sps->chroma_format_idc == 3)
7135            sps->residual_color_transform_flag = get_bits1(&s->gb);
7136        sps->bit_depth_luma   = get_ue_golomb(&s->gb) + 8;
7137        sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7138        sps->transform_bypass = get_bits1(&s->gb);
7139        decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7140    }else{
7141        sps->chroma_format_idc= 1;
7142    }
7143
7144    sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7145    sps->poc_type= get_ue_golomb_31(&s->gb);
7146
7147    if(sps->poc_type == 0){ //FIXME #define
7148        sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7149    } else if(sps->poc_type == 1){//FIXME #define
7150        sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7151        sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7152        sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7153        sps->poc_cycle_length                = get_ue_golomb(&s->gb);
7154
7155        if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7156            av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7157            goto fail;
7158        }
7159
7160        for(i=0; i<sps->poc_cycle_length; i++)
7161            sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7162    }else if(sps->poc_type != 2){
7163        av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7164        goto fail;
7165    }
7166
7167    sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7168    if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7169        av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7170        goto fail;
7171    }
7172    sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7173    sps->mb_width = get_ue_golomb(&s->gb) + 1;
7174    sps->mb_height= get_ue_golomb(&s->gb) + 1;
7175    if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7176       avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7177        av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7178        goto fail;
7179    }
7180
7181    sps->frame_mbs_only_flag= get_bits1(&s->gb);
7182    if(!sps->frame_mbs_only_flag)
7183        sps->mb_aff= get_bits1(&s->gb);
7184    else
7185        sps->mb_aff= 0;
7186
7187    sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7188
7189#ifndef ALLOW_INTERLACE
7190    if(sps->mb_aff)
7191        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7192#endif
7193    sps->crop= get_bits1(&s->gb);
7194    if(sps->crop){
7195        sps->crop_left  = get_ue_golomb(&s->gb);
7196        sps->crop_right = get_ue_golomb(&s->gb);
7197        sps->crop_top   = get_ue_golomb(&s->gb);
7198        sps->crop_bottom= get_ue_golomb(&s->gb);
7199        if(sps->crop_left || sps->crop_top){
7200            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7201        }
7202        if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7203            av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7204        }
7205    }else{
7206        sps->crop_left  =
7207        sps->crop_right =
7208        sps->crop_top   =
7209        sps->crop_bottom= 0;
7210    }
7211
7212    sps->vui_parameters_present_flag= get_bits1(&s->gb);
7213    if( sps->vui_parameters_present_flag )
7214        decode_vui_parameters(h, sps);
7215
7216    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7217        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7218               sps_id, sps->profile_idc, sps->level_idc,
7219               sps->poc_type,
7220               sps->ref_frame_count,
7221               sps->mb_width, sps->mb_height,
7222               sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7223               sps->direct_8x8_inference_flag ? "8B8" : "",
7224               sps->crop_left, sps->crop_right,
7225               sps->crop_top, sps->crop_bottom,
7226               sps->vui_parameters_present_flag ? "VUI" : "",
7227               ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
7228               );
7229    }
7230
7231    av_free(h->sps_buffers[sps_id]);
7232    h->sps_buffers[sps_id]= sps;
7233    h->sps = *sps;
7234    return 0;
7235fail:
7236    av_free(sps);
7237    return -1;
7238}
7239
7240static void
7241build_qp_table(PPS *pps, int t, int index)
7242{
7243    int i;
7244    for(i = 0; i < 52; i++)
7245        pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7246}
7247
7248int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7249    MpegEncContext * const s = &h->s;
7250    unsigned int pps_id= get_ue_golomb(&s->gb);
7251    PPS *pps;
7252
7253    if(pps_id >= MAX_PPS_COUNT) {
7254        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7255        return -1;
7256    }
7257
7258    pps= av_mallocz(sizeof(PPS));
7259    if(pps == NULL)
7260        return -1;
7261    pps->sps_id= get_ue_golomb_31(&s->gb);
7262    if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7263        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7264        goto fail;
7265    }
7266
7267    pps->cabac= get_bits1(&s->gb);
7268    pps->pic_order_present= get_bits1(&s->gb);
7269    pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7270    if(pps->slice_group_count > 1 ){
7271        pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7272        av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7273        switch(pps->mb_slice_group_map_type){
7274        case 0:
7275#if 0
7276|   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
7277|    run_length[ i ]                                |1  |ue(v)   |
7278#endif
7279            break;
7280        case 2:
7281#if 0
7282|   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |        |
7283|{                                                  |   |        |
7284|    top_left_mb[ i ]                               |1  |ue(v)   |
7285|    bottom_right_mb[ i ]                           |1  |ue(v)   |
7286|   }                                               |   |        |
7287#endif
7288            break;
7289        case 3:
7290        case 4:
7291        case 5:
7292#if 0
7293|   slice_group_change_direction_flag               |1  |u(1)    |
7294|   slice_group_change_rate_minus1                  |1  |ue(v)   |
7295#endif
7296            break;
7297        case 6:
7298#if 0
7299|   slice_group_id_cnt_minus1                       |1  |ue(v)   |
7300|   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
7301|)                                                  |   |        |
7302|    slice_group_id[ i ]                            |1  |u(v)    |
7303#endif
7304            break;
7305        }
7306    }
7307    pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7308    pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7309    if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7310        av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7311        goto fail;
7312    }
7313
7314    pps->weighted_pred= get_bits1(&s->gb);
7315    pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7316    pps->init_qp= get_se_golomb(&s->gb) + 26;
7317    pps->init_qs= get_se_golomb(&s->gb) + 26;
7318    pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7319    pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7320    pps->constrained_intra_pred= get_bits1(&s->gb);
7321    pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7322
7323    pps->transform_8x8_mode= 0;
7324    h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7325    memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7326    memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7327
7328    if(get_bits_count(&s->gb) < bit_length){
7329        pps->transform_8x8_mode= get_bits1(&s->gb);
7330        decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7331        pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7332    } else {
7333        pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7334    }
7335
7336    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7337    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7338    if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7339        h->pps.chroma_qp_diff= 1;
7340
7341    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7342        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7343               pps_id, pps->sps_id,
7344               pps->cabac ? "CABAC" : "CAVLC",
7345               pps->slice_group_count,
7346               pps->ref_count[0], pps->ref_count[1],
7347               pps->weighted_pred ? "weighted" : "",
7348               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7349               pps->deblocking_filter_parameters_present ? "LPAR" : "",
7350               pps->constrained_intra_pred ? "CONSTR" : "",
7351               pps->redundant_pic_cnt_present ? "REDU" : "",
7352               pps->transform_8x8_mode ? "8x8DCT" : ""
7353               );
7354    }
7355
7356    av_free(h->pps_buffers[pps_id]);
7357    h->pps_buffers[pps_id]= pps;
7358    return 0;
7359fail:
7360    av_free(pps);
7361    return -1;
7362}
7363
7364/**
7365 * Call decode_slice() for each context.
7366 *
7367 * @param h h264 master context
7368 * @param context_count number of contexts to execute
7369 */
7370static void execute_decode_slices(H264Context *h, int context_count){
7371    MpegEncContext * const s = &h->s;
7372    AVCodecContext * const avctx= s->avctx;
7373    H264Context *hx;
7374    int i;
7375
7376    if (s->avctx->hwaccel)
7377        return;
7378    if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7379        return;
7380    if(context_count == 1) {
7381        decode_slice(avctx, &h);
7382    } else {
7383        for(i = 1; i < context_count; i++) {
7384            hx = h->thread_context[i];
7385            hx->s.error_recognition = avctx->error_recognition;
7386            hx->s.error_count = 0;
7387        }
7388
7389        avctx->execute(avctx, (void *)decode_slice,
7390                       (void **)h->thread_context, NULL, context_count, sizeof(void*));
7391
7392        /* pull back stuff from slices to master context */
7393        hx = h->thread_context[context_count - 1];
7394        s->mb_x = hx->s.mb_x;
7395        s->mb_y = hx->s.mb_y;
7396        s->dropable = hx->s.dropable;
7397        s->picture_structure = hx->s.picture_structure;
7398        for(i = 1; i < context_count; i++)
7399            h->s.error_count += h->thread_context[i]->s.error_count;
7400    }
7401}
7402
7403
7404static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7405    MpegEncContext * const s = &h->s;
7406    AVCodecContext * const avctx= s->avctx;
7407    int buf_index=0;
7408    H264Context *hx; ///< thread context
7409    int context_count = 0;
7410
7411    h->max_contexts = avctx->thread_count;
7412#if 0
7413    int i;
7414    for(i=0; i<50; i++){
7415        av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7416    }
7417#endif
7418    if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7419        h->current_slice = 0;
7420        if (!s->first_field)
7421            s->current_picture_ptr= NULL;
7422        reset_sei(h);
7423    }
7424
7425    for(;;){
7426        int consumed;
7427        int dst_length;
7428        int bit_length;
7429        const uint8_t *ptr;
7430        int i, nalsize = 0;
7431        int err;
7432
7433        if(h->is_avc) {
7434            if(buf_index >= buf_size) break;
7435            nalsize = 0;
7436            for(i = 0; i < h->nal_length_size; i++)
7437                nalsize = (nalsize << 8) | buf[buf_index++];
7438            if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7439                if(nalsize == 1){
7440                    buf_index++;
7441                    continue;
7442                }else{
7443                    av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7444                    break;
7445                }
7446            }
7447        } else {
7448            // start code prefix search
7449            for(; buf_index + 3 < buf_size; buf_index++){
7450                // This should always succeed in the first iteration.
7451                if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7452                    break;
7453            }
7454
7455            if(buf_index+3 >= buf_size) break;
7456
7457            buf_index+=3;
7458        }
7459
7460        hx = h->thread_context[context_count];
7461
7462        ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7463        if (ptr==NULL || dst_length < 0){
7464            return -1;
7465        }
7466        while(ptr[dst_length - 1] == 0 && dst_length > 0)
7467            dst_length--;
7468        bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7469
7470        if(s->avctx->debug&FF_DEBUG_STARTCODE){
7471            av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7472        }
7473
7474        if (h->is_avc && (nalsize != consumed)){
7475            int i, debug_level = AV_LOG_DEBUG;
7476            for (i = consumed; i < nalsize; i++)
7477                if (buf[buf_index+i])
7478                    debug_level = AV_LOG_ERROR;
7479            av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7480            consumed= nalsize;
7481        }
7482
7483        buf_index += consumed;
7484
7485        if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7486           ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7487            continue;
7488
7489      again:
7490        err = 0;
7491        switch(hx->nal_unit_type){
7492        case NAL_IDR_SLICE:
7493            if (h->nal_unit_type != NAL_IDR_SLICE) {
7494                av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7495                return -1;
7496            }
7497            idr(h); //FIXME ensure we don't loose some frames if there is reordering
7498        case NAL_SLICE:
7499            init_get_bits(&hx->s.gb, ptr, bit_length);
7500            hx->intra_gb_ptr=
7501            hx->inter_gb_ptr= &hx->s.gb;
7502            hx->s.data_partitioning = 0;
7503
7504            if((err = decode_slice_header(hx, h)))
7505               break;
7506
7507            if (s->avctx->hwaccel && h->current_slice == 1) {
7508                if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
7509                    return -1;
7510            }
7511
7512            s->current_picture_ptr->key_frame |=
7513                    (hx->nal_unit_type == NAL_IDR_SLICE) ||
7514                    (h->sei_recovery_frame_cnt >= 0);
7515            if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7516               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7517               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7518               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7519               && avctx->skip_frame < AVDISCARD_ALL){
7520                if(avctx->hwaccel) {
7521                    if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
7522                        return -1;
7523                }else
7524                if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7525                    static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7526                    ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7527                    ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
7528                }else
7529                    context_count++;
7530            }
7531            break;
7532        case NAL_DPA:
7533            init_get_bits(&hx->s.gb, ptr, bit_length);
7534            hx->intra_gb_ptr=
7535            hx->inter_gb_ptr= NULL;
7536            hx->s.data_partitioning = 1;
7537
7538            err = decode_slice_header(hx, h);
7539            break;
7540        case NAL_DPB:
7541            init_get_bits(&hx->intra_gb, ptr, bit_length);
7542            hx->intra_gb_ptr= &hx->intra_gb;
7543            break;
7544        case NAL_DPC:
7545            init_get_bits(&hx->inter_gb, ptr, bit_length);
7546            hx->inter_gb_ptr= &hx->inter_gb;
7547
7548            if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7549               && s->context_initialized
7550               && s->hurry_up < 5
7551               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7552               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7553               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7554               && avctx->skip_frame < AVDISCARD_ALL)
7555                context_count++;
7556            break;
7557        case NAL_SEI:
7558            init_get_bits(&s->gb, ptr, bit_length);
7559            ff_h264_decode_sei(h);
7560            break;
7561        case NAL_SPS:
7562            init_get_bits(&s->gb, ptr, bit_length);
7563            ff_h264_decode_seq_parameter_set(h);
7564
7565            if(s->flags& CODEC_FLAG_LOW_DELAY)
7566                s->low_delay=1;
7567
7568            if(avctx->has_b_frames < 2)
7569                avctx->has_b_frames= !s->low_delay;
7570            break;
7571        case NAL_PPS:
7572            init_get_bits(&s->gb, ptr, bit_length);
7573
7574            ff_h264_decode_picture_parameter_set(h, bit_length);
7575
7576            break;
7577        case NAL_AUD:
7578        case NAL_END_SEQUENCE:
7579        case NAL_END_STREAM:
7580        case NAL_FILLER_DATA:
7581        case NAL_SPS_EXT:
7582        case NAL_AUXILIARY_SLICE:
7583            break;
7584        default:
7585            av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7586        }
7587
7588        if(context_count == h->max_contexts) {
7589            execute_decode_slices(h, context_count);
7590            context_count = 0;
7591        }
7592
7593        if (err < 0)
7594            av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7595        else if(err == 1) {
7596            /* Slice could not be decoded in parallel mode, copy down
7597             * NAL unit stuff to context 0 and restart. Note that
7598             * rbsp_buffer is not transferred, but since we no longer
7599             * run in parallel mode this should not be an issue. */
7600            h->nal_unit_type = hx->nal_unit_type;
7601            h->nal_ref_idc   = hx->nal_ref_idc;
7602            hx = h;
7603            goto again;
7604        }
7605    }
7606    if(context_count)
7607        execute_decode_slices(h, context_count);
7608    return buf_index;
7609}
7610
7611/**
7612 * returns the number of bytes consumed for building the current frame
7613 */
7614static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7615        if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7616        if(pos+10>buf_size) pos=buf_size; // oops ;)
7617
7618        return pos;
7619}
7620
7621static int decode_frame(AVCodecContext *avctx,
7622                             void *data, int *data_size,
7623                             const uint8_t *buf, int buf_size)
7624{
7625    H264Context *h = avctx->priv_data;
7626    MpegEncContext *s = &h->s;
7627    AVFrame *pict = data;
7628    int buf_index;
7629
7630    s->flags= avctx->flags;
7631    s->flags2= avctx->flags2;
7632
7633   /* end of stream, output what is still in the buffers */
7634    if (buf_size == 0) {
7635        Picture *out;
7636        int i, out_idx;
7637
7638//FIXME factorize this with the output code below
7639        out = h->delayed_pic[0];
7640        out_idx = 0;
7641        for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7642            if(h->delayed_pic[i]->poc < out->poc){
7643                out = h->delayed_pic[i];
7644                out_idx = i;
7645            }
7646
7647        for(i=out_idx; h->delayed_pic[i]; i++)
7648            h->delayed_pic[i] = h->delayed_pic[i+1];
7649
7650        if(out){
7651            *data_size = sizeof(AVFrame);
7652            *pict= *(AVFrame*)out;
7653        }
7654
7655        return 0;
7656    }
7657
7658    if(h->is_avc && !h->got_avcC) {
7659        int i, cnt, nalsize;
7660        unsigned char *p = avctx->extradata;
7661        if(avctx->extradata_size < 7) {
7662            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7663            return -1;
7664        }
7665        if(*p != 1) {
7666            av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7667            return -1;
7668        }
7669        /* sps and pps in the avcC always have length coded with 2 bytes,
7670           so put a fake nal_length_size = 2 while parsing them */
7671        h->nal_length_size = 2;
7672        // Decode sps from avcC
7673        cnt = *(p+5) & 0x1f; // Number of sps
7674        p += 6;
7675        for (i = 0; i < cnt; i++) {
7676            nalsize = AV_RB16(p) + 2;
7677            if(decode_nal_units(h, p, nalsize) < 0) {
7678                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7679                return -1;
7680            }
7681            p += nalsize;
7682        }
7683        // Decode pps from avcC
7684        cnt = *(p++); // Number of pps
7685        for (i = 0; i < cnt; i++) {
7686            nalsize = AV_RB16(p) + 2;
7687            if(decode_nal_units(h, p, nalsize)  != nalsize) {
7688                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7689                return -1;
7690            }
7691            p += nalsize;
7692        }
7693        // Now store right nal length size, that will be use to parse all other nals
7694        h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7695        // Do not reparse avcC
7696        h->got_avcC = 1;
7697    }
7698
7699    if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7700        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7701            return -1;
7702        h->got_avcC = 1;
7703    }
7704
7705    buf_index=decode_nal_units(h, buf, buf_size);
7706    if(buf_index < 0)
7707        return -1;
7708
7709    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7710        if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7711        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7712        return -1;
7713    }
7714
7715    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7716        Picture *out = s->current_picture_ptr;
7717        Picture *cur = s->current_picture_ptr;
7718        int i, pics, cross_idr, out_of_order, out_idx;
7719
7720        s->mb_y= 0;
7721
7722        s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7723        s->current_picture_ptr->pict_type= s->pict_type;
7724
7725        if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7726            ff_vdpau_h264_set_reference_frames(s);
7727
7728        if(!s->dropable) {
7729            execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7730            h->prev_poc_msb= h->poc_msb;
7731            h->prev_poc_lsb= h->poc_lsb;
7732        }
7733        h->prev_frame_num_offset= h->frame_num_offset;
7734        h->prev_frame_num= h->frame_num;
7735
7736        if (avctx->hwaccel) {
7737            if (avctx->hwaccel->end_frame(avctx) < 0)
7738                av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
7739        }
7740
7741        if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7742            ff_vdpau_h264_picture_complete(s);
7743
7744        /*
7745         * FIXME: Error handling code does not seem to support interlaced
7746         * when slices span multiple rows
7747         * The ff_er_add_slice calls don't work right for bottom
7748         * fields; they cause massive erroneous error concealing
7749         * Error marking covers both fields (top and bottom).
7750         * This causes a mismatched s->error_count
7751         * and a bad error table. Further, the error count goes to
7752         * INT_MAX when called for bottom field, because mb_y is
7753         * past end by one (callers fault) and resync_mb_y != 0
7754         * causes problems for the first MB line, too.
7755         */
7756        if (!FIELD_PICTURE)
7757            ff_er_frame_end(s);
7758
7759        MPV_frame_end(s);
7760
7761        if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7762            /* Wait for second field. */
7763            *data_size = 0;
7764
7765        } else {
7766            cur->repeat_pict = 0;
7767
7768            /* Signal interlacing information externally. */
7769            /* Prioritize picture timing SEI information over used decoding process if it exists. */
7770            if(h->sps.pic_struct_present_flag){
7771                switch (h->sei_pic_struct)
7772                {
7773                case SEI_PIC_STRUCT_FRAME:
7774                    cur->interlaced_frame = 0;
7775                    break;
7776                case SEI_PIC_STRUCT_TOP_FIELD:
7777                case SEI_PIC_STRUCT_BOTTOM_FIELD:
7778                case SEI_PIC_STRUCT_TOP_BOTTOM:
7779                case SEI_PIC_STRUCT_BOTTOM_TOP:
7780                    cur->interlaced_frame = 1;
7781                    break;
7782                case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7783                case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7784                    // Signal the possibility of telecined film externally (pic_struct 5,6)
7785                    // From these hints, let the applications decide if they apply deinterlacing.
7786                    cur->repeat_pict = 1;
7787                    cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7788                    break;
7789                case SEI_PIC_STRUCT_FRAME_DOUBLING:
7790                    // Force progressive here, as doubling interlaced frame is a bad idea.
7791                    cur->interlaced_frame = 0;
7792                    cur->repeat_pict = 2;
7793                    break;
7794                case SEI_PIC_STRUCT_FRAME_TRIPLING:
7795                    cur->interlaced_frame = 0;
7796                    cur->repeat_pict = 4;
7797                    break;
7798                }
7799            }else{
7800                /* Derive interlacing flag from used decoding process. */
7801                cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7802            }
7803
7804            if (cur->field_poc[0] != cur->field_poc[1]){
7805                /* Derive top_field_first from field pocs. */
7806                cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7807            }else{
7808                if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7809                    /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7810                    if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7811                      || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7812                        cur->top_field_first = 1;
7813                    else
7814                        cur->top_field_first = 0;
7815                }else{
7816                    /* Most likely progressive */
7817                    cur->top_field_first = 0;
7818                }
7819            }
7820
7821        //FIXME do something with unavailable reference frames
7822
7823            /* Sort B-frames into display order */
7824
7825            if(h->sps.bitstream_restriction_flag
7826               && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7827                s->avctx->has_b_frames = h->sps.num_reorder_frames;
7828                s->low_delay = 0;
7829            }
7830
7831            if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7832               && !h->sps.bitstream_restriction_flag){
7833                s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7834                s->low_delay= 0;
7835            }
7836
7837            pics = 0;
7838            while(h->delayed_pic[pics]) pics++;
7839
7840            assert(pics <= MAX_DELAYED_PIC_COUNT);
7841
7842            h->delayed_pic[pics++] = cur;
7843            if(cur->reference == 0)
7844                cur->reference = DELAYED_PIC_REF;
7845
7846            out = h->delayed_pic[0];
7847            out_idx = 0;
7848            for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7849                if(h->delayed_pic[i]->poc < out->poc){
7850                    out = h->delayed_pic[i];
7851                    out_idx = i;
7852                }
7853            cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7854
7855            out_of_order = !cross_idr && out->poc < h->outputed_poc;
7856
7857            if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7858                { }
7859            else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7860               || (s->low_delay &&
7861                ((!cross_idr && out->poc > h->outputed_poc + 2)
7862                 || cur->pict_type == FF_B_TYPE)))
7863            {
7864                s->low_delay = 0;
7865                s->avctx->has_b_frames++;
7866            }
7867
7868            if(out_of_order || pics > s->avctx->has_b_frames){
7869                out->reference &= ~DELAYED_PIC_REF;
7870                for(i=out_idx; h->delayed_pic[i]; i++)
7871                    h->delayed_pic[i] = h->delayed_pic[i+1];
7872            }
7873            if(!out_of_order && pics > s->avctx->has_b_frames){
7874                *data_size = sizeof(AVFrame);
7875
7876                h->outputed_poc = out->poc;
7877                *pict= *(AVFrame*)out;
7878            }else{
7879                av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7880            }
7881        }
7882    }
7883
7884    assert(pict->data[0] || !*data_size);
7885    ff_print_debug_info(s, pict);
7886//printf("out %d\n", (int)pict->data[0]);
7887#if 0 //?
7888
7889    /* Return the Picture timestamp as the frame number */
7890    /* we subtract 1 because it is added on utils.c     */
7891    avctx->frame_number = s->picture_number - 1;
7892#endif
7893    return get_consumed_bytes(s, buf_index, buf_size);
7894}
/*
 * Disabled code (compiled out by the #if 0 below).
 *
 * fill_mb_avail() sets h->mb_avail[] for the macroblock at
 * (s->mb_x, s->mb_y): entries 0..2 cover the top-left, top and top-right
 * neighbours and entry 3 the left neighbour. A neighbour counts as
 * available when it lies inside the picture and its slice_table entry
 * equals h->slice_num. Entries 4 and 5 are set to the constants 1 and 0.
 */
#if 0
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        /* first macroblock row: there is nothing above */
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
7914
7915#ifdef TEST
7916#undef printf
7917#undef random
7918#define COUNT 8000
7919#define SIZE (COUNT*40)
7920int main(void){
7921    int i;
7922    uint8_t temp[SIZE];
7923    PutBitContext pb;
7924    GetBitContext gb;
7925//    int int_temp[10000];
7926    DSPContext dsp;
7927    AVCodecContext avctx;
7928
7929    dsputil_init(&dsp, &avctx);
7930
7931    init_put_bits(&pb, temp, SIZE);
7932    printf("testing unsigned exp golomb\n");
7933    for(i=0; i<COUNT; i++){
7934        START_TIMER
7935        set_ue_golomb(&pb, i);
7936        STOP_TIMER("set_ue_golomb");
7937    }
7938    flush_put_bits(&pb);
7939
7940    init_get_bits(&gb, temp, 8*SIZE);
7941    for(i=0; i<COUNT; i++){
7942        int j, s;
7943
7944        s= show_bits(&gb, 24);
7945
7946        START_TIMER
7947        j= get_ue_golomb(&gb);
7948        if(j != i){
7949            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7950//            return -1;
7951        }
7952        STOP_TIMER("get_ue_golomb");
7953    }
7954
7955
7956    init_put_bits(&pb, temp, SIZE);
7957    printf("testing signed exp golomb\n");
7958    for(i=0; i<COUNT; i++){
7959        START_TIMER
7960        set_se_golomb(&pb, i - COUNT/2);
7961        STOP_TIMER("set_se_golomb");
7962    }
7963    flush_put_bits(&pb);
7964
7965    init_get_bits(&gb, temp, 8*SIZE);
7966    for(i=0; i<COUNT; i++){
7967        int j, s;
7968
7969        s= show_bits(&gb, 24);
7970
7971        START_TIMER
7972        j= get_se_golomb(&gb);
7973        if(j != i - COUNT/2){
7974            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7975//            return -1;
7976        }
7977        STOP_TIMER("get_se_golomb");
7978    }
7979
7980#if 0
7981    printf("testing 4x4 (I)DCT\n");
7982
7983    DCTELEM block[16];
7984    uint8_t src[16], ref[16];
7985    uint64_t error= 0, max_error=0;
7986
7987    for(i=0; i<COUNT; i++){
7988        int j;
7989//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7990        for(j=0; j<16; j++){
7991            ref[j]= random()%255;
7992            src[j]= random()%255;
7993        }
7994
7995        h264_diff_dct_c(block, src, ref, 4);
7996
7997        //normalize
7998        for(j=0; j<16; j++){
7999//            printf("%d ", block[j]);
8000            block[j]= block[j]*4;
8001            if(j&1) block[j]= (block[j]*4 + 2)/5;
8002            if(j&4) block[j]= (block[j]*4 + 2)/5;
8003        }
8004//        printf("\n");
8005
8006        s->dsp.h264_idct_add(ref, block, 4);
8007/*        for(j=0; j<16; j++){
8008            printf("%d ", ref[j]);
8009        }
8010        printf("\n");*/
8011
8012        for(j=0; j<16; j++){
8013            int diff= FFABS(src[j] - ref[j]);
8014
8015            error+= diff*diff;
8016            max_error= FFMAX(max_error, diff);
8017        }
8018    }
8019    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8020    printf("testing quantizer\n");
8021    for(qp=0; qp<52; qp++){
8022        for(i=0; i<16; i++)
8023            src1_block[i]= src2_block[i]= random()%255;
8024
8025    }
8026    printf("Testing NAL layer\n");
8027
8028    uint8_t bitstream[COUNT];
8029    uint8_t nal[COUNT*2];
8030    H264Context h;
8031    memset(&h, 0, sizeof(H264Context));
8032
8033    for(i=0; i<COUNT; i++){
8034        int zeros= i;
8035        int nal_length;
8036        int consumed;
8037        int out_length;
8038        uint8_t *out;
8039        int j;
8040
8041        for(j=0; j<COUNT; j++){
8042            bitstream[j]= (random() % 255) + 1;
8043        }
8044
8045        for(j=0; j<zeros; j++){
8046            int pos= random() % COUNT;
8047            while(bitstream[pos] == 0){
8048                pos++;
8049                pos %= COUNT;
8050            }
8051            bitstream[pos]=0;
8052        }
8053
8054        START_TIMER
8055
8056        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8057        if(nal_length<0){
8058            printf("encoding failed\n");
8059            return -1;
8060        }
8061
8062        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8063
8064        STOP_TIMER("NAL")
8065
8066        if(out_length != COUNT){
8067            printf("incorrect length %d %d\n", out_length, COUNT);
8068            return -1;
8069        }
8070
8071        if(consumed != nal_length){
8072            printf("incorrect consumed length %d %d\n", nal_length, consumed);
8073            return -1;
8074        }
8075
8076        if(memcmp(bitstream, out, COUNT)){
8077            printf("mismatch\n");
8078            return -1;
8079        }
8080    }
8081#endif
8082
8083    printf("Testing RBSP\n");
8084
8085
8086    return 0;
8087}
8088#endif /* TEST */
8089
8090
8091static av_cold int decode_end(AVCodecContext *avctx)
8092{
8093    H264Context *h = avctx->priv_data;
8094    MpegEncContext *s = &h->s;
8095    int i;
8096
8097    av_freep(&h->rbsp_buffer[0]);
8098    av_freep(&h->rbsp_buffer[1]);
8099    free_tables(h); //FIXME cleanup init stuff perhaps
8100
8101    for(i = 0; i < MAX_SPS_COUNT; i++)
8102        av_freep(h->sps_buffers + i);
8103
8104    for(i = 0; i < MAX_PPS_COUNT; i++)
8105        av_freep(h->pps_buffers + i);
8106
8107    MPV_common_end(s);
8108
8109//    memset(h, 0, sizeof(H264Context));
8110
8111    return 0;
8112}
8113
8114
/**
 * Codec descriptor for the "h264" decoder.
 *
 * Wires up decode_init, decode_end and decode_frame, registers flush_dpb as
 * the flush callback and advertises CODEC_CAP_DR1 and CODEC_CAP_DELAY.
 * The leading members are positional; their meaning follows the AVCodec
 * declaration, which is not part of this file.
 */
AVCodec h264_decoder = {
    "h264",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H264,
    sizeof(H264Context),
    decode_init,
    NULL, // NOTE(review): presumably the encode callback slot - confirm against AVCodec
    decode_end,
    decode_frame,
    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .pix_fmts= ff_hwaccel_pixfmt_list_420,
};
8129
8130#if CONFIG_H264_VDPAU_DECODER
/**
 * Codec descriptor for the "h264_vdpau" decoder.
 *
 * Uses the same decode_init, decode_end, decode_frame and flush_dpb as the
 * plain "h264" entry, but additionally sets CODEC_CAP_HWACCEL_VDPAU, which
 * the decoding code in this file tests to hand slice data to the VDPAU
 * helpers instead of decoding it itself.
 */
AVCodec h264_vdpau_decoder = {
    "h264_vdpau",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H264,
    sizeof(H264Context),
    decode_init,
    NULL, // NOTE(review): presumably the encode callback slot - confirm against AVCodec
    decode_end,
    decode_frame,
    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
};
8144#endif
8145
8146#if CONFIG_SVQ3_DECODER
8147#include "svq3.c"
8148#endif
8149