1/* 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file libavcodec/h264.c 24 * H.264 / AVC / MPEG4 part10 codec. 25 * @author Michael Niedermayer <michaelni@gmx.at> 26 */ 27 28#include "internal.h" 29#include "dsputil.h" 30#include "avcodec.h" 31#include "mpegvideo.h" 32#include "h264.h" 33#include "h264data.h" 34#include "h264_parser.h" 35#include "golomb.h" 36#include "mathops.h" 37#include "rectangle.h" 38#include "vdpau_internal.h" 39 40#include "cabac.h" 41#if ARCH_X86 42#include "x86/h264_i386.h" 43#endif 44 45//#undef NDEBUG 46#include <assert.h> 47 48/** 49 * Value of Picture.reference when Picture is not a reference picture, but 50 * is held for delayed output. 
51 */ 52#define DELAYED_PIC_REF 4 53 54static VLC coeff_token_vlc[4]; 55static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2]; 56static const int coeff_token_vlc_tables_size[4]={520,332,280,256}; 57 58static VLC chroma_dc_coeff_token_vlc; 59static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; 60static const int chroma_dc_coeff_token_vlc_table_size = 256; 61 62static VLC total_zeros_vlc[15]; 63static VLC_TYPE total_zeros_vlc_tables[15][512][2]; 64static const int total_zeros_vlc_tables_size = 512; 65 66static VLC chroma_dc_total_zeros_vlc[3]; 67static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; 68static const int chroma_dc_total_zeros_vlc_tables_size = 8; 69 70static VLC run_vlc[6]; 71static VLC_TYPE run_vlc_tables[6][8][2]; 72static const int run_vlc_tables_size = 8; 73 74static VLC run7_vlc; 75static VLC_TYPE run7_vlc_table[96][2]; 76static const int run7_vlc_table_size = 96; 77 78static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); 79static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); 80static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); 81static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); 82static Picture * remove_long(H264Context *h, int i, int ref_mask); 83 84static av_always_inline uint32_t pack16to32(int a, int b){ 85#ifdef WORDS_BIGENDIAN 86 return (b&0xFFFF) + (a<<16); 87#else 88 return (a&0xFFFF) + (b<<16); 89#endif 90} 91 92static const uint8_t rem6[52]={ 930, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 94}; 95 96static const uint8_t div6[52]={ 970, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 
7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};

/**
 * Row layouts selectable through the left_block pointer in fill_caches().
 * Row 0 is the default; rows 1..3 are only selected in the FRAME_MBAFF path
 * when the field flag of the left macroblock pair differs from the current
 * macroblock's (row 3 for a field macroblock, row 2 - bottom otherwise).
 * Entries 0..3 are used as row/block indices into the left neighbours'
 * intra4x4_pred_mode, non_zero_count and motion tables; entries 4..7 are
 * used only as non_zero_count indices.
 */
static const uint8_t left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};

#define LEVEL_TAB_BITS 8
/* NOTE(review): not referenced in this part of the file; presumably a CAVLC
 * level decoding lookup table filled during init -- confirm at its users. */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/**
 * Fills the per-macroblock neighbour caches of the current macroblock
 * (h->mb_xy) from the per-picture tables.
 *
 * The function first works out the table indices of the top-left, top,
 * top-right and the two left neighbours (with the extra FRAME_MBAFF rules),
 * stores h->top_mb_xy and h->left_mb_xy[], and then loads, depending on
 * mb_type and the neighbour types:
 *  - the intra sample availability masks and intra4x4_pred_mode_cache
 *    (only when not called for deblocking),
 *  - non_zero_count_cache,
 *  - top_cbp / left_cbp (CABAC only),
 *  - mv_cache / ref_cache and, for CABAC, mvd_cache / direct_cache,
 *  - neighbor_transform_size.
 *
 * @param h           decoder context
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero when called for the loop filter: neighbours are
 *                    then accepted when their slice_table entry is below
 *                    0xFFFF instead of equal to h->slice_num, the top-left and
 *                    top-right neighbours are ignored, and the function may
 *                    return early without touching any cache
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const uint8_t * left_block;
    int topleft_partition= -1; // used as a bit mask below: -1 keeps the extra row offset, 0 drops it
    int i;

    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    // Early out for deblocking: nothing is loaded when this is the first slice
    // or the top neighbour is in the same slice, and MBAFF is not in use.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
        return;

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
    if(FRAME_MBAFF){
        // Neighbour selection works on macroblock pairs: pair_xy is the top
        // macroblock of the current pair (mb_y with its low bit cleared).
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);

        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
            top_xy -= s->mb_stride;
        }
        if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
            topleft_xy -= s->mb_stride;
        } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
            topleft_xy += s->mb_stride;
            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
            topleft_partition = 0;
        }
        if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
            topright_xy -= s->mb_stride;
        }
        if (left_mb_field_flag != curr_mb_field_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
                left_block = left_block_options[3];
            } else {
                left_block= left_block_options[2 - bottom];
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    if(for_deblock){
        topleft_type = 0;
        topright_type = 0;
        top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(MB_MBAFF && !IS_INTRA(mb_type)){
            int list;
            for(list=0; list<h->list_count; list++){
                //These values were changed for ease of performing MC, we need to change them back
                //FIXME maybe we can make MC and loop filter use the same values or prevent
                //the MC code from changing ref_cache and rather use a temporary array.
                if(USES_LIST(mb_type,list)){
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    // Each 32-bit store rewrites four consecutive ref_cache bytes:
                    // ref[0] duplicated into the first two, ref[1] into the last two.
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                    ref += h->b8_stride;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                }
            }
        }
    }else{
        // Decoding: a neighbour only counts when it belongs to the current slice.
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(IS_INTRA(mb_type)){
            // With constrained_intra_pred only intra neighbours count as available.
            int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
            // Start from "everything available" and clear bits per missing neighbour.
            h->topleft_samples_available=
            h->top_samples_available=
            h->left_samples_available= 0xFFFF;
            h->topright_samples_available= 0xEEEA;

            if(!(top_type & type_mask)){
                h->topleft_samples_available= 0xB3FF;
                h->top_samples_available= 0x33FF;
                h->topright_samples_available= 0x26EA;
            }
            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
                if(IS_INTERLACED(mb_type)){
                    // The two left neighbours are tested separately, each clearing its own bits.
                    if(!(left_type[0] & type_mask)){
                        h->topleft_samples_available&= 0xDFFF;
                        h->left_samples_available&= 0x5FFF;
                    }
                    if(!(left_type[1] & type_mask)){
                        h->topleft_samples_available&= 0xFF5F;
                        h->left_samples_available&= 0xFF5F;
                    }
                }else{
                    // Both macroblocks of the left pair must be usable.
                    int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                    ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                    assert(left_xy[0] == left_xy[1]);
                    if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                        h->topleft_samples_available&= 0xDF5F;
                        h->left_samples_available&= 0x5F5F;
                    }
                }
            }else{
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
                }
            }

            if(!(topleft_type & type_mask))
                h->topleft_samples_available&= 0x7FFF;

            if(!(topright_type & type_mask))
                h->topright_samples_available&= 0xFBFF;

            if(IS_INTRA4x4(mb_type)){
                // Top row of the prediction mode cache: copy the top neighbour's stored
                // modes, or use -1 (neighbour not usable) / 2 (usable but not intra4x4).
                if(IS_INTRA4x4(top_type)){
                    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                }else{
                    int pred;
                    if(!(top_type & type_mask))
                        pred= -1;
                    else{
                        pred= 2;
                    }
                    h->intra4x4_pred_mode_cache[4+8*0]=
                    h->intra4x4_pred_mode_cache[5+8*0]=
                    h->intra4x4_pred_mode_cache[6+8*0]=
                    h->intra4x4_pred_mode_cache[7+8*0]= pred;
                }
                // Left column: two cache rows per left neighbour, same fallback values.
                for(i=0; i<2; i++){
                    if(IS_INTRA4x4(left_type[i])){
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    }else{
                        int pred;
                        if(!(left_type[i] & type_mask))
                            pred= -1;
                        else{
                            pred= 2;
                        }
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                    }
                }
            }
        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    // Non-zero coefficient counts of the top neighbour; when it is missing the
    // cache gets 64, or 0 for a non-intra macroblock under CABAC.
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

    }else{
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    }

    // Same for the two left neighbours; left_block selects which of their
    // stored counts feed each cache row.
    for (i=0; i<2; i++) {
        if(left_type[i]){
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        }else{
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
        }
    }

    if( h->pps.cabac ) {
        // top_cbp
        if(top_type) {
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->top_cbp = 0x1C0;
        } else {
            h->top_cbp = 0;
        }
        // left_cbp
        if (left_type[0]) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = 0x1C0;
        } else {
            h->left_cbp = 0;
        }
        // Bits 1 and 3 of left_cbp each take one bit of a left neighbour's cbp,
        // picked through left_block.
        if (left_type[0]) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        }
        if (left_type[1]) {
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
        }
    }

#if 1
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            // A list this macroblock does not use is skipped entirely, unless the
            // macroblock is direct or the deblocking filter is enabled.
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue;
            }
            h->mv_cache_clean[list]= 0;

            // Top neighbour: its bottom row of motion vectors and reference indices.
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                // One 32-bit store fills all four top reference entries with the same marker.
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            // Left neighbours: two cache rows each, rows chosen through left_block.
            for(i=0; i<2; i++){
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            // Deblocking, and temporal direct outside MBAFF, need nothing further.
            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
                continue;

            if(USES_LIST(topleft_type, list)){
                // topleft_partition is -1 (adds the extra row offset) or 0 (adds nothing).
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
                continue;

            // Mark these cache positions as unavailable with a zero motion vector.
            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            if( h->pps.cabac ) {
                /* XXX beurk, Load mvd */
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                }
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                }
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                }
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                if(h->slice_type_nos == FF_B_TYPE){
                    // Clear the 4x4 block area of direct_cache, then load the
                    // direct flags of the top and left neighbours.
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    }else{
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                    }

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
                }
            }

            if(FRAME_MBAFF){
                // MAP_MVS applies MAP_F2F to every neighbour cache entry loaded above.
                // MAP_F2F rescales an entry whose source macroblock has a different
                // field/frame mode than the current one: the reference index is
                // doubled/halved and the vertical mv/mvd component halved/doubled.
#define MAP_MVS\
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                if(MB_FIELD){
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }else{
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] <<= 1;\
                        h->mvd_cache[list][idx][1] <<= 1;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }
#endif

    // Number (0..2) of top / first-left neighbours flagged as using the 8x8 transform.
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
}

/**
 * Stores the intra4x4 prediction modes that later macroblocks need as
 * neighbours: the rightmost cache column (rows 1..4) goes to entries 0..3
 * and cache row 4, columns 4..6, goes to entries 4..6 of
 * h->intra4x4_pred_mode[h->mb_xy].
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    const int mb_xy= h->mb_xy;

    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4]; 559} 560 561/** 562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. 563 */ 564static inline int check_intra4x4_pred_mode(H264Context *h){ 565 MpegEncContext * const s = &h->s; 566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; 567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; 568 int i; 569 570 if(!(h->top_samples_available&0x8000)){ 571 for(i=0; i<4; i++){ 572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ]; 573 if(status<0){ 574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); 575 return -1; 576 } else if(status){ 577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status; 578 } 579 } 580 } 581 582 if((h->left_samples_available&0x8888)!=0x8888){ 583 static const int mask[4]={0x8000,0x2000,0x80,0x20}; 584 for(i=0; i<4; i++){ 585 if(!(h->left_samples_available&mask[i])){ 586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; 587 if(status<0){ 588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); 589 return -1; 590 } else if(status){ 591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; 592 } 593 } 594 } 595 } 596 597 return 0; 598} //FIXME cleanup like next 599 600/** 601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. 
602 */ 603static inline int check_intra_pred_mode(H264Context *h, int mode){ 604 MpegEncContext * const s = &h->s; 605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1}; 606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8}; 607 608 if(mode > 6U) { 609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y); 610 return -1; 611 } 612 613 if(!(h->top_samples_available&0x8000)){ 614 mode= top[ mode ]; 615 if(mode<0){ 616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); 617 return -1; 618 } 619 } 620 621 if((h->left_samples_available&0x8080) != 0x8080){ 622 mode= left[ mode ]; 623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred 624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8); 625 } 626 if(mode<0){ 627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); 628 return -1; 629 } 630 } 631 632 return mode; 633} 634 635/** 636 * gets the predicted intra4x4 prediction mode. 
637 */ 638static inline int pred_intra_mode(H264Context *h, int n){ 639 const int index8= scan8[n]; 640 const int left= h->intra4x4_pred_mode_cache[index8 - 1]; 641 const int top = h->intra4x4_pred_mode_cache[index8 - 8]; 642 const int min= FFMIN(left, top); 643 644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min); 645 646 if(min<0) return DC_PRED; 647 else return min; 648} 649 650static inline void write_back_non_zero_count(H264Context *h){ 651 const int mb_xy= h->mb_xy; 652 653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1]; 654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2]; 655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3]; 656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4]; 657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4]; 658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4]; 659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4]; 660 661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2]; 662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2]; 663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1]; 664 665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5]; 666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5]; 667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4]; 668} 669 670/** 671 * gets the predicted number of non-zero coefficients. 
 * @param n block index
 * @return (left + top + 1) >> 1 of the cached neighbour counts when their sum
 *         is below 64, otherwise the sum itself; in both cases masked to the
 *         low 5 bits
 */
static inline int pred_non_zero_count(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->non_zero_count_cache[index8 - 1];
    const int top = h->non_zero_count_cache[index8 - 8];
    int i= left + top;

    // fill_caches() stores 64 for a missing neighbour, so a sum below 64
    // means neither neighbour carried that marker
    if(i<64) i= (i+1)>>1;

    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);

    return i&31;
}

/**
 * Fetches the diagonal (top-right, falling back to top-left) neighbour used
 * for motion vector prediction.
 *
 * @param h          decoder context
 * @param C          set to point at the selected motion vector
 * @param i          cache index of the partition's top-left block
 * @param list       reference list
 * @param part_width partition width in 4x4 block units
 * @return the reference index belonging to *C; LIST_NOT_USED is returned from
 *         the FRAME_MBAFF paths when the source macroblock does not use list
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        const int16_t *mv;
        // scratch cache slot receiving a rescaled vector; preset to zero
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
                // SET_DIAG_MV always returns from fetch_diagonal_mv(): it reads the
                // vector at 4x4 block position (X4,Y4) straight from the picture,
                // applies MV_OP to its vertical component and REF_OP to the
                // reference index, and leaves the vector in the scratch slot.
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
            }
        }
        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            if(!MB_FIELD
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
            }
            if(MB_FIELD
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
               && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
#undef SET_DIAG_MV
    }

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
        tprintf(s->avctx, "topright MV not available\n");

        // fall back to the top-left neighbour
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }
}

/**
 * gets the predicted MV.
 * The prediction depends on how many of the left (A), top (B) and diagonal
 * (C) neighbours use the reference ref: more than one gives the
 * component-wise median of A, B and C; exactly one gives that neighbour's
 * vector; none gives A when only the left neighbour is available and the
 * median otherwise.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param list the reference list
 * @param ref the reference index of the partition being predicted
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    const int16_t * C;
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

/* mv_cache
  B . . A T T T T
  U . . L . . , .
  U . . L . . . .
  U . . L . . , .
  . . . L . . . .
*/

    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        if(left_ref==ref){
            *mx= A[0];
            *my= A[1];
        }else if(top_ref==ref){
            *mx= B[0];
            *my= B[1];
        }else{
            *mx= C[0];
            *my= C[1];
        }
    }else{
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
            *mx= A[0];
            *my= A[1];
        }else{
            *mx= mid_pred(A[0], B[0], C[0]);
            *my= mid_pred(A[1], B[1], C[1]);
        }
    }

    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
}

/**
 * gets the directionally predicted 16x8 MV.
802 * @param n the block index 803 * @param mx the x component of the predicted motion vector 804 * @param my the y component of the predicted motion vector 805 */ 806static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ 807 if(n==0){ 808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; 809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; 810 811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); 812 813 if(top_ref == ref){ 814 *mx= B[0]; 815 *my= B[1]; 816 return; 817 } 818 }else{ 819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; 820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; 821 822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); 823 824 if(left_ref == ref){ 825 *mx= A[0]; 826 *my= A[1]; 827 return; 828 } 829 } 830 831 //RARE 832 pred_motion(h, n, 4, list, ref, mx, my); 833} 834 835/** 836 * gets the directionally predicted 8x16 MV. 
837 * @param n the block index 838 * @param mx the x component of the predicted motion vector 839 * @param my the y component of the predicted motion vector 840 */ 841static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ 842 if(n==0){ 843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; 844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; 845 846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); 847 848 if(left_ref == ref){ 849 *mx= A[0]; 850 *my= A[1]; 851 return; 852 } 853 }else{ 854 const int16_t * C; 855 int diagonal_ref; 856 857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); 858 859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); 860 861 if(diagonal_ref == ref){ 862 *mx= C[0]; 863 *my= C[1]; 864 return; 865 } 866 } 867 868 //RARE 869 pred_motion(h, n, 2, list, ref, mx, my); 870} 871 872static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){ 873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ]; 874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ]; 875 876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); 877 878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE 879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ]) 880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){ 881 882 *mx = *my = 0; 883 return; 884 } 885 886 pred_motion(h, 0, 4, 0, 0, mx, my); 887 888 return; 889} 890 891static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){ 892 int poc0 = h->ref_list[0][i].poc; 893 int td = av_clip(poc1 - poc0, -128, 127); 894 if(td == 0 || h->ref_list[0][i].long_ref){ 895 return 256; 896 }else{ 897 int tb = av_clip(poc - poc0, -128, 127); 898 int tx = (16384 + 
(FFABS(td) >> 1)) / td; 899 return av_clip((tb*tx + 32) >> 6, -1024, 1023); 900 } 901} 902 903static inline void direct_dist_scale_factor(H264Context * const h){ 904 MpegEncContext * const s = &h->s; 905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ]; 906 const int poc1 = h->ref_list[1][0].poc; 907 int i, field; 908 for(field=0; field<2; field++){ 909 const int poc = h->s.current_picture_ptr->field_poc[field]; 910 const int poc1 = h->ref_list[1][0].field_poc[field]; 911 for(i=0; i < 2*h->ref_count[0]; i++) 912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16); 913 } 914 915 for(i=0; i<h->ref_count[0]; i++){ 916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i); 917 } 918} 919 920static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){ 921 MpegEncContext * const s = &h->s; 922 Picture * const ref1 = &h->ref_list[1][0]; 923 int j, old_ref, rfield; 924 int start= mbafi ? 16 : 0; 925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list]; 926 int interl= mbafi || s->picture_structure != PICT_FRAME; 927 928 /* bogus; fills in for missing frames */ 929 memset(map[list], 0, sizeof(map[list])); 930 931 for(rfield=0; rfield<2; rfield++){ 932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){ 933 int poc = ref1->ref_poc[colfield][list][old_ref]; 934 935 if (!interl) 936 poc |= 3; 937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed 938 poc= (poc&~3) + rfield + 1; 939 940 for(j=start; j<end; j++){ 941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){ 942 int cur_ref= mbafi ? 
(j-16)^field : j; 943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref; 944 if(rfield == field) 945 map[list][old_ref] = cur_ref; 946 break; 947 } 948 } 949 } 950 } 951} 952 953static inline void direct_ref_list_init(H264Context * const h){ 954 MpegEncContext * const s = &h->s; 955 Picture * const ref1 = &h->ref_list[1][0]; 956 Picture * const cur = s->current_picture_ptr; 957 int list, j, field; 958 int sidx= (s->picture_structure&1)^1; 959 int ref1sidx= (ref1->reference&1)^1; 960 961 for(list=0; list<2; list++){ 962 cur->ref_count[sidx][list] = h->ref_count[list]; 963 for(j=0; j<h->ref_count[list]; j++) 964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3); 965 } 966 967 if(s->picture_structure == PICT_FRAME){ 968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0])); 969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0])); 970 } 971 972 cur->mbaff= FRAME_MBAFF; 973 974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred) 975 return; 976 977 for(list=0; list<2; list++){ 978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0); 979 for(field=0; field<2; field++) 980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1); 981 } 982} 983 984static inline void pred_direct_motion(H264Context * const h, int *mb_type){ 985 MpegEncContext * const s = &h->s; 986 int b8_stride = h->b8_stride; 987 int b4_stride = h->b_stride; 988 int mb_xy = h->mb_xy; 989 int mb_type_col[2]; 990 const int16_t (*l1mv0)[2], (*l1mv1)[2]; 991 const int8_t *l1ref0, *l1ref1; 992 const int is_b8x8 = IS_8X8(*mb_type); 993 unsigned int sub_mb_type; 994 int i8, i4; 995 996#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM) 997 998 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL 999 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL 1000 int cur_poc = s->current_picture_ptr->poc; 1001 int *col_poc = 
h->ref_list[1]->field_poc; 1002 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc); 1003 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride; 1004 b8_stride = 0; 1005 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity 1006 int fieldoff= 2*(h->ref_list[1][0].reference)-3; 1007 mb_xy += s->mb_stride*fieldoff; 1008 } 1009 goto single_col; 1010 }else{ // AFL/AFR/FR/FL -> AFR/FR 1011 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR 1012 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride; 1013 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy]; 1014 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride]; 1015 b8_stride *= 3; 1016 b4_stride *= 6; 1017 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag 1018 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) 1019 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) 1020 && !is_b8x8){ 1021 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ 1022 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */ 1023 }else{ 1024 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ 1025 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1; 1026 } 1027 }else{ // AFR/FR -> AFR/FR 1028single_col: 1029 mb_type_col[0] = 1030 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy]; 1031 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){ 1032 /* FIXME save sub mb types from previous frames (or derive from MVs) 1033 * so we know exactly what block size to use */ 1034 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */ 1035 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1; 1036 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){ 1037 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ 1038 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */ 1039 }else{ 1040 sub_mb_type = 
MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ 1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1; 1042 } 1043 } 1044 } 1045 1046 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]]; 1047 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]]; 1048 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]]; 1049 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]]; 1050 if(!b8_stride){ 1051 if(s->mb_y&1){ 1052 l1ref0 += h->b8_stride; 1053 l1ref1 += h->b8_stride; 1054 l1mv0 += 2*b4_stride; 1055 l1mv1 += 2*b4_stride; 1056 } 1057 } 1058 1059 if(h->direct_spatial_mv_pred){ 1060 int ref[2]; 1061 int mv[2][2]; 1062 int list; 1063 1064 /* FIXME interlacing + spatial direct uses wrong colocated block positions */ 1065 1066 /* ref = min(neighbors) */ 1067 for(list=0; list<2; list++){ 1068 int refa = h->ref_cache[list][scan8[0] - 1]; 1069 int refb = h->ref_cache[list][scan8[0] - 8]; 1070 int refc = h->ref_cache[list][scan8[0] - 8 + 4]; 1071 if(refc == PART_NOT_AVAILABLE) 1072 refc = h->ref_cache[list][scan8[0] - 8 - 1]; 1073 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc); 1074 if(ref[list] < 0) 1075 ref[list] = -1; 1076 } 1077 1078 if(ref[0] < 0 && ref[1] < 0){ 1079 ref[0] = ref[1] = 0; 1080 mv[0][0] = mv[0][1] = 1081 mv[1][0] = mv[1][1] = 0; 1082 }else{ 1083 for(list=0; list<2; list++){ 1084 if(ref[list] >= 0) 1085 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]); 1086 else 1087 mv[list][0] = mv[list][1] = 0; 1088 } 1089 } 1090 1091 if(ref[1] < 0){ 1092 if(!is_b8x8) 1093 *mb_type &= ~MB_TYPE_L1; 1094 sub_mb_type &= ~MB_TYPE_L1; 1095 }else if(ref[0] < 0){ 1096 if(!is_b8x8) 1097 *mb_type &= ~MB_TYPE_L0; 1098 sub_mb_type &= ~MB_TYPE_L0; 1099 } 1100 1101 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){ 1102 for(i8=0; i8<4; i8++){ 1103 int x8 = i8&1; 1104 int y8 = i8>>1; 1105 int xy8 = x8+y8*b8_stride; 1106 int xy4 = 3*x8+y8*b4_stride; 1107 int a=0, b=0; 1108 1109 if(is_b8x8 && 
!IS_DIRECT(h->sub_mb_type[i8])) 1110 continue; 1111 h->sub_mb_type[i8] = sub_mb_type; 1112 1113 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1); 1114 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1); 1115 if(!IS_INTRA(mb_type_col[y8]) 1116 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1) 1117 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){ 1118 if(ref[0] > 0) 1119 a= pack16to32(mv[0][0],mv[0][1]); 1120 if(ref[1] > 0) 1121 b= pack16to32(mv[1][0],mv[1][1]); 1122 }else{ 1123 a= pack16to32(mv[0][0],mv[0][1]); 1124 b= pack16to32(mv[1][0],mv[1][1]); 1125 } 1126 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4); 1127 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4); 1128 } 1129 }else if(IS_16X16(*mb_type)){ 1130 int a=0, b=0; 1131 1132 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1); 1133 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1); 1134 if(!IS_INTRA(mb_type_col[0]) 1135 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1) 1136 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1 1137 && (h->x264_build>33 || !h->x264_build)))){ 1138 if(ref[0] > 0) 1139 a= pack16to32(mv[0][0],mv[0][1]); 1140 if(ref[1] > 0) 1141 b= pack16to32(mv[1][0],mv[1][1]); 1142 }else{ 1143 a= pack16to32(mv[0][0],mv[0][1]); 1144 b= pack16to32(mv[1][0],mv[1][1]); 1145 } 1146 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4); 1147 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4); 1148 }else{ 1149 for(i8=0; i8<4; i8++){ 1150 const int x8 = i8&1; 1151 const int y8 = i8>>1; 1152 1153 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) 1154 continue; 1155 h->sub_mb_type[i8] = sub_mb_type; 1156 1157 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4); 1158 
fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4); 1159 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1); 1160 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1); 1161 1162 /* col_zero_flag */ 1163 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0 1164 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0 1165 && (h->x264_build>33 || !h->x264_build)))){ 1166 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1; 1167 if(IS_SUB_8X8(sub_mb_type)){ 1168 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride]; 1169 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){ 1170 if(ref[0] == 0) 1171 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); 1172 if(ref[1] == 0) 1173 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); 1174 } 1175 }else 1176 for(i4=0; i4<4; i4++){ 1177 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride]; 1178 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){ 1179 if(ref[0] == 0) 1180 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0; 1181 if(ref[1] == 0) 1182 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0; 1183 } 1184 } 1185 } 1186 } 1187 } 1188 }else{ /* direct temporal mv pred */ 1189 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]}; 1190 const int *dist_scale_factor = h->dist_scale_factor; 1191 int ref_offset= 0; 1192 1193 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){ 1194 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0]; 1195 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1]; 1196 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1]; 1197 } 1198 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0])) 1199 ref_offset += 16; 1200 1201 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){ 1202 /* FIXME assumes direct_8x8_inference == 1 */ 1203 int y_shift = 2*!IS_INTERLACED(*mb_type); 1204 1205 for(i8=0; i8<4; 
i8++){ 1206 const int x8 = i8&1; 1207 const int y8 = i8>>1; 1208 int ref0, scale; 1209 const int16_t (*l1mv)[2]= l1mv0; 1210 1211 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) 1212 continue; 1213 h->sub_mb_type[i8] = sub_mb_type; 1214 1215 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); 1216 if(IS_INTRA(mb_type_col[y8])){ 1217 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1); 1218 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); 1219 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); 1220 continue; 1221 } 1222 1223 ref0 = l1ref0[x8 + y8*b8_stride]; 1224 if(ref0 >= 0) 1225 ref0 = map_col_to_list0[0][ref0 + ref_offset]; 1226 else{ 1227 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset]; 1228 l1mv= l1mv1; 1229 } 1230 scale = dist_scale_factor[ref0]; 1231 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1); 1232 1233 { 1234 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride]; 1235 int my_col = (mv_col[1]<<y_shift)/2; 1236 int mx = (scale * mv_col[0] + 128) >> 8; 1237 int my = (scale * my_col + 128) >> 8; 1238 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4); 1239 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4); 1240 } 1241 } 1242 return; 1243 } 1244 1245 /* one-to-one mv scaling */ 1246 1247 if(IS_16X16(*mb_type)){ 1248 int ref, mv0, mv1; 1249 1250 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1); 1251 if(IS_INTRA(mb_type_col[0])){ 1252 ref=mv0=mv1=0; 1253 }else{ 1254 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset] 1255 : map_col_to_list0[1][l1ref1[0] + ref_offset]; 1256 const int scale = dist_scale_factor[ref0]; 1257 const int16_t *mv_col = l1ref0[0] >= 0 ? 
l1mv0[0] : l1mv1[0]; 1258 int mv_l0[2]; 1259 mv_l0[0] = (scale * mv_col[0] + 128) >> 8; 1260 mv_l0[1] = (scale * mv_col[1] + 128) >> 8; 1261 ref= ref0; 1262 mv0= pack16to32(mv_l0[0],mv_l0[1]); 1263 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]); 1264 } 1265 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1); 1266 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4); 1267 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4); 1268 }else{ 1269 for(i8=0; i8<4; i8++){ 1270 const int x8 = i8&1; 1271 const int y8 = i8>>1; 1272 int ref0, scale; 1273 const int16_t (*l1mv)[2]= l1mv0; 1274 1275 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) 1276 continue; 1277 h->sub_mb_type[i8] = sub_mb_type; 1278 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); 1279 if(IS_INTRA(mb_type_col[0])){ 1280 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1); 1281 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); 1282 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); 1283 continue; 1284 } 1285 1286 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset; 1287 if(ref0 >= 0) 1288 ref0 = map_col_to_list0[0][ref0]; 1289 else{ 1290 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset]; 1291 l1mv= l1mv1; 1292 } 1293 scale = dist_scale_factor[ref0]; 1294 1295 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1); 1296 if(IS_SUB_8X8(sub_mb_type)){ 1297 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride]; 1298 int mx = (scale * mv_col[0] + 128) >> 8; 1299 int my = (scale * mv_col[1] + 128) >> 8; 1300 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4); 1301 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4); 1302 }else 1303 for(i4=0; i4<4; i4++){ 1304 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride]; 1305 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]]; 1306 mv_l0[0] = (scale * mv_col[0] + 128) >> 8; 1307 mv_l0[1] = (scale * 
mv_col[1] + 128) >> 8; 1308 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 1309 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]); 1310 } 1311 } 1312 } 1313 } 1314} 1315 1316static inline void write_back_motion(H264Context *h, int mb_type){ 1317 MpegEncContext * const s = &h->s; 1318 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; 1319 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; 1320 int list; 1321 1322 if(!USES_LIST(mb_type, 0)) 1323 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1); 1324 1325 for(list=0; list<h->list_count; list++){ 1326 int y; 1327 if(!USES_LIST(mb_type, list)) 1328 continue; 1329 1330 for(y=0; y<4; y++){ 1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; 1332 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y]; 1333 } 1334 if( h->pps.cabac ) { 1335 if(IS_SKIP(mb_type)) 1336 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4); 1337 else 1338 for(y=0; y<4; y++){ 1339 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y]; 1340 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; 1341 } 1342 } 1343 1344 { 1345 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; 1346 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; 1347 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; 1348 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; 1349 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; 1350 } 1351 } 1352 1353 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){ 1354 if(IS_8X8(mb_type)){ 1355 uint8_t *direct_table = &h->direct_table[b8_xy]; 1356 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; 1357 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 
1 : 0; 1358 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; 1359 } 1360 } 1361} 1362 1363const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){ 1364 int i, si, di; 1365 uint8_t *dst; 1366 int bufidx; 1367 1368// src[0]&0x80; //forbidden bit 1369 h->nal_ref_idc= src[0]>>5; 1370 h->nal_unit_type= src[0]&0x1F; 1371 1372 src++; length--; 1373#if 0 1374 for(i=0; i<length; i++) 1375 printf("%2X ", src[i]); 1376#endif 1377 1378#if HAVE_FAST_UNALIGNED 1379# if HAVE_FAST_64BIT 1380# define RS 7 1381 for(i=0; i+1<length; i+=9){ 1382 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL)) 1383# else 1384# define RS 3 1385 for(i=0; i+1<length; i+=5){ 1386 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U)) 1387# endif 1388 continue; 1389 if(i>0 && !src[i]) i--; 1390 while(src[i]) i++; 1391#else 1392# define RS 0 1393 for(i=0; i+1<length; i+=2){ 1394 if(src[i]) continue; 1395 if(i>0 && src[i-1]==0) i--; 1396#endif 1397 if(i+2<length && src[i+1]==0 && src[i+2]<=3){ 1398 if(src[i+2]!=3){ 1399 /* startcode, so we must be past the end */ 1400 length=i; 1401 } 1402 break; 1403 } 1404 i-= RS; 1405 } 1406 1407 if(i>=length-1){ //no escaped 0 1408 *dst_length= length; 1409 *consumed= length+1; //+1 for the header 1410 return src; 1411 } 1412 1413 bufidx = h->nal_unit_type == NAL_DPC ? 
1 : 0; // use second escape buffer for inter data 1414 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE); 1415 dst= h->rbsp_buffer[bufidx]; 1416 1417 if (dst == NULL){ 1418 return NULL; 1419 } 1420 1421//printf("decoding esc\n"); 1422 memcpy(dst, src, i); 1423 si=di=i; 1424 while(si+2<length){ 1425 //remove escapes (very rare 1:2^22) 1426 if(src[si+2]>3){ 1427 dst[di++]= src[si++]; 1428 dst[di++]= src[si++]; 1429 }else if(src[si]==0 && src[si+1]==0){ 1430 if(src[si+2]==3){ //escape 1431 dst[di++]= 0; 1432 dst[di++]= 0; 1433 si+=3; 1434 continue; 1435 }else //next start code 1436 goto nsc; 1437 } 1438 1439 dst[di++]= src[si++]; 1440 } 1441 while(si<length) 1442 dst[di++]= src[si++]; 1443nsc: 1444 1445 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE); 1446 1447 *dst_length= di; 1448 *consumed= si + 1;//+1 for the header 1449//FIXME store exact number of bits in the getbitcontext (it is needed for decoding) 1450 return dst; 1451} 1452 1453int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){ 1454 int v= *src; 1455 int r; 1456 1457 tprintf(h->s.avctx, "rbsp trailing %X\n", v); 1458 1459 for(r=1; r<9; r++){ 1460 if(v&1) return r; 1461 v>>=1; 1462 } 1463 return 0; 1464} 1465 1466/** 1467 * IDCT transforms the 16 dc values and dequantizes them. 
1468 * @param qp quantization parameter 1469 */ 1470static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ 1471#define stride 16 1472 int i; 1473 int temp[16]; //FIXME check if this is a good idea 1474 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; 1475 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; 1476 1477//memset(block, 64, 2*256); 1478//return; 1479 for(i=0; i<4; i++){ 1480 const int offset= y_offset[i]; 1481 const int z0= block[offset+stride*0] + block[offset+stride*4]; 1482 const int z1= block[offset+stride*0] - block[offset+stride*4]; 1483 const int z2= block[offset+stride*1] - block[offset+stride*5]; 1484 const int z3= block[offset+stride*1] + block[offset+stride*5]; 1485 1486 temp[4*i+0]= z0+z3; 1487 temp[4*i+1]= z1+z2; 1488 temp[4*i+2]= z1-z2; 1489 temp[4*i+3]= z0-z3; 1490 } 1491 1492 for(i=0; i<4; i++){ 1493 const int offset= x_offset[i]; 1494 const int z0= temp[4*0+i] + temp[4*2+i]; 1495 const int z1= temp[4*0+i] - temp[4*2+i]; 1496 const int z2= temp[4*1+i] - temp[4*3+i]; 1497 const int z3= temp[4*1+i] + temp[4*3+i]; 1498 1499 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual 1500 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); 1501 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); 1502 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); 1503 } 1504} 1505 1506#if 0 1507/** 1508 * DCT transforms the 16 dc values. 1509 * @param qp quantization parameter ??? 
FIXME 1510 */ 1511static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){ 1512// const int qmul= dequant_coeff[qp][0]; 1513 int i; 1514 int temp[16]; //FIXME check if this is a good idea 1515 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; 1516 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; 1517 1518 for(i=0; i<4; i++){ 1519 const int offset= y_offset[i]; 1520 const int z0= block[offset+stride*0] + block[offset+stride*4]; 1521 const int z1= block[offset+stride*0] - block[offset+stride*4]; 1522 const int z2= block[offset+stride*1] - block[offset+stride*5]; 1523 const int z3= block[offset+stride*1] + block[offset+stride*5]; 1524 1525 temp[4*i+0]= z0+z3; 1526 temp[4*i+1]= z1+z2; 1527 temp[4*i+2]= z1-z2; 1528 temp[4*i+3]= z0-z3; 1529 } 1530 1531 for(i=0; i<4; i++){ 1532 const int offset= x_offset[i]; 1533 const int z0= temp[4*0+i] + temp[4*2+i]; 1534 const int z1= temp[4*0+i] - temp[4*2+i]; 1535 const int z2= temp[4*1+i] - temp[4*3+i]; 1536 const int z3= temp[4*1+i] + temp[4*3+i]; 1537 1538 block[stride*0 +offset]= (z0 + z3)>>1; 1539 block[stride*2 +offset]= (z1 + z2)>>1; 1540 block[stride*8 +offset]= (z1 - z2)>>1; 1541 block[stride*10+offset]= (z0 - z3)>>1; 1542 } 1543} 1544#endif 1545 1546#undef xStride 1547#undef stride 1548 1549static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ 1550 const int stride= 16*2; 1551 const int xStride= 16; 1552 int a,b,c,d,e; 1553 1554 a= block[stride*0 + xStride*0]; 1555 b= block[stride*0 + xStride*1]; 1556 c= block[stride*1 + xStride*0]; 1557 d= block[stride*1 + xStride*1]; 1558 1559 e= a-b; 1560 a= a+b; 1561 b= c-d; 1562 c= c+d; 1563 1564 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7; 1565 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7; 1566 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7; 1567 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7; 1568} 1569 1570#if 0 1571static void chroma_dc_dct_c(DCTELEM *block){ 1572 const int stride= 16*2; 1573 const int xStride= 16; 1574 int 
a,b,c,d,e; 1575 1576 a= block[stride*0 + xStride*0]; 1577 b= block[stride*0 + xStride*1]; 1578 c= block[stride*1 + xStride*0]; 1579 d= block[stride*1 + xStride*1]; 1580 1581 e= a-b; 1582 a= a+b; 1583 b= c-d; 1584 c= c+d; 1585 1586 block[stride*0 + xStride*0]= (a+c); 1587 block[stride*0 + xStride*1]= (e+b); 1588 block[stride*1 + xStride*0]= (a-c); 1589 block[stride*1 + xStride*1]= (e-b); 1590} 1591#endif 1592 1593/** 1594 * gets the chroma qp. 1595 */ 1596static inline int get_chroma_qp(H264Context *h, int t, int qscale){ 1597 return h->pps.chroma_qp_table[t][qscale]; 1598} 1599 1600static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, 1601 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1602 int src_x_offset, int src_y_offset, 1603 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ 1604 MpegEncContext * const s = &h->s; 1605 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; 1606 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; 1607 const int luma_xy= (mx&3) + ((my&3)<<2); 1608 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize; 1609 uint8_t * src_cb, * src_cr; 1610 int extra_width= h->emu_edge_width; 1611 int extra_height= h->emu_edge_height; 1612 int emu=0; 1613 const int full_mx= mx>>2; 1614 const int full_my= my>>2; 1615 const int pic_width = 16*s->mb_width; 1616 const int pic_height = 16*s->mb_height >> MB_FIELD; 1617 1618 if(mx&7) extra_width -= 3; 1619 if(my&7) extra_height -= 3; 1620 1621 if( full_mx < 0-extra_width 1622 || full_my < 0-extra_height 1623 || full_mx + 16/*FIXME*/ > pic_width + extra_width 1624 || full_my + 16/*FIXME*/ > pic_height + extra_height){ 1625 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); 1626 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize; 1627 emu=1; 1628 } 1629 1630 qpix_op[luma_xy](dest_y, src_y, 
h->mb_linesize); //FIXME try variable height perhaps? 1631 if(!square){ 1632 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); 1633 } 1634 1635 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return; 1636 1637 if(MB_FIELD){ 1638 // chroma offset when predicting from a field of opposite parity 1639 my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); 1640 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); 1641 } 1642 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; 1643 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize; 1644 1645 if(emu){ 1646 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); 1647 src_cb= s->edge_emu_buffer; 1648 } 1649 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); 1650 1651 if(emu){ 1652 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); 1653 src_cr= s->edge_emu_buffer; 1654 } 1655 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); 1656} 1657 1658static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, 1659 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1660 int x_offset, int y_offset, 1661 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 1662 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, 1663 int list0, int list1){ 1664 MpegEncContext * const s = &h->s; 1665 qpel_mc_func *qpix_op= qpix_put; 1666 h264_chroma_mc_func chroma_op= chroma_put; 1667 1668 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; 1669 dest_cb += x_offset + y_offset*h->mb_uvlinesize; 1670 dest_cr += x_offset + y_offset*h->mb_uvlinesize; 1671 x_offset += 8*s->mb_x; 1672 y_offset += 8*(s->mb_y >> MB_FIELD); 1673 1674 if(list0){ 1675 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; 1676 mc_dir_part(h, ref, n, square, chroma_height, delta, 0, 1677 dest_y, dest_cb, dest_cr, 
x_offset, y_offset, 1678 qpix_op, chroma_op); 1679 1680 qpix_op= qpix_avg; 1681 chroma_op= chroma_avg; 1682 } 1683 1684 if(list1){ 1685 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; 1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 1, 1687 dest_y, dest_cb, dest_cr, x_offset, y_offset, 1688 qpix_op, chroma_op); 1689 } 1690} 1691 1692static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, 1693 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1694 int x_offset, int y_offset, 1695 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 1696 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, 1697 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, 1698 int list0, int list1){ 1699 MpegEncContext * const s = &h->s; 1700 1701 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; 1702 dest_cb += x_offset + y_offset*h->mb_uvlinesize; 1703 dest_cr += x_offset + y_offset*h->mb_uvlinesize; 1704 x_offset += 8*s->mb_x; 1705 y_offset += 8*(s->mb_y >> MB_FIELD); 1706 1707 if(list0 && list1){ 1708 /* don't optimize for luma-only case, since B-frames usually 1709 * use implicit weights => chroma too. 
*/ 1710 uint8_t *tmp_cb = s->obmc_scratchpad; 1711 uint8_t *tmp_cr = s->obmc_scratchpad + 8; 1712 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize; 1713 int refn0 = h->ref_cache[0][ scan8[n] ]; 1714 int refn1 = h->ref_cache[1][ scan8[n] ]; 1715 1716 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, 1717 dest_y, dest_cb, dest_cr, 1718 x_offset, y_offset, qpix_put, chroma_put); 1719 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, 1720 tmp_y, tmp_cb, tmp_cr, 1721 x_offset, y_offset, qpix_put, chroma_put); 1722 1723 if(h->use_weight == 2){ 1724 int weight0 = h->implicit_weight[refn0][refn1]; 1725 int weight1 = 64 - weight0; 1726 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); 1727 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); 1728 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); 1729 }else{ 1730 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, 1731 h->luma_weight[0][refn0], h->luma_weight[1][refn1], 1732 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]); 1733 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, 1734 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0], 1735 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]); 1736 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, 1737 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1], 1738 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]); 1739 } 1740 }else{ 1741 int list = list1 ? 
1 : 0; 1742 int refn = h->ref_cache[list][ scan8[n] ]; 1743 Picture *ref= &h->ref_list[list][refn]; 1744 mc_dir_part(h, ref, n, square, chroma_height, delta, list, 1745 dest_y, dest_cb, dest_cr, x_offset, y_offset, 1746 qpix_put, chroma_put); 1747 1748 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, 1749 h->luma_weight[list][refn], h->luma_offset[list][refn]); 1750 if(h->use_weight_chroma){ 1751 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, 1752 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]); 1753 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, 1754 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]); 1755 } 1756 } 1757} 1758 1759static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, 1760 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1761 int x_offset, int y_offset, 1762 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, 1763 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, 1764 h264_weight_func *weight_op, h264_biweight_func *weight_avg, 1765 int list0, int list1){ 1766 if((h->use_weight==2 && list0 && list1 1767 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32)) 1768 || h->use_weight==1) 1769 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, 1770 x_offset, y_offset, qpix_put, chroma_put, 1771 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); 1772 else 1773 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, 1774 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); 1775} 1776 1777static inline void prefetch_motion(H264Context *h, int list){ 1778 /* fetch pixels for estimated mv 4 macroblocks ahead 1779 * optimized for 64byte cache lines */ 1780 MpegEncContext * const s = &h->s; 1781 const int refn = h->ref_cache[list][scan8[0]]; 1782 if(refn >= 0){ 1783 const int mx= 
(h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; 1784 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; 1785 uint8_t **src= h->ref_list[list][refn].data; 1786 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; 1787 s->dsp.prefetch(src[0]+off, s->linesize, 4); 1788 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; 1789 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); 1790 } 1791} 1792 1793static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, 1794 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), 1795 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), 1796 h264_weight_func *weight_op, h264_biweight_func *weight_avg){ 1797 MpegEncContext * const s = &h->s; 1798 const int mb_xy= h->mb_xy; 1799 const int mb_type= s->current_picture.mb_type[mb_xy]; 1800 1801 assert(IS_INTER(mb_type)); 1802 1803 prefetch_motion(h, 0); 1804 1805 if(IS_16X16(mb_type)){ 1806 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, 1807 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], 1808 &weight_op[0], &weight_avg[0], 1809 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); 1810 }else if(IS_16X8(mb_type)){ 1811 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, 1812 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], 1813 &weight_op[1], &weight_avg[1], 1814 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); 1815 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, 1816 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], 1817 &weight_op[1], &weight_avg[1], 1818 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); 1819 }else if(IS_8X16(mb_type)){ 1820 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, 1821 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], 1822 &weight_op[2], &weight_avg[2], 1823 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); 1824 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, 1825 qpix_put[1], chroma_put[1], qpix_avg[1], 
chroma_avg[1], 1826 &weight_op[2], &weight_avg[2], 1827 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); 1828 }else{ 1829 int i; 1830 1831 assert(IS_8X8(mb_type)); 1832 1833 for(i=0; i<4; i++){ 1834 const int sub_mb_type= h->sub_mb_type[i]; 1835 const int n= 4*i; 1836 int x_offset= (i&1)<<2; 1837 int y_offset= (i&2)<<1; 1838 1839 if(IS_SUB_8X8(sub_mb_type)){ 1840 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, 1841 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], 1842 &weight_op[3], &weight_avg[3], 1843 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1844 }else if(IS_SUB_8X4(sub_mb_type)){ 1845 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, 1846 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], 1847 &weight_op[4], &weight_avg[4], 1848 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1849 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, 1850 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], 1851 &weight_op[4], &weight_avg[4], 1852 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1853 }else if(IS_SUB_4X8(sub_mb_type)){ 1854 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, 1855 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 1856 &weight_op[5], &weight_avg[5], 1857 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1858 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, 1859 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 1860 &weight_op[5], &weight_avg[5], 1861 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1862 }else{ 1863 int j; 1864 assert(IS_SUB_4X4(sub_mb_type)); 1865 for(j=0; j<4; j++){ 1866 int sub_x_offset= x_offset + 2*(j&1); 1867 int sub_y_offset= y_offset + (j&2); 1868 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, 1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], 1870 &weight_op[6], &weight_avg[6], 1871 
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); 1872 } 1873 } 1874 } 1875 } 1876 1877 prefetch_motion(h, 1); 1878} 1879 1880static av_cold void init_cavlc_level_tab(void){ 1881 int suffix_length, mask; 1882 unsigned int i; 1883 1884 for(suffix_length=0; suffix_length<7; suffix_length++){ 1885 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){ 1886 int prefix= LEVEL_TAB_BITS - av_log2(2*i); 1887 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length); 1888 1889 mask= -(level_code&1); 1890 level_code= (((2+level_code)>>1) ^ mask) - mask; 1891 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){ 1892 cavlc_level_tab[suffix_length][i][0]= level_code; 1893 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length; 1894 }else if(prefix + 1 <= LEVEL_TAB_BITS){ 1895 cavlc_level_tab[suffix_length][i][0]= prefix+100; 1896 cavlc_level_tab[suffix_length][i][1]= prefix + 1; 1897 }else{ 1898 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100; 1899 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS; 1900 } 1901 } 1902 } 1903} 1904 1905static av_cold void decode_init_vlc(void){ 1906 static int done = 0; 1907 1908 if (!done) { 1909 int i; 1910 int offset; 1911 done = 1; 1912 1913 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table; 1914 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size; 1915 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, 1916 &chroma_dc_coeff_token_len [0], 1, 1, 1917 &chroma_dc_coeff_token_bits[0], 1, 1, 1918 INIT_VLC_USE_NEW_STATIC); 1919 1920 offset = 0; 1921 for(i=0; i<4; i++){ 1922 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; 1923 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i]; 1924 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, 1925 &coeff_token_len [i][0], 1, 1, 1926 &coeff_token_bits[i][0], 1, 1, 1927 INIT_VLC_USE_NEW_STATIC); 1928 offset += coeff_token_vlc_tables_size[i]; 1929 } 1930 
/* 1931 * This is a one time safety check to make sure that 1932 * the packed static coeff_token_vlc table sizes 1933 * were initialized correctly. 1934 */ 1935 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables)); 1936 1937 for(i=0; i<3; i++){ 1938 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i]; 1939 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size; 1940 init_vlc(&chroma_dc_total_zeros_vlc[i], 1941 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, 1942 &chroma_dc_total_zeros_len [i][0], 1, 1, 1943 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1944 INIT_VLC_USE_NEW_STATIC); 1945 } 1946 for(i=0; i<15; i++){ 1947 total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; 1948 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; 1949 init_vlc(&total_zeros_vlc[i], 1950 TOTAL_ZEROS_VLC_BITS, 16, 1951 &total_zeros_len [i][0], 1, 1, 1952 &total_zeros_bits[i][0], 1, 1, 1953 INIT_VLC_USE_NEW_STATIC); 1954 } 1955 1956 for(i=0; i<6; i++){ 1957 run_vlc[i].table = run_vlc_tables[i]; 1958 run_vlc[i].table_allocated = run_vlc_tables_size; 1959 init_vlc(&run_vlc[i], 1960 RUN_VLC_BITS, 7, 1961 &run_len [i][0], 1, 1, 1962 &run_bits[i][0], 1, 1, 1963 INIT_VLC_USE_NEW_STATIC); 1964 } 1965 run7_vlc.table = run7_vlc_table, 1966 run7_vlc.table_allocated = run7_vlc_table_size; 1967 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, 1968 &run_len [6][0], 1, 1, 1969 &run_bits[6][0], 1, 1, 1970 INIT_VLC_USE_NEW_STATIC); 1971 1972 init_cavlc_level_tab(); 1973 } 1974} 1975 1976static void free_tables(H264Context *h){ 1977 int i; 1978 H264Context *hx; 1979 av_freep(&h->intra4x4_pred_mode); 1980 av_freep(&h->chroma_pred_mode_table); 1981 av_freep(&h->cbp_table); 1982 av_freep(&h->mvd_table[0]); 1983 av_freep(&h->mvd_table[1]); 1984 av_freep(&h->direct_table); 1985 av_freep(&h->non_zero_count); 1986 av_freep(&h->slice_table_base); 1987 h->slice_table= NULL; 1988 1989 av_freep(&h->mb2b_xy); 1990 av_freep(&h->mb2b8_xy); 1991 1992 for(i = 0; i < 
h->s.avctx->thread_count; i++) { 1993 hx = h->thread_context[i]; 1994 if(!hx) continue; 1995 av_freep(&hx->top_borders[1]); 1996 av_freep(&hx->top_borders[0]); 1997 av_freep(&hx->s.obmc_scratchpad); 1998 } 1999} 2000 2001static void init_dequant8_coeff_table(H264Context *h){ 2002 int i,q,x; 2003 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly 2004 h->dequant8_coeff[0] = h->dequant8_buffer[0]; 2005 h->dequant8_coeff[1] = h->dequant8_buffer[1]; 2006 2007 for(i=0; i<2; i++ ){ 2008 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ 2009 h->dequant8_coeff[1] = h->dequant8_buffer[0]; 2010 break; 2011 } 2012 2013 for(q=0; q<52; q++){ 2014 int shift = div6[q]; 2015 int idx = rem6[q]; 2016 for(x=0; x<64; x++) 2017 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = 2018 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * 2019 h->pps.scaling_matrix8[i][x]) << shift; 2020 } 2021 } 2022} 2023 2024static void init_dequant4_coeff_table(H264Context *h){ 2025 int i,j,q,x; 2026 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly 2027 for(i=0; i<6; i++ ){ 2028 h->dequant4_coeff[i] = h->dequant4_buffer[i]; 2029 for(j=0; j<i; j++){ 2030 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ 2031 h->dequant4_coeff[i] = h->dequant4_buffer[j]; 2032 break; 2033 } 2034 } 2035 if(j<i) 2036 continue; 2037 2038 for(q=0; q<52; q++){ 2039 int shift = div6[q] + 2; 2040 int idx = rem6[q]; 2041 for(x=0; x<16; x++) 2042 h->dequant4_coeff[i][q][transpose ? 
(x>>2)|((x<<2)&0xF) : x] = 2043 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * 2044 h->pps.scaling_matrix4[i][x]) << shift; 2045 } 2046 } 2047} 2048 2049static void init_dequant_tables(H264Context *h){ 2050 int i,x; 2051 init_dequant4_coeff_table(h); 2052 if(h->pps.transform_8x8_mode) 2053 init_dequant8_coeff_table(h); 2054 if(h->sps.transform_bypass){ 2055 for(i=0; i<6; i++) 2056 for(x=0; x<16; x++) 2057 h->dequant4_coeff[i][0][x] = 1<<6; 2058 if(h->pps.transform_8x8_mode) 2059 for(i=0; i<2; i++) 2060 for(x=0; x<64; x++) 2061 h->dequant8_coeff[i][0][x] = 1<<6; 2062 } 2063} 2064 2065 2066/** 2067 * allocates tables. 2068 * needs width/height 2069 */ 2070static int alloc_tables(H264Context *h){ 2071 MpegEncContext * const s = &h->s; 2072 const int big_mb_num= s->mb_stride * (s->mb_height+1); 2073 int x,y; 2074 2075 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) 2076 2077 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) 2078 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)) 2079 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) 2080 2081 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t)) 2082 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t)); 2083 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t)); 2084 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t)); 2085 2086 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)); 2087 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; 2088 2089 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t)); 2090 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t)); 2091 for(y=0; y<s->mb_height; y++){ 2092 for(x=0; x<s->mb_width; x++){ 2093 const int mb_xy= x + y*s->mb_stride; 2094 const int b_xy = 4*x + 4*y*h->b_stride; 2095 const int b8_xy= 2*x + 2*y*h->b8_stride; 2096 2097 h->mb2b_xy [mb_xy]= b_xy; 2098 
h->mb2b8_xy[mb_xy]= b8_xy; 2099 } 2100 } 2101 2102 s->obmc_scratchpad = NULL; 2103 2104 if(!h->dequant4_coeff[0]) 2105 init_dequant_tables(h); 2106 2107 return 0; 2108fail: 2109 free_tables(h); 2110 return -1; 2111} 2112 2113/** 2114 * Mimic alloc_tables(), but for every context thread. 2115 */ 2116static void clone_tables(H264Context *dst, H264Context *src){ 2117 dst->intra4x4_pred_mode = src->intra4x4_pred_mode; 2118 dst->non_zero_count = src->non_zero_count; 2119 dst->slice_table = src->slice_table; 2120 dst->cbp_table = src->cbp_table; 2121 dst->mb2b_xy = src->mb2b_xy; 2122 dst->mb2b8_xy = src->mb2b8_xy; 2123 dst->chroma_pred_mode_table = src->chroma_pred_mode_table; 2124 dst->mvd_table[0] = src->mvd_table[0]; 2125 dst->mvd_table[1] = src->mvd_table[1]; 2126 dst->direct_table = src->direct_table; 2127 2128 dst->s.obmc_scratchpad = NULL; 2129 ff_h264_pred_init(&dst->hpc, src->s.codec_id); 2130} 2131 2132/** 2133 * Init context 2134 * Allocate buffers which are not shared amongst multiple threads. 2135 */ 2136static int context_init(H264Context *h){ 2137 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)) 2138 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)) 2139 2140 return 0; 2141fail: 2142 return -1; // free_tables will clean up for us 2143} 2144 2145static av_cold void common_init(H264Context *h){ 2146 MpegEncContext * const s = &h->s; 2147 2148 s->width = s->avctx->width; 2149 s->height = s->avctx->height; 2150 s->codec_id= s->avctx->codec->id; 2151 2152 ff_h264_pred_init(&h->hpc, s->codec_id); 2153 2154 h->dequant_coeff_pps= -1; 2155 s->unrestricted_mv=1; 2156 s->decode=1; //FIXME 2157 2158 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early 2159 2160 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t)); 2161 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t)); 2162} 2163 2164/** 2165 * Reset SEI values at the beginning of the frame. 
2166 * 2167 * @param h H.264 context. 2168 */ 2169static void reset_sei(H264Context *h) { 2170 h->sei_recovery_frame_cnt = -1; 2171 h->sei_dpb_output_delay = 0; 2172 h->sei_cpb_removal_delay = -1; 2173 h->sei_buffering_period_present = 0; 2174} 2175 2176static av_cold int decode_init(AVCodecContext *avctx){ 2177 H264Context *h= avctx->priv_data; 2178 MpegEncContext * const s = &h->s; 2179 2180 MPV_decode_defaults(s); 2181 2182 s->avctx = avctx; 2183 common_init(h); 2184 2185 s->out_format = FMT_H264; 2186 s->workaround_bugs= avctx->workaround_bugs; 2187 2188 // set defaults 2189// s->decode_mb= ff_h263_decode_mb; 2190 s->quarter_sample = 1; 2191 if(!avctx->has_b_frames) 2192 s->low_delay= 1; 2193 2194 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 2195 avctx->pix_fmt= PIX_FMT_VDPAU_H264; 2196 else 2197 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts); 2198 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt); 2199 2200 decode_init_vlc(); 2201 2202 if(avctx->extradata_size > 0 && avctx->extradata && 2203 *(char *)avctx->extradata == 1){ 2204 h->is_avc = 1; 2205 h->got_avcC = 0; 2206 } else { 2207 h->is_avc = 0; 2208 } 2209 2210 h->thread_context[0] = h; 2211 h->outputed_poc = INT_MIN; 2212 h->prev_poc_msb= 1<<16; 2213 reset_sei(h); 2214 if(avctx->codec_id == CODEC_ID_H264){ 2215 if(avctx->ticks_per_frame == 1){ 2216 s->avctx->time_base.den *=2; 2217 } 2218 avctx->ticks_per_frame = 2; 2219 } 2220 return 0; 2221} 2222 2223static int frame_start(H264Context *h){ 2224 MpegEncContext * const s = &h->s; 2225 int i; 2226 2227 if(MPV_frame_start(s, s->avctx) < 0) 2228 return -1; 2229 ff_er_frame_start(s); 2230 /* 2231 * MPV_frame_start uses pict_type to derive key_frame. 2232 * This is incorrect for H.264; IDR markings must be used. 2233 * Zero here; IDR markings per slice in frame or fields are ORed in later. 2234 * See decode_nal_units(). 
2235 */ 2236 s->current_picture_ptr->key_frame= 0; 2237 2238 assert(s->linesize && s->uvlinesize); 2239 2240 for(i=0; i<16; i++){ 2241 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); 2242 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); 2243 } 2244 for(i=0; i<4; i++){ 2245 h->block_offset[16+i]= 2246 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); 2247 h->block_offset[24+16+i]= 2248 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); 2249 } 2250 2251 /* can't be in alloc_tables because linesize isn't known there. 2252 * FIXME: redo bipred weight to not require extra buffer? */ 2253 for(i = 0; i < s->avctx->thread_count; i++) 2254 if(!h->thread_context[i]->s.obmc_scratchpad) 2255 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); 2256 2257 /* some macroblocks will be accessed before they're available */ 2258 if(FRAME_MBAFF || s->avctx->thread_count > 1) 2259 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table)); 2260 2261// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; 2262 2263 // We mark the current picture as non-reference after allocating it, so 2264 // that if we break out due to an error it can be released automatically 2265 // in the next MPV_frame_start(). 2266 // SVQ3 as well as most other codecs have only last/next/current and thus 2267 // get released even with set reference, besides SVQ3 and others do not 2268 // mark frames as reference later "naturally". 
2269 if(s->codec_id != CODEC_ID_SVQ3) 2270 s->current_picture_ptr->reference= 0; 2271 2272 s->current_picture_ptr->field_poc[0]= 2273 s->current_picture_ptr->field_poc[1]= INT_MAX; 2274 assert(s->current_picture_ptr->long_ref==0); 2275 2276 return 0; 2277} 2278 2279static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ 2280 MpegEncContext * const s = &h->s; 2281 int i; 2282 int step = 1; 2283 int offset = 1; 2284 int uvoffset= 1; 2285 int top_idx = 1; 2286 int skiplast= 0; 2287 2288 src_y -= linesize; 2289 src_cb -= uvlinesize; 2290 src_cr -= uvlinesize; 2291 2292 if(!simple && FRAME_MBAFF){ 2293 if(s->mb_y&1){ 2294 offset = MB_MBAFF ? 1 : 17; 2295 uvoffset= MB_MBAFF ? 1 : 9; 2296 if(!MB_MBAFF){ 2297 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize); 2298 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize); 2299 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize); 2301 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize); 2302 } 2303 } 2304 }else{ 2305 if(!MB_MBAFF){ 2306 h->left_border[0]= h->top_borders[0][s->mb_x][15]; 2307 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 2308 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ]; 2309 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7]; 2310 } 2311 skiplast= 1; 2312 } 2313 offset = 2314 uvoffset= 2315 top_idx = MB_MBAFF ? 0 : 1; 2316 } 2317 step= MB_MBAFF ? 
2 : 1; 2318 } 2319 2320 // There are two lines saved, the line above the the top macroblock of a pair, 2321 // and the line above the bottom macroblock 2322 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15]; 2323 for(i=1; i<17 - skiplast; i++){ 2324 h->left_border[offset+i*step]= src_y[15+i* linesize]; 2325 } 2326 2327 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize); 2328 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize); 2329 2330 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 2331 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7]; 2332 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7]; 2333 for(i=1; i<9 - skiplast; i++){ 2334 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize]; 2335 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize]; 2336 } 2337 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize); 2338 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize); 2339 } 2340} 2341 2342static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ 2343 MpegEncContext * const s = &h->s; 2344 int temp8, i; 2345 uint64_t temp64; 2346 int deblock_left; 2347 int deblock_top; 2348 int mb_xy; 2349 int step = 1; 2350 int offset = 1; 2351 int uvoffset= 1; 2352 int top_idx = 1; 2353 2354 if(!simple && FRAME_MBAFF){ 2355 if(s->mb_y&1){ 2356 offset = MB_MBAFF ? 1 : 17; 2357 uvoffset= MB_MBAFF ? 1 : 9; 2358 }else{ 2359 offset = 2360 uvoffset= 2361 top_idx = MB_MBAFF ? 0 : 1; 2362 } 2363 step= MB_MBAFF ? 
2 : 1; 2364 } 2365 2366 if(h->deblocking_filter == 2) { 2367 mb_xy = h->mb_xy; 2368 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1]; 2369 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy]; 2370 } else { 2371 deblock_left = (s->mb_x > 0); 2372 deblock_top = (s->mb_y > !!MB_FIELD); 2373 } 2374 2375 src_y -= linesize + 1; 2376 src_cb -= uvlinesize + 1; 2377 src_cr -= uvlinesize + 1; 2378 2379#define XCHG(a,b,t,xchg)\ 2380t= a;\ 2381if(xchg)\ 2382 a= b;\ 2383b= t; 2384 2385 if(deblock_left){ 2386 for(i = !deblock_top; i<16; i++){ 2387 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg); 2388 } 2389 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1); 2390 } 2391 2392 if(deblock_top){ 2393 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg); 2394 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); 2395 if(s->mb_x+1 < s->mb_width){ 2396 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1); 2397 } 2398 } 2399 2400 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 2401 if(deblock_left){ 2402 for(i = !deblock_top; i<8; i++){ 2403 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg); 2404 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg); 2405 } 2406 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1); 2407 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1); 2408 } 2409 if(deblock_top){ 2410 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1); 2411 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1); 2412 } 2413 } 2414} 2415 2416static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ 2417 MpegEncContext * const s = &h->s; 2418 const int mb_x= s->mb_x; 2419 const int mb_y= s->mb_y; 
2420 const int mb_xy= h->mb_xy; 2421 const int mb_type= s->current_picture.mb_type[mb_xy]; 2422 uint8_t *dest_y, *dest_cb, *dest_cr; 2423 int linesize, uvlinesize /*dct_offset*/; 2424 int i; 2425 int *block_offset = &h->block_offset[0]; 2426 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); 2427 /* is_h264 should always be true if SVQ3 is disabled. */ 2428 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; 2429 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); 2430 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); 2431 2432 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; 2433 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; 2434 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; 2435 2436 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4); 2437 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2); 2438 2439 if (!simple && MB_FIELD) { 2440 linesize = h->mb_linesize = s->linesize * 2; 2441 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; 2442 block_offset = &h->block_offset[24]; 2443 if(mb_y&1){ //FIXME move out of this function? 
2444 dest_y -= s->linesize*15; 2445 dest_cb-= s->uvlinesize*7; 2446 dest_cr-= s->uvlinesize*7; 2447 } 2448 if(FRAME_MBAFF) { 2449 int list; 2450 for(list=0; list<h->list_count; list++){ 2451 if(!USES_LIST(mb_type, list)) 2452 continue; 2453 if(IS_16X16(mb_type)){ 2454 int8_t *ref = &h->ref_cache[list][scan8[0]]; 2455 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); 2456 }else{ 2457 for(i=0; i<16; i+=4){ 2458 int ref = h->ref_cache[list][scan8[i]]; 2459 if(ref >= 0) 2460 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1); 2461 } 2462 } 2463 } 2464 } 2465 } else { 2466 linesize = h->mb_linesize = s->linesize; 2467 uvlinesize = h->mb_uvlinesize = s->uvlinesize; 2468// dct_offset = s->linesize * 16; 2469 } 2470 2471 if (!simple && IS_INTRA_PCM(mb_type)) { 2472 for (i=0; i<16; i++) { 2473 memcpy(dest_y + i* linesize, h->mb + i*8, 16); 2474 } 2475 for (i=0; i<8; i++) { 2476 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); 2477 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); 2478 } 2479 } else { 2480 if(IS_INTRA(mb_type)){ 2481 if(h->deblocking_filter) 2482 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); 2483 2484 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 2485 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); 2486 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); 2487 } 2488 2489 if(IS_INTRA4x4(mb_type)){ 2490 if(simple || !s->encoding){ 2491 if(IS_8x8DCT(mb_type)){ 2492 if(transform_bypass){ 2493 idct_dc_add = 2494 idct_add = s->dsp.add_pixels8; 2495 }else{ 2496 idct_dc_add = s->dsp.h264_idct8_dc_add; 2497 idct_add = s->dsp.h264_idct8_add; 2498 } 2499 for(i=0; i<16; i+=4){ 2500 uint8_t * const ptr= dest_y + block_offset[i]; 2501 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; 2502 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ 2503 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize); 2504 }else{ 2505 const int nnz = 
h->non_zero_count_cache[ scan8[i] ]; 2506 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, 2507 (h->topright_samples_available<<i)&0x4000, linesize); 2508 if(nnz){ 2509 if(nnz == 1 && h->mb[i*16]) 2510 idct_dc_add(ptr, h->mb + i*16, linesize); 2511 else 2512 idct_add (ptr, h->mb + i*16, linesize); 2513 } 2514 } 2515 } 2516 }else{ 2517 if(transform_bypass){ 2518 idct_dc_add = 2519 idct_add = s->dsp.add_pixels4; 2520 }else{ 2521 idct_dc_add = s->dsp.h264_idct_dc_add; 2522 idct_add = s->dsp.h264_idct_add; 2523 } 2524 for(i=0; i<16; i++){ 2525 uint8_t * const ptr= dest_y + block_offset[i]; 2526 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; 2527 2528 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ 2529 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize); 2530 }else{ 2531 uint8_t *topright; 2532 int nnz, tr; 2533 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ 2534 const int topright_avail= (h->topright_samples_available<<i)&0x8000; 2535 assert(mb_y || linesize <= block_offset[i]); 2536 if(!topright_avail){ 2537 tr= ptr[3 - linesize]*0x01010101; 2538 topright= (uint8_t*) &tr; 2539 }else 2540 topright= ptr + 4 - linesize; 2541 }else 2542 topright= NULL; 2543 2544 h->hpc.pred4x4[ dir ](ptr, topright, linesize); 2545 nnz = h->non_zero_count_cache[ scan8[i] ]; 2546 if(nnz){ 2547 if(is_h264){ 2548 if(nnz == 1 && h->mb[i*16]) 2549 idct_dc_add(ptr, h->mb + i*16, linesize); 2550 else 2551 idct_add (ptr, h->mb + i*16, linesize); 2552 }else 2553 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); 2554 } 2555 } 2556 } 2557 } 2558 } 2559 }else{ 2560 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); 2561 if(is_h264){ 2562 if(!transform_bypass) 2563 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]); 2564 }else 2565 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); 2566 } 2567 if(h->deblocking_filter) 2568 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); 
2569 }else if(is_h264){ 2570 hl_motion(h, dest_y, dest_cb, dest_cr, 2571 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, 2572 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, 2573 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab); 2574 } 2575 2576 2577 if(!IS_INTRA4x4(mb_type)){ 2578 if(is_h264){ 2579 if(IS_INTRA16x16(mb_type)){ 2580 if(transform_bypass){ 2581 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ 2582 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); 2583 }else{ 2584 for(i=0; i<16; i++){ 2585 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) 2586 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize); 2587 } 2588 } 2589 }else{ 2590 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); 2591 } 2592 }else if(h->cbp&15){ 2593 if(transform_bypass){ 2594 const int di = IS_8x8DCT(mb_type) ? 4 : 1; 2595 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; 2596 for(i=0; i<16; i+=di){ 2597 if(h->non_zero_count_cache[ scan8[i] ]){ 2598 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); 2599 } 2600 } 2601 }else{ 2602 if(IS_8x8DCT(mb_type)){ 2603 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); 2604 }else{ 2605 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); 2606 } 2607 } 2608 } 2609 }else{ 2610 for(i=0; i<16; i++){ 2611 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below 2612 uint8_t * const ptr= dest_y + block_offset[i]; 2613 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 
1 : 0); 2614 } 2615 } 2616 } 2617 } 2618 2619 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){ 2620 uint8_t *dest[2] = {dest_cb, dest_cr}; 2621 if(transform_bypass){ 2622 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ 2623 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize); 2624 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize); 2625 }else{ 2626 idct_add = s->dsp.add_pixels4; 2627 for(i=16; i<16+8; i++){ 2628 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) 2629 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); 2630 } 2631 } 2632 }else{ 2633 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); 2634 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); 2635 if(is_h264){ 2636 idct_add = s->dsp.h264_idct_add; 2637 idct_dc_add = s->dsp.h264_idct_dc_add; 2638 for(i=16; i<16+8; i++){ 2639 if(h->non_zero_count_cache[ scan8[i] ]) 2640 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); 2641 else if(h->mb[i*16]) 2642 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); 2643 } 2644 }else{ 2645 for(i=16; i<16+8; i++){ 2646 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ 2647 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; 2648 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); 2649 } 2650 } 2651 } 2652 } 2653 } 2654 } 2655 if(h->cbp || IS_INTRA(mb_type)) 2656 s->dsp.clear_blocks(h->mb); 2657 2658 if(h->deblocking_filter) { 2659 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple); 2660 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb 2661 h->chroma_qp[0] = get_chroma_qp(h, 0, 
s->current_picture.qscale_table[mb_xy]); 2662 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]); 2663 if (!simple && FRAME_MBAFF) { 2664 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 2665 } else { 2666 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 2667 } 2668 } 2669} 2670 2671/** 2672 * Process a macroblock; this case avoids checks for expensive uncommon cases. 2673 */ 2674static void hl_decode_mb_simple(H264Context *h){ 2675 hl_decode_mb_internal(h, 1); 2676} 2677 2678/** 2679 * Process a macroblock; this handles edge cases, such as interlacing. 2680 */ 2681static void av_noinline hl_decode_mb_complex(H264Context *h){ 2682 hl_decode_mb_internal(h, 0); 2683} 2684 2685static void hl_decode_mb(H264Context *h){ 2686 MpegEncContext * const s = &h->s; 2687 const int mb_xy= h->mb_xy; 2688 const int mb_type= s->current_picture.mb_type[mb_xy]; 2689 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; 2690 2691 if (is_complex) 2692 hl_decode_mb_complex(h); 2693 else hl_decode_mb_simple(h); 2694} 2695 2696static void pic_as_field(Picture *pic, const int parity){ 2697 int i; 2698 for (i = 0; i < 4; ++i) { 2699 if (parity == PICT_BOTTOM_FIELD) 2700 pic->data[i] += pic->linesize[i]; 2701 pic->reference = parity; 2702 pic->linesize[i] *= 2; 2703 } 2704 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD]; 2705} 2706 2707static int split_field_copy(Picture *dest, Picture *src, 2708 int parity, int id_add){ 2709 int match = !!(src->reference & parity); 2710 2711 if (match) { 2712 *dest = *src; 2713 if(parity != PICT_FRAME){ 2714 pic_as_field(dest, parity); 2715 dest->pic_id *= 2; 2716 dest->pic_id += id_add; 2717 } 2718 } 2719 2720 return match; 2721} 2722 2723static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){ 2724 int i[2]={0}; 2725 int index=0; 2726 2727 while(i[0]<len || i[1]<len){ 2728 while(i[0]<len && !(in[ i[0] ] 
&& (in[ i[0] ]->reference & sel))) 2729 i[0]++; 2730 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3)))) 2731 i[1]++; 2732 if(i[0] < len){ 2733 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num; 2734 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1); 2735 } 2736 if(i[1] < len){ 2737 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num; 2738 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0); 2739 } 2740 } 2741 2742 return index; 2743} 2744 2745static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){ 2746 int i, best_poc; 2747 int out_i= 0; 2748 2749 for(;;){ 2750 best_poc= dir ? INT_MIN : INT_MAX; 2751 2752 for(i=0; i<len; i++){ 2753 const int poc= src[i]->poc; 2754 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){ 2755 best_poc= poc; 2756 sorted[out_i]= src[i]; 2757 } 2758 } 2759 if(best_poc == (dir ? INT_MIN : INT_MAX)) 2760 break; 2761 limit= sorted[out_i++]->poc - dir; 2762 } 2763 return out_i; 2764} 2765 2766/** 2767 * fills the default_ref_list. 
2768 */ 2769static int fill_default_ref_list(H264Context *h){ 2770 MpegEncContext * const s = &h->s; 2771 int i, len; 2772 2773 if(h->slice_type_nos==FF_B_TYPE){ 2774 Picture *sorted[32]; 2775 int cur_poc, list; 2776 int lens[2]; 2777 2778 if(FIELD_PICTURE) 2779 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ]; 2780 else 2781 cur_poc= s->current_picture_ptr->poc; 2782 2783 for(list= 0; list<2; list++){ 2784 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list); 2785 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list); 2786 assert(len<=32); 2787 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure); 2788 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure); 2789 assert(len<=32); 2790 2791 if(len < h->ref_count[list]) 2792 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len)); 2793 lens[list]= len; 2794 } 2795 2796 if(lens[0] == lens[1] && lens[1] > 1){ 2797 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++); 2798 if(i == lens[0]) 2799 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]); 2800 } 2801 }else{ 2802 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure); 2803 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure); 2804 assert(len <= 32); 2805 if(len < h->ref_count[0]) 2806 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len)); 2807 } 2808#ifdef TRACE 2809 for (i=0; i<h->ref_count[0]; i++) { 2810 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? 
"LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]); 2811 } 2812 if(h->slice_type_nos==FF_B_TYPE){ 2813 for (i=0; i<h->ref_count[1]; i++) { 2814 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]); 2815 } 2816 } 2817#endif 2818 return 0; 2819} 2820 2821static void print_short_term(H264Context *h); 2822static void print_long_term(H264Context *h); 2823 2824/** 2825 * Extract structure information about the picture described by pic_num in 2826 * the current decoding context (frame or field). Note that pic_num is 2827 * picture number without wrapping (so, 0<=pic_num<max_pic_num). 2828 * @param pic_num picture number for which to extract structure information 2829 * @param structure one of PICT_XXX describing structure of picture 2830 * with pic_num 2831 * @return frame number (short term) or long term index of picture 2832 * described by pic_num 2833 */ 2834static int pic_num_extract(H264Context *h, int pic_num, int *structure){ 2835 MpegEncContext * const s = &h->s; 2836 2837 *structure = s->picture_structure; 2838 if(FIELD_PICTURE){ 2839 if (!(pic_num & 1)) 2840 /* opposite field */ 2841 *structure ^= PICT_FRAME; 2842 pic_num >>= 1; 2843 } 2844 2845 return pic_num; 2846} 2847 2848static int decode_ref_pic_list_reordering(H264Context *h){ 2849 MpegEncContext * const s = &h->s; 2850 int list, index, pic_structure; 2851 2852 print_short_term(h); 2853 print_long_term(h); 2854 2855 for(list=0; list<h->list_count; list++){ 2856 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]); 2857 2858 if(get_bits1(&s->gb)){ 2859 int pred= h->curr_pic_num; 2860 2861 for(index=0; ; index++){ 2862 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb); 2863 unsigned int pic_id; 2864 int i; 2865 Picture *ref = NULL; 2866 2867 if(reordering_of_pic_nums_idc==3) 2868 break; 2869 2870 if(index >= 
h->ref_count[list]){ 2871 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n"); 2872 return -1; 2873 } 2874 2875 if(reordering_of_pic_nums_idc<3){ 2876 if(reordering_of_pic_nums_idc<2){ 2877 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1; 2878 int frame_num; 2879 2880 if(abs_diff_pic_num > h->max_pic_num){ 2881 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n"); 2882 return -1; 2883 } 2884 2885 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num; 2886 else pred+= abs_diff_pic_num; 2887 pred &= h->max_pic_num - 1; 2888 2889 frame_num = pic_num_extract(h, pred, &pic_structure); 2890 2891 for(i= h->short_ref_count-1; i>=0; i--){ 2892 ref = h->short_ref[i]; 2893 assert(ref->reference); 2894 assert(!ref->long_ref); 2895 if( 2896 ref->frame_num == frame_num && 2897 (ref->reference & pic_structure) 2898 ) 2899 break; 2900 } 2901 if(i>=0) 2902 ref->pic_id= pred; 2903 }else{ 2904 int long_idx; 2905 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx 2906 2907 long_idx= pic_num_extract(h, pic_id, &pic_structure); 2908 2909 if(long_idx>31){ 2910 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n"); 2911 return -1; 2912 } 2913 ref = h->long_ref[long_idx]; 2914 assert(!(ref && !ref->reference)); 2915 if(ref && (ref->reference & pic_structure)){ 2916 ref->pic_id= pic_id; 2917 assert(ref->long_ref); 2918 i=0; 2919 }else{ 2920 i=-1; 2921 } 2922 } 2923 2924 if (i < 0) { 2925 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n"); 2926 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME 2927 } else { 2928 for(i=index; i+1<h->ref_count[list]; i++){ 2929 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id) 2930 break; 2931 } 2932 for(; i > index; i--){ 2933 h->ref_list[list][i]= h->ref_list[list][i-1]; 2934 } 2935 h->ref_list[list][index]= *ref; 2936 if (FIELD_PICTURE){ 2937 pic_as_field(&h->ref_list[list][index], pic_structure); 2938 } 2939 } 2940 
}else{ 2941 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n"); 2942 return -1; 2943 } 2944 } 2945 } 2946 } 2947 for(list=0; list<h->list_count; list++){ 2948 for(index= 0; index < h->ref_count[list]; index++){ 2949 if(!h->ref_list[list][index].data[0]){ 2950 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n"); 2951 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution 2952 } 2953 } 2954 } 2955 2956 return 0; 2957} 2958 2959static void fill_mbaff_ref_list(H264Context *h){ 2960 int list, i, j; 2961 for(list=0; list<2; list++){ //FIXME try list_count 2962 for(i=0; i<h->ref_count[list]; i++){ 2963 Picture *frame = &h->ref_list[list][i]; 2964 Picture *field = &h->ref_list[list][16+2*i]; 2965 field[0] = *frame; 2966 for(j=0; j<3; j++) 2967 field[0].linesize[j] <<= 1; 2968 field[0].reference = PICT_TOP_FIELD; 2969 field[0].poc= field[0].field_poc[0]; 2970 field[1] = field[0]; 2971 for(j=0; j<3; j++) 2972 field[1].data[j] += frame->linesize[j]; 2973 field[1].reference = PICT_BOTTOM_FIELD; 2974 field[1].poc= field[1].field_poc[1]; 2975 2976 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i]; 2977 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i]; 2978 for(j=0; j<2; j++){ 2979 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j]; 2980 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j]; 2981 } 2982 } 2983 } 2984 for(j=0; j<h->ref_count[1]; j++){ 2985 for(i=0; i<h->ref_count[0]; i++) 2986 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i]; 2987 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight)); 2988 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight)); 2989 } 2990} 2991 2992static int pred_weight_table(H264Context *h){ 2993 
MpegEncContext * const s = &h->s; 2994 int list, i; 2995 int luma_def, chroma_def; 2996 2997 h->use_weight= 0; 2998 h->use_weight_chroma= 0; 2999 h->luma_log2_weight_denom= get_ue_golomb(&s->gb); 3000 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb); 3001 luma_def = 1<<h->luma_log2_weight_denom; 3002 chroma_def = 1<<h->chroma_log2_weight_denom; 3003 3004 for(list=0; list<2; list++){ 3005 h->luma_weight_flag[list] = 0; 3006 h->chroma_weight_flag[list] = 0; 3007 for(i=0; i<h->ref_count[list]; i++){ 3008 int luma_weight_flag, chroma_weight_flag; 3009 3010 luma_weight_flag= get_bits1(&s->gb); 3011 if(luma_weight_flag){ 3012 h->luma_weight[list][i]= get_se_golomb(&s->gb); 3013 h->luma_offset[list][i]= get_se_golomb(&s->gb); 3014 if( h->luma_weight[list][i] != luma_def 3015 || h->luma_offset[list][i] != 0) { 3016 h->use_weight= 1; 3017 h->luma_weight_flag[list]= 1; 3018 } 3019 }else{ 3020 h->luma_weight[list][i]= luma_def; 3021 h->luma_offset[list][i]= 0; 3022 } 3023 3024 if(CHROMA){ 3025 chroma_weight_flag= get_bits1(&s->gb); 3026 if(chroma_weight_flag){ 3027 int j; 3028 for(j=0; j<2; j++){ 3029 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb); 3030 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb); 3031 if( h->chroma_weight[list][i][j] != chroma_def 3032 || h->chroma_offset[list][i][j] != 0) { 3033 h->use_weight_chroma= 1; 3034 h->chroma_weight_flag[list]= 1; 3035 } 3036 } 3037 }else{ 3038 int j; 3039 for(j=0; j<2; j++){ 3040 h->chroma_weight[list][i][j]= chroma_def; 3041 h->chroma_offset[list][i][j]= 0; 3042 } 3043 } 3044 } 3045 } 3046 if(h->slice_type_nos != FF_B_TYPE) break; 3047 } 3048 h->use_weight= h->use_weight || h->use_weight_chroma; 3049 return 0; 3050} 3051 3052static void implicit_weight_table(H264Context *h){ 3053 MpegEncContext * const s = &h->s; 3054 int ref0, ref1, i; 3055 int cur_poc = s->current_picture_ptr->poc; 3056 3057 for (i = 0; i < 2; i++) { 3058 h->luma_weight_flag[i] = 0; 3059 h->chroma_weight_flag[i] = 0; 3060 } 3061 3062 if( 
h->ref_count[0] == 1 && h->ref_count[1] == 1 3063 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){ 3064 h->use_weight= 0; 3065 h->use_weight_chroma= 0; 3066 return; 3067 } 3068 3069 h->use_weight= 2; 3070 h->use_weight_chroma= 2; 3071 h->luma_log2_weight_denom= 5; 3072 h->chroma_log2_weight_denom= 5; 3073 3074 for(ref0=0; ref0 < h->ref_count[0]; ref0++){ 3075 int poc0 = h->ref_list[0][ref0].poc; 3076 for(ref1=0; ref1 < h->ref_count[1]; ref1++){ 3077 int poc1 = h->ref_list[1][ref1].poc; 3078 int td = av_clip(poc1 - poc0, -128, 127); 3079 if(td){ 3080 int tb = av_clip(cur_poc - poc0, -128, 127); 3081 int tx = (16384 + (FFABS(td) >> 1)) / td; 3082 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2; 3083 if(dist_scale_factor < -64 || dist_scale_factor > 128) 3084 h->implicit_weight[ref0][ref1] = 32; 3085 else 3086 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor; 3087 }else 3088 h->implicit_weight[ref0][ref1] = 32; 3089 } 3090 } 3091} 3092 3093/** 3094 * Mark a picture as no longer needed for reference. The refmask 3095 * argument allows unreferencing of individual fields or the whole frame. 3096 * If the picture becomes entirely unreferenced, but is being held for 3097 * display purposes, it is marked as such. 3098 * @param refmask mask of fields to unreference; the mask is bitwise 3099 * anded with the reference marking of pic 3100 * @return non-zero if pic becomes entirely unreferenced (except possibly 3101 * for display purposes) zero if one of the fields remains in 3102 * reference 3103 */ 3104static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){ 3105 int i; 3106 if (pic->reference &= refmask) { 3107 return 0; 3108 } else { 3109 for(i = 0; h->delayed_pic[i]; i++) 3110 if(pic == h->delayed_pic[i]){ 3111 pic->reference=DELAYED_PIC_REF; 3112 break; 3113 } 3114 return 1; 3115 } 3116} 3117 3118/** 3119 * instantaneous decoder refresh. 
3120 */ 3121static void idr(H264Context *h){ 3122 int i; 3123 3124 for(i=0; i<16; i++){ 3125 remove_long(h, i, 0); 3126 } 3127 assert(h->long_ref_count==0); 3128 3129 for(i=0; i<h->short_ref_count; i++){ 3130 unreference_pic(h, h->short_ref[i], 0); 3131 h->short_ref[i]= NULL; 3132 } 3133 h->short_ref_count=0; 3134 h->prev_frame_num= 0; 3135 h->prev_frame_num_offset= 0; 3136 h->prev_poc_msb= 3137 h->prev_poc_lsb= 0; 3138} 3139 3140/* forget old pics after a seek */ 3141static void flush_dpb(AVCodecContext *avctx){ 3142 H264Context *h= avctx->priv_data; 3143 int i; 3144 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) { 3145 if(h->delayed_pic[i]) 3146 h->delayed_pic[i]->reference= 0; 3147 h->delayed_pic[i]= NULL; 3148 } 3149 h->outputed_poc= INT_MIN; 3150 idr(h); 3151 if(h->s.current_picture_ptr) 3152 h->s.current_picture_ptr->reference= 0; 3153 h->s.first_field= 0; 3154 reset_sei(h); 3155 ff_mpeg_flush(avctx); 3156} 3157 3158/** 3159 * Find a Picture in the short term reference list by frame number. 3160 * @param frame_num frame number to search for 3161 * @param idx the index into h->short_ref where returned picture is found 3162 * undefined if no picture found. 3163 * @return pointer to the found picture, or NULL if no pic with the provided 3164 * frame number is found 3165 */ 3166static Picture * find_short(H264Context *h, int frame_num, int *idx){ 3167 MpegEncContext * const s = &h->s; 3168 int i; 3169 3170 for(i=0; i<h->short_ref_count; i++){ 3171 Picture *pic= h->short_ref[i]; 3172 if(s->avctx->debug&FF_DEBUG_MMCO) 3173 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic); 3174 if(pic->frame_num == frame_num) { 3175 *idx = i; 3176 return pic; 3177 } 3178 } 3179 return NULL; 3180} 3181 3182/** 3183 * Remove a picture from the short term reference list by its index in 3184 * that list. This does no checking on the provided index; it is assumed 3185 * to be valid. Other list entries are shifted down. 
3186 * @param i index into h->short_ref of picture to remove. 3187 */ 3188static void remove_short_at_index(H264Context *h, int i){ 3189 assert(i >= 0 && i < h->short_ref_count); 3190 h->short_ref[i]= NULL; 3191 if (--h->short_ref_count) 3192 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*)); 3193} 3194 3195/** 3196 * 3197 * @return the removed picture or NULL if an error occurs 3198 */ 3199static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){ 3200 MpegEncContext * const s = &h->s; 3201 Picture *pic; 3202 int i; 3203 3204 if(s->avctx->debug&FF_DEBUG_MMCO) 3205 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count); 3206 3207 pic = find_short(h, frame_num, &i); 3208 if (pic){ 3209 if(unreference_pic(h, pic, ref_mask)) 3210 remove_short_at_index(h, i); 3211 } 3212 3213 return pic; 3214} 3215 3216/** 3217 * Remove a picture from the long term reference list by its index in 3218 * that list. 3219 * @return the removed picture or NULL if an error occurs 3220 */ 3221static Picture * remove_long(H264Context *h, int i, int ref_mask){ 3222 Picture *pic; 3223 3224 pic= h->long_ref[i]; 3225 if (pic){ 3226 if(unreference_pic(h, pic, ref_mask)){ 3227 assert(h->long_ref[i]->long_ref == 1); 3228 h->long_ref[i]->long_ref= 0; 3229 h->long_ref[i]= NULL; 3230 h->long_ref_count--; 3231 } 3232 } 3233 3234 return pic; 3235} 3236 3237/** 3238 * print short term list 3239 */ 3240static void print_short_term(H264Context *h) { 3241 uint32_t i; 3242 if(h->s.avctx->debug&FF_DEBUG_MMCO) { 3243 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n"); 3244 for(i=0; i<h->short_ref_count; i++){ 3245 Picture *pic= h->short_ref[i]; 3246 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]); 3247 } 3248 } 3249} 3250 3251/** 3252 * print long term list 3253 */ 3254static void print_long_term(H264Context *h) { 3255 uint32_t i; 3256 
if(h->s.avctx->debug&FF_DEBUG_MMCO) { 3257 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n"); 3258 for(i = 0; i < 16; i++){ 3259 Picture *pic= h->long_ref[i]; 3260 if (pic) { 3261 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]); 3262 } 3263 } 3264 } 3265} 3266 3267/** 3268 * Executes the reference picture marking (memory management control operations). 3269 */ 3270static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ 3271 MpegEncContext * const s = &h->s; 3272 int i, j; 3273 int current_ref_assigned=0; 3274 Picture *av_uninit(pic); 3275 3276 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0) 3277 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n"); 3278 3279 for(i=0; i<mmco_count; i++){ 3280 int structure, av_uninit(frame_num); 3281 if(s->avctx->debug&FF_DEBUG_MMCO) 3282 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg); 3283 3284 if( mmco[i].opcode == MMCO_SHORT2UNUSED 3285 || mmco[i].opcode == MMCO_SHORT2LONG){ 3286 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure); 3287 pic = find_short(h, frame_num, &j); 3288 if(!pic){ 3289 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg] 3290 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num) 3291 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n"); 3292 continue; 3293 } 3294 } 3295 3296 switch(mmco[i].opcode){ 3297 case MMCO_SHORT2UNUSED: 3298 if(s->avctx->debug&FF_DEBUG_MMCO) 3299 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count); 3300 remove_short(h, frame_num, structure ^ PICT_FRAME); 3301 break; 3302 case MMCO_SHORT2LONG: 3303 if (h->long_ref[mmco[i].long_arg] != pic) 3304 remove_long(h, mmco[i].long_arg, 0); 3305 3306 remove_short_at_index(h, j); 3307 h->long_ref[ mmco[i].long_arg ]= pic; 3308 if (h->long_ref[ mmco[i].long_arg ]){ 3309 h->long_ref[ 
mmco[i].long_arg ]->long_ref=1; 3310 h->long_ref_count++; 3311 } 3312 break; 3313 case MMCO_LONG2UNUSED: 3314 j = pic_num_extract(h, mmco[i].long_arg, &structure); 3315 pic = h->long_ref[j]; 3316 if (pic) { 3317 remove_long(h, j, structure ^ PICT_FRAME); 3318 } else if(s->avctx->debug&FF_DEBUG_MMCO) 3319 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n"); 3320 break; 3321 case MMCO_LONG: 3322 // Comment below left from previous code as it is an interresting note. 3323 /* First field in pair is in short term list or 3324 * at a different long term index. 3325 * This is not allowed; see 7.4.3.3, notes 2 and 3. 3326 * Report the problem and keep the pair where it is, 3327 * and mark this field valid. 3328 */ 3329 3330 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) { 3331 remove_long(h, mmco[i].long_arg, 0); 3332 3333 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr; 3334 h->long_ref[ mmco[i].long_arg ]->long_ref=1; 3335 h->long_ref_count++; 3336 } 3337 3338 s->current_picture_ptr->reference |= s->picture_structure; 3339 current_ref_assigned=1; 3340 break; 3341 case MMCO_SET_MAX_LONG: 3342 assert(mmco[i].long_arg <= 16); 3343 // just remove the long term which index is greater than new max 3344 for(j = mmco[i].long_arg; j<16; j++){ 3345 remove_long(h, j, 0); 3346 } 3347 break; 3348 case MMCO_RESET: 3349 while(h->short_ref_count){ 3350 remove_short(h, h->short_ref[0]->frame_num, 0); 3351 } 3352 for(j = 0; j < 16; j++) { 3353 remove_long(h, j, 0); 3354 } 3355 s->current_picture_ptr->poc= 3356 s->current_picture_ptr->field_poc[0]= 3357 s->current_picture_ptr->field_poc[1]= 3358 h->poc_lsb= 3359 h->poc_msb= 3360 h->frame_num= 3361 s->current_picture_ptr->frame_num= 0; 3362 break; 3363 default: assert(0); 3364 } 3365 } 3366 3367 if (!current_ref_assigned) { 3368 /* Second field of complementary field pair; the first field of 3369 * which is already referenced. If short referenced, it 3370 * should be first entry in short_ref. 
If not, it must exist 3371 * in long_ref; trying to put it on the short list here is an 3372 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3). 3373 */ 3374 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) { 3375 /* Just mark the second field valid */ 3376 s->current_picture_ptr->reference = PICT_FRAME; 3377 } else if (s->current_picture_ptr->long_ref) { 3378 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference " 3379 "assignment for second field " 3380 "in complementary field pair " 3381 "(first field is long term)\n"); 3382 } else { 3383 pic= remove_short(h, s->current_picture_ptr->frame_num, 0); 3384 if(pic){ 3385 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n"); 3386 } 3387 3388 if(h->short_ref_count) 3389 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*)); 3390 3391 h->short_ref[0]= s->current_picture_ptr; 3392 h->short_ref_count++; 3393 s->current_picture_ptr->reference |= s->picture_structure; 3394 } 3395 } 3396 3397 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){ 3398 3399 /* We have too many reference frames, probably due to corrupted 3400 * stream. Need to discard one frame. Prevents overrun of the 3401 * short_ref and long_ref buffers. 
3402 */ 3403 av_log(h->s.avctx, AV_LOG_ERROR, 3404 "number of reference frames exceeds max (probably " 3405 "corrupt input), discarding one\n"); 3406 3407 if (h->long_ref_count && !h->short_ref_count) { 3408 for (i = 0; i < 16; ++i) 3409 if (h->long_ref[i]) 3410 break; 3411 3412 assert(i < 16); 3413 remove_long(h, i, 0); 3414 } else { 3415 pic = h->short_ref[h->short_ref_count - 1]; 3416 remove_short(h, pic->frame_num, 0); 3417 } 3418 } 3419 3420 print_short_term(h); 3421 print_long_term(h); 3422 return 0; 3423} 3424 3425static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){ 3426 MpegEncContext * const s = &h->s; 3427 int i; 3428 3429 h->mmco_index= 0; 3430 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields 3431 s->broken_link= get_bits1(gb) -1; 3432 if(get_bits1(gb)){ 3433 h->mmco[0].opcode= MMCO_LONG; 3434 h->mmco[0].long_arg= 0; 3435 h->mmco_index= 1; 3436 } 3437 }else{ 3438 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag 3439 for(i= 0; i<MAX_MMCO_COUNT; i++) { 3440 MMCOOpcode opcode= get_ue_golomb_31(gb); 3441 3442 h->mmco[i].opcode= opcode; 3443 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){ 3444 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1); 3445/* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){ 3446 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco); 3447 return -1; 3448 }*/ 3449 } 3450 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){ 3451 unsigned int long_arg= get_ue_golomb_31(gb); 3452 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){ 3453 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode); 3454 return -1; 3455 } 3456 h->mmco[i].long_arg= long_arg; 3457 } 3458 3459 if(opcode > (unsigned)MMCO_LONG){ 3460 
av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode); 3461 return -1; 3462 } 3463 if(opcode == MMCO_END) 3464 break; 3465 } 3466 h->mmco_index= i; 3467 }else{ 3468 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count); 3469 3470 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count && 3471 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) { 3472 h->mmco[0].opcode= MMCO_SHORT2UNUSED; 3473 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num; 3474 h->mmco_index= 1; 3475 if (FIELD_PICTURE) { 3476 h->mmco[0].short_pic_num *= 2; 3477 h->mmco[1].opcode= MMCO_SHORT2UNUSED; 3478 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1; 3479 h->mmco_index= 2; 3480 } 3481 } 3482 } 3483 } 3484 3485 return 0; 3486} 3487 3488static int init_poc(H264Context *h){ 3489 MpegEncContext * const s = &h->s; 3490 const int max_frame_num= 1<<h->sps.log2_max_frame_num; 3491 int field_poc[2]; 3492 Picture *cur = s->current_picture_ptr; 3493 3494 h->frame_num_offset= h->prev_frame_num_offset; 3495 if(h->frame_num < h->prev_frame_num) 3496 h->frame_num_offset += max_frame_num; 3497 3498 if(h->sps.poc_type==0){ 3499 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb; 3500 3501 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2) 3502 h->poc_msb = h->prev_poc_msb + max_poc_lsb; 3503 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2) 3504 h->poc_msb = h->prev_poc_msb - max_poc_lsb; 3505 else 3506 h->poc_msb = h->prev_poc_msb; 3507//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb); 3508 field_poc[0] = 3509 field_poc[1] = h->poc_msb + h->poc_lsb; 3510 if(s->picture_structure == PICT_FRAME) 3511 field_poc[1] += h->delta_poc_bottom; 3512 }else if(h->sps.poc_type==1){ 3513 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; 3514 int i; 3515 3516 if(h->sps.poc_cycle_length != 0) 3517 
abs_frame_num = h->frame_num_offset + h->frame_num; 3518 else 3519 abs_frame_num = 0; 3520 3521 if(h->nal_ref_idc==0 && abs_frame_num > 0) 3522 abs_frame_num--; 3523 3524 expected_delta_per_poc_cycle = 0; 3525 for(i=0; i < h->sps.poc_cycle_length; i++) 3526 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse 3527 3528 if(abs_frame_num > 0){ 3529 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; 3530 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; 3531 3532 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; 3533 for(i = 0; i <= frame_num_in_poc_cycle; i++) 3534 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ]; 3535 } else 3536 expectedpoc = 0; 3537 3538 if(h->nal_ref_idc == 0) 3539 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; 3540 3541 field_poc[0] = expectedpoc + h->delta_poc[0]; 3542 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; 3543 3544 if(s->picture_structure == PICT_FRAME) 3545 field_poc[1] += h->delta_poc[1]; 3546 }else{ 3547 int poc= 2*(h->frame_num_offset + h->frame_num); 3548 3549 if(!h->nal_ref_idc) 3550 poc--; 3551 3552 field_poc[0]= poc; 3553 field_poc[1]= poc; 3554 } 3555 3556 if(s->picture_structure != PICT_BOTTOM_FIELD) 3557 s->current_picture_ptr->field_poc[0]= field_poc[0]; 3558 if(s->picture_structure != PICT_TOP_FIELD) 3559 s->current_picture_ptr->field_poc[1]= field_poc[1]; 3560 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]); 3561 3562 return 0; 3563} 3564 3565 3566/** 3567 * initialize scan tables 3568 */ 3569static void init_scan_tables(H264Context *h){ 3570 MpegEncContext * const s = &h->s; 3571 int i; 3572 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly 3573 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t)); 3574 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); 3575 }else{ 3576 for(i=0; i<16; i++){ 3577#define T(x) (x>>2) | ((x<<2) & 0xF) 3578 
h->zigzag_scan[i] = T(zigzag_scan[i]); 3579 h-> field_scan[i] = T( field_scan[i]); 3580#undef T 3581 } 3582 } 3583 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){ 3584 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t)); 3585 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); 3586 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t)); 3587 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t)); 3588 }else{ 3589 for(i=0; i<64; i++){ 3590#define T(x) (x>>3) | ((x&7)<<3) 3591 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); 3592 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); 3593 h->field_scan8x8[i] = T(field_scan8x8[i]); 3594 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); 3595#undef T 3596 } 3597 } 3598 if(h->sps.transform_bypass){ //FIXME same ugly 3599 h->zigzag_scan_q0 = zigzag_scan; 3600 h->zigzag_scan8x8_q0 = ff_zigzag_direct; 3601 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; 3602 h->field_scan_q0 = field_scan; 3603 h->field_scan8x8_q0 = field_scan8x8; 3604 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc; 3605 }else{ 3606 h->zigzag_scan_q0 = h->zigzag_scan; 3607 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; 3608 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; 3609 h->field_scan_q0 = h->field_scan; 3610 h->field_scan8x8_q0 = h->field_scan8x8; 3611 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; 3612 } 3613} 3614 3615/** 3616 * Replicates H264 "master" context to thread contexts. 
3617 */ 3618static void clone_slice(H264Context *dst, H264Context *src) 3619{ 3620 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); 3621 dst->s.current_picture_ptr = src->s.current_picture_ptr; 3622 dst->s.current_picture = src->s.current_picture; 3623 dst->s.linesize = src->s.linesize; 3624 dst->s.uvlinesize = src->s.uvlinesize; 3625 dst->s.first_field = src->s.first_field; 3626 3627 dst->prev_poc_msb = src->prev_poc_msb; 3628 dst->prev_poc_lsb = src->prev_poc_lsb; 3629 dst->prev_frame_num_offset = src->prev_frame_num_offset; 3630 dst->prev_frame_num = src->prev_frame_num; 3631 dst->short_ref_count = src->short_ref_count; 3632 3633 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); 3634 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); 3635 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); 3636 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list)); 3637 3638 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); 3639 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); 3640} 3641 3642/** 3643 * decodes a slice header. 3644 * This will also call MPV_common_init() and frame_start() as needed. 
3645 * 3646 * @param h h264context 3647 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding) 3648 * 3649 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded 3650 */ 3651static int decode_slice_header(H264Context *h, H264Context *h0){ 3652 MpegEncContext * const s = &h->s; 3653 MpegEncContext * const s0 = &h0->s; 3654 unsigned int first_mb_in_slice; 3655 unsigned int pps_id; 3656 int num_ref_idx_active_override_flag; 3657 unsigned int slice_type, tmp, i, j; 3658 int default_ref_list_done = 0; 3659 int last_pic_structure; 3660 3661 s->dropable= h->nal_ref_idc == 0; 3662 3663 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){ 3664 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; 3665 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; 3666 }else{ 3667 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; 3668 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab; 3669 } 3670 3671 first_mb_in_slice= get_ue_golomb(&s->gb); 3672 3673 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){ 3674 h0->current_slice = 0; 3675 if (!s0->first_field) 3676 s->current_picture_ptr= NULL; 3677 } 3678 3679 slice_type= get_ue_golomb_31(&s->gb); 3680 if(slice_type > 9){ 3681 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y); 3682 return -1; 3683 } 3684 if(slice_type > 4){ 3685 slice_type -= 5; 3686 h->slice_type_fixed=1; 3687 }else 3688 h->slice_type_fixed=0; 3689 3690 slice_type= golomb_to_pict_type[ slice_type ]; 3691 if (slice_type == FF_I_TYPE 3692 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) { 3693 default_ref_list_done = 1; 3694 } 3695 h->slice_type= slice_type; 3696 h->slice_type_nos= slice_type & 3; 3697 3698 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though 3699 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) { 3700 av_log(h->s.avctx, AV_LOG_ERROR, 3701 "B picture before 
any references, skipping\n"); 3702 return -1; 3703 } 3704 3705 pps_id= get_ue_golomb(&s->gb); 3706 if(pps_id>=MAX_PPS_COUNT){ 3707 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); 3708 return -1; 3709 } 3710 if(!h0->pps_buffers[pps_id]) { 3711 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n"); 3712 return -1; 3713 } 3714 h->pps= *h0->pps_buffers[pps_id]; 3715 3716 if(!h0->sps_buffers[h->pps.sps_id]) { 3717 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n"); 3718 return -1; 3719 } 3720 h->sps = *h0->sps_buffers[h->pps.sps_id]; 3721 3722 if(h == h0 && h->dequant_coeff_pps != pps_id){ 3723 h->dequant_coeff_pps = pps_id; 3724 init_dequant_tables(h); 3725 } 3726 3727 s->mb_width= h->sps.mb_width; 3728 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); 3729 3730 h->b_stride= s->mb_width*4; 3731 h->b8_stride= s->mb_width*2; 3732 3733 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7); 3734 if(h->sps.frame_mbs_only_flag) 3735 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7); 3736 else 3737 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3); 3738 3739 if (s->context_initialized 3740 && ( s->width != s->avctx->width || s->height != s->avctx->height)) { 3741 if(h != h0) 3742 return -1; // width / height changed during parallelized decoding 3743 free_tables(h); 3744 flush_dpb(s->avctx); 3745 MPV_common_end(s); 3746 } 3747 if (!s->context_initialized) { 3748 if(h != h0) 3749 return -1; // we cant (re-)initialize context during parallel decoding 3750 if (MPV_common_init(s) < 0) 3751 return -1; 3752 s->first_field = 0; 3753 3754 init_scan_tables(h); 3755 alloc_tables(h); 3756 3757 for(i = 1; i < s->avctx->thread_count; i++) { 3758 H264Context *c; 3759 c = h->thread_context[i] = av_malloc(sizeof(H264Context)); 3760 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); 3761 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); 3762 c->sps = h->sps; 3763 c->pps = h->pps; 3764 
init_scan_tables(c); 3765 clone_tables(c, h); 3766 } 3767 3768 for(i = 0; i < s->avctx->thread_count; i++) 3769 if(context_init(h->thread_context[i]) < 0) 3770 return -1; 3771 3772 s->avctx->width = s->width; 3773 s->avctx->height = s->height; 3774 s->avctx->sample_aspect_ratio= h->sps.sar; 3775 if(!s->avctx->sample_aspect_ratio.den) 3776 s->avctx->sample_aspect_ratio.den = 1; 3777 3778 if(h->sps.timing_info_present_flag){ 3779 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale}; 3780 if(h->x264_build > 0 && h->x264_build < 44) 3781 s->avctx->time_base.den *= 2; 3782 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, 3783 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30); 3784 } 3785 } 3786 3787 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); 3788 3789 h->mb_mbaff = 0; 3790 h->mb_aff_frame = 0; 3791 last_pic_structure = s0->picture_structure; 3792 if(h->sps.frame_mbs_only_flag){ 3793 s->picture_structure= PICT_FRAME; 3794 }else{ 3795 if(get_bits1(&s->gb)) { //field_pic_flag 3796 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag 3797 } else { 3798 s->picture_structure= PICT_FRAME; 3799 h->mb_aff_frame = h->sps.mb_aff; 3800 } 3801 } 3802 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME; 3803 3804 if(h0->current_slice == 0){ 3805 while(h->frame_num != h->prev_frame_num && 3806 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){ 3807 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num); 3808 if (frame_start(h) < 0) 3809 return -1; 3810 h->prev_frame_num++; 3811 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num; 3812 s->current_picture_ptr->frame_num= h->prev_frame_num; 3813 execute_ref_pic_marking(h, NULL, 0); 3814 } 3815 3816 /* See if we have a decoded first field looking for a pair... 
*/ 3817 if (s0->first_field) { 3818 assert(s0->current_picture_ptr); 3819 assert(s0->current_picture_ptr->data[0]); 3820 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF); 3821 3822 /* figure out if we have a complementary field pair */ 3823 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) { 3824 /* 3825 * Previous field is unmatched. Don't display it, but let it 3826 * remain for reference if marked as such. 3827 */ 3828 s0->current_picture_ptr = NULL; 3829 s0->first_field = FIELD_PICTURE; 3830 3831 } else { 3832 if (h->nal_ref_idc && 3833 s0->current_picture_ptr->reference && 3834 s0->current_picture_ptr->frame_num != h->frame_num) { 3835 /* 3836 * This and previous field were reference, but had 3837 * different frame_nums. Consider this field first in 3838 * pair. Throw away previous field except for reference 3839 * purposes. 3840 */ 3841 s0->first_field = 1; 3842 s0->current_picture_ptr = NULL; 3843 3844 } else { 3845 /* Second field in complementary pair */ 3846 s0->first_field = 0; 3847 } 3848 } 3849 3850 } else { 3851 /* Frame or first field in a potentially complementary pair */ 3852 assert(!s0->current_picture_ptr); 3853 s0->first_field = FIELD_PICTURE; 3854 } 3855 3856 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) { 3857 s0->first_field = 0; 3858 return -1; 3859 } 3860 } 3861 if(h != h0) 3862 clone_slice(h, h0); 3863 3864 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup 3865 3866 assert(s->mb_num == s->mb_width * s->mb_height); 3867 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num || 3868 first_mb_in_slice >= s->mb_num){ 3869 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); 3870 return -1; 3871 } 3872 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; 3873 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE; 3874 if (s->picture_structure == PICT_BOTTOM_FIELD) 3875 s->resync_mb_y = s->mb_y = s->mb_y + 1; 3876 
assert(s->mb_y < s->mb_height); 3877 3878 if(s->picture_structure==PICT_FRAME){ 3879 h->curr_pic_num= h->frame_num; 3880 h->max_pic_num= 1<< h->sps.log2_max_frame_num; 3881 }else{ 3882 h->curr_pic_num= 2*h->frame_num + 1; 3883 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1); 3884 } 3885 3886 if(h->nal_unit_type == NAL_IDR_SLICE){ 3887 get_ue_golomb(&s->gb); /* idr_pic_id */ 3888 } 3889 3890 if(h->sps.poc_type==0){ 3891 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb); 3892 3893 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){ 3894 h->delta_poc_bottom= get_se_golomb(&s->gb); 3895 } 3896 } 3897 3898 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){ 3899 h->delta_poc[0]= get_se_golomb(&s->gb); 3900 3901 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME) 3902 h->delta_poc[1]= get_se_golomb(&s->gb); 3903 } 3904 3905 init_poc(h); 3906 3907 if(h->pps.redundant_pic_cnt_present){ 3908 h->redundant_pic_count= get_ue_golomb(&s->gb); 3909 } 3910 3911 //set defaults, might be overridden a few lines later 3912 h->ref_count[0]= h->pps.ref_count[0]; 3913 h->ref_count[1]= h->pps.ref_count[1]; 3914 3915 if(h->slice_type_nos != FF_I_TYPE){ 3916 if(h->slice_type_nos == FF_B_TYPE){ 3917 h->direct_spatial_mv_pred= get_bits1(&s->gb); 3918 } 3919 num_ref_idx_active_override_flag= get_bits1(&s->gb); 3920 3921 if(num_ref_idx_active_override_flag){ 3922 h->ref_count[0]= get_ue_golomb(&s->gb) + 1; 3923 if(h->slice_type_nos==FF_B_TYPE) 3924 h->ref_count[1]= get_ue_golomb(&s->gb) + 1; 3925 3926 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){ 3927 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n"); 3928 h->ref_count[0]= h->ref_count[1]= 1; 3929 return -1; 3930 } 3931 } 3932 if(h->slice_type_nos == FF_B_TYPE) 3933 h->list_count= 2; 3934 else 3935 h->list_count= 1; 3936 }else 3937 h->list_count= 0; 3938 3939 if(!default_ref_list_done){ 3940 fill_default_ref_list(h); 3941 } 3942 3943 if(h->slice_type_nos!=FF_I_TYPE && 
decode_ref_pic_list_reordering(h) < 0) 3944 return -1; 3945 3946 if(h->slice_type_nos!=FF_I_TYPE){ 3947 s->last_picture_ptr= &h->ref_list[0][0]; 3948 ff_copy_picture(&s->last_picture, s->last_picture_ptr); 3949 } 3950 if(h->slice_type_nos==FF_B_TYPE){ 3951 s->next_picture_ptr= &h->ref_list[1][0]; 3952 ff_copy_picture(&s->next_picture, s->next_picture_ptr); 3953 } 3954 3955 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE ) 3956 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) ) 3957 pred_weight_table(h); 3958 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE) 3959 implicit_weight_table(h); 3960 else { 3961 h->use_weight = 0; 3962 for (i = 0; i < 2; i++) { 3963 h->luma_weight_flag[i] = 0; 3964 h->chroma_weight_flag[i] = 0; 3965 } 3966 } 3967 3968 if(h->nal_ref_idc) 3969 decode_ref_pic_marking(h0, &s->gb); 3970 3971 if(FRAME_MBAFF) 3972 fill_mbaff_ref_list(h); 3973 3974 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred) 3975 direct_dist_scale_factor(h); 3976 direct_ref_list_init(h); 3977 3978 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){ 3979 tmp = get_ue_golomb_31(&s->gb); 3980 if(tmp > 2){ 3981 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); 3982 return -1; 3983 } 3984 h->cabac_init_idc= tmp; 3985 } 3986 3987 h->last_qscale_diff = 0; 3988 tmp = h->pps.init_qp + get_se_golomb(&s->gb); 3989 if(tmp>51){ 3990 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); 3991 return -1; 3992 } 3993 s->qscale= tmp; 3994 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); 3995 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); 3996 //FIXME qscale / qp ... 
stuff 3997 if(h->slice_type == FF_SP_TYPE){ 3998 get_bits1(&s->gb); /* sp_for_switch_flag */ 3999 } 4000 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){ 4001 get_se_golomb(&s->gb); /* slice_qs_delta */ 4002 } 4003 4004 h->deblocking_filter = 1; 4005 h->slice_alpha_c0_offset = 0; 4006 h->slice_beta_offset = 0; 4007 if( h->pps.deblocking_filter_parameters_present ) { 4008 tmp= get_ue_golomb_31(&s->gb); 4009 if(tmp > 2){ 4010 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp); 4011 return -1; 4012 } 4013 h->deblocking_filter= tmp; 4014 if(h->deblocking_filter < 2) 4015 h->deblocking_filter^= 1; // 1<->0 4016 4017 if( h->deblocking_filter ) { 4018 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1; 4019 h->slice_beta_offset = get_se_golomb(&s->gb) << 1; 4020 } 4021 } 4022 4023 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL 4024 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE) 4025 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE) 4026 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) 4027 h->deblocking_filter= 0; 4028 4029 if(h->deblocking_filter == 1 && h0->max_contexts > 1) { 4030 if(s->avctx->flags2 & CODEC_FLAG2_FAST) { 4031 /* Cheat slightly for speed: 4032 Do not bother to deblock across slices. 
*/ 4033 h->deblocking_filter = 2; 4034 } else { 4035 h0->max_contexts = 1; 4036 if(!h0->single_decode_warning) { 4037 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); 4038 h0->single_decode_warning = 1; 4039 } 4040 if(h != h0) 4041 return 1; // deblocking switched inside frame 4042 } 4043 } 4044 4045#if 0 //FMO 4046 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) 4047 slice_group_change_cycle= get_bits(&s->gb, ?); 4048#endif 4049 4050 h0->last_slice_type = slice_type; 4051 h->slice_num = ++h0->current_slice; 4052 if(h->slice_num >= MAX_SLICES){ 4053 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n"); 4054 } 4055 4056 for(j=0; j<2; j++){ 4057 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j]; 4058 ref2frm[0]= 4059 ref2frm[1]= -1; 4060 for(i=0; i<16; i++) 4061 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num 4062 +(h->ref_list[j][i].reference&3); 4063 ref2frm[18+0]= 4064 ref2frm[18+1]= -1; 4065 for(i=16; i<48; i++) 4066 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num 4067 +(h->ref_list[j][i].reference&3); 4068 } 4069 4070 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; 4071 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width; 4072 4073 s->avctx->refs= h->sps.ref_frame_count; 4074 4075 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ 4076 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", 4077 h->slice_num, 4078 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"), 4079 first_mb_in_slice, 4080 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? 
" IDR" : "", 4081 pps_id, h->frame_num, 4082 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1], 4083 h->ref_count[0], h->ref_count[1], 4084 s->qscale, 4085 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2, 4086 h->use_weight, 4087 h->use_weight==1 && h->use_weight_chroma ? "c" : "", 4088 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "" 4089 ); 4090 } 4091 4092 return 0; 4093} 4094 4095/** 4096 * 4097 */ 4098static inline int get_level_prefix(GetBitContext *gb){ 4099 unsigned int buf; 4100 int log; 4101 4102 OPEN_READER(re, gb); 4103 UPDATE_CACHE(re, gb); 4104 buf=GET_CACHE(re, gb); 4105 4106 log= 32 - av_log2(buf); 4107#ifdef TRACE 4108 print_bin(buf>>(32-log), log); 4109 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); 4110#endif 4111 4112 LAST_SKIP_BITS(re, gb, log); 4113 CLOSE_READER(re, gb); 4114 4115 return log-1; 4116} 4117 4118static inline int get_dct8x8_allowed(H264Context *h){ 4119 if(h->sps.direct_8x8_inference_flag) 4120 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); 4121 else 4122 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); 4123} 4124 4125/** 4126 * decodes a residual block. 
 * Reads coeff_token, the trailing-one signs, the remaining levels,
 * total_zeros and the run_before values from gb and stores the resulting
 * coefficients into block at the positions given by scantable.
 *
 * @param h h264 context; supplies the predicted non-zero count and receives
 *          the decoded count in non_zero_count_cache for non-DC blocks
 * @param gb bit reader to parse from
 * @param block destination coefficient array
 * @param n block index
 * @param scantable scantable
 * @param qmul dequantization factors indexed by coefficient position; only
 *             used when n <= 24
 * @param max_coeff number of coefficients in the block
 * @return <0 if an error occurred
 */
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
    MpegEncContext * const s = &h->s;
    /* maps a predicted non-zero count (0..16) to one of the 4 coeff_token tables */
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
    int level[16];
    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;

    //FIXME put trailing_onex into the context

    /* coeff_token packs total_coeff in the upper bits and trailing_ones in the low 2 bits */
    if(n == CHROMA_DC_BLOCK_INDEX){
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
        total_coeff= coeff_token>>2;
    }else{
        if(n == LUMA_DC_BLOCK_INDEX){
            /* luma DC uses the prediction of block 0 and does not update the cache */
            total_coeff= pred_non_zero_count(h, 0);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }else{
            total_coeff= pred_non_zero_count(h, n);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
        }
    }

    //FIXME set last_non_zero?

    if(total_coeff==0)
        return 0;
    if(total_coeff > (unsigned)max_coeff) {
        av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
        return -1;
    }

    trailing_ones= coeff_token&3;
    tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
    assert(total_coeff<=16);

    /* peek 3 bits but consume only trailing_ones of them; each set bit turns
       the corresponding level into -1, each clear bit into +1 */
    i = show_bits(gb, 3);
    skip_bits(gb, trailing_ones);
    level[0] = 1-((i&4)>>1);
    level[1] = 1-((i&2)   );
    level[2] = 1-((i&1)<<1);

    if(trailing_ones<total_coeff) {
        int mask, prefix;
        int suffix_length = total_coeff > 10 && trailing_ones < 3;
        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
        int level_code= cavlc_level_tab[suffix_length][bitsi][0];

        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
        /* table values >= 100 are escapes carrying the prefix (value - 100);
           smaller values are used directly as the level */
        if(level_code >= 100){
            prefix= level_code - 100;
            if(prefix == LEVEL_TAB_BITS)
                prefix += get_level_prefix(gb);

            //first coefficient has suffix_length equal to 0 or 1
            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
                if(suffix_length)
                    level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
                else
                    level_code= (prefix<<suffix_length); //part
            }else if(prefix==14){
                if(suffix_length)
                    level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
                else
                    level_code= prefix + get_bits(gb, 4); //part
            }else{
                level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
                if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
                if(prefix>=16)
                    level_code += (1<<(prefix-3))-4096;
            }

            if(trailing_ones < 3) level_code += 2;

            suffix_length = 2;
            /* odd level_code -> negative level, even -> positive level */
            mask= -(level_code&1);
            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
        }else{
            /* move the level one step away from zero, keeping its sign */
            if(trailing_ones < 3) level_code += (level_code>>31)|1;

            suffix_length = 1;
            /* true when level_code lies outside [-3, 3] */
            if(level_code + 3U > 6U)
                suffix_length++;
            level[trailing_ones]= level_code;
        }

        //remaining coefficients have suffix_length > 0
        for(i=trailing_ones+1;i<total_coeff;i++) {
            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
            level_code= cavlc_level_tab[suffix_length][bitsi][0];

            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
            if(level_code >= 100){
                prefix= level_code - 100;
                if(prefix == LEVEL_TAB_BITS){
                    prefix += get_level_prefix(gb);
                }
                if(prefix<15){
                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                }else{
                    level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
                    if(prefix>=16)
                        level_code += (1<<(prefix-3))-4096;
                }
                mask= -(level_code&1);
                level_code= (((2+level_code)>>1) ^ mask) - mask;
            }
            level[i]= level_code;

            /* unsigned compare: true when |level_code| exceeds the limit of
               the current suffix_length */
            if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
                suffix_length++;
        }
    }

    /* total_zeros is not coded when every coefficient is present */
    if(total_coeff == max_coeff)
        zeros_left=0;
    else{
        if(n == CHROMA_DC_BLOCK_INDEX)
            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
        else
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
    }

    /* coefficients are placed from the highest scan position downwards */
    coeff_num = zeros_left + total_coeff - 1;
    j = scantable[coeff_num];
    /* n > 24: levels are stored as read, without qmul (presumably the DC
       block indices — TODO confirm against h264.h) */
    if(n > 24){
        block[j] = level[0];
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= level[i];
        }
    }else{
        /* dequantize while storing: (level * qmul + 32) >> 6 */
        block[j] = (level[0] * qmul[j] + 32)>>6;
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= (level[i] * qmul[j] + 32)>>6;
        }
    }

    /* NOTE(review): this check runs only after the loops above have already
       used coeff_num to index scantable — confirm that an oversized
       run_before cannot make coeff_num negative before getting here */
    if(zeros_left<0){
        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    return 0;
}

/**
 * Predicts the field decoding flag of the current macroblock from a neighbour.
 * Uses the mb_type of the macroblock at mb_xy-1 if it belongs to the current
 * slice, otherwise that of the macroblock at mb_xy-mb_stride if it belongs to
 * the current slice, otherwise 0; mb_mbaff and mb_field_decoding_flag are both
 * set to 1 if that type is interlaced and to 0 if not.
 */
static void predict_field_decoding_flag(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
                ? s->current_picture.mb_type[mb_xy-1]
                : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
                ? s->current_picture.mb_type[mb_xy-s->mb_stride]
                : 0;
    h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
}

/**
 * decodes a P_SKIP or B_SKIP macroblock
 *
 * Clears the non-zero counts of the macroblock, derives its motion
 * (pred_direct_motion() in B slices, pred_pskip_motion() with reference 0
 * otherwise), writes the motion back and records mb_type, qscale and slice
 * number for the macroblock at h->mb_xy.
 */
static void decode_mb_skip(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int mb_type=0;

    memset(h->non_zero_count[mb_xy], 0, 16);
    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui

    if(MB_FIELD)
        mb_type|= MB_TYPE_INTERLACED;

    if( h->slice_type_nos == FF_B_TYPE )
    {
        // just for fill_caches. pred_direct_motion will set the real mb_type
        mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;

        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
        pred_direct_motion(h, &mb_type);
        mb_type|= MB_TYPE_SKIP;
    }
    else
    {
        int mx, my;
        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;

        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
        pred_pskip_motion(h, &mx, &my);
        /* whole macroblock: reference index 0 and the predicted skip vector */
        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
        fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
    }

    write_back_motion(h, mb_type);
    s->current_picture.mb_type[mb_xy]= mb_type;
    s->current_picture.qscale_table[mb_xy]= s->qscale;
    h->slice_table[ mb_xy ]= h->slice_num;
    h->prev_mb_skipped= 1;
}

/**
 * decodes a macroblock
 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 */
static int decode_mb_cavlc(H264Context *h){
    MpegEncContext * const s = &h->s;
    int mb_xy;
    int partition_count;
    unsigned int mb_type, cbp;
    int dct8x8_allowed= h->pps.transform_8x8_mode;

    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;

    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
                down the code */
    if(h->slice_type_nos != FF_I_TYPE){
        if(s->mb_skip_run==-1)
            s->mb_skip_run= get_ue_golomb(&s->gb);

        if (s->mb_skip_run--) {
            if(FRAME_MBAFF && (s->mb_y&1) == 0){
                if(s->mb_skip_run==0)
                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
                else
                    predict_field_decoding_flag(h);
            }
            decode_mb_skip(h);
            return 0;
        }
    }
    if(FRAME_MBAFF){
        if( (s->mb_y&1) == 0 )
            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
    }

    h->prev_mb_skipped= 0;

    mb_type= get_ue_golomb(&s->gb);
    if(h->slice_type_nos == FF_B_TYPE){
        if(mb_type < 23){
            partition_count= b_mb_type_info[mb_type].partition_count;
            mb_type=         b_mb_type_info[mb_type].type;
        }else{
            mb_type -= 23;
            goto decode_intra_mb;
        }
    }else if(h->slice_type_nos == FF_P_TYPE){
        if(mb_type < 5){
            partition_count= p_mb_type_info[mb_type].partition_count;
            mb_type=         p_mb_type_info[mb_type].type;
        }else{
4405 mb_type -= 5; 4406 goto decode_intra_mb; 4407 } 4408 }else{ 4409 assert(h->slice_type_nos == FF_I_TYPE); 4410 if(h->slice_type == FF_SI_TYPE && mb_type) 4411 mb_type--; 4412decode_intra_mb: 4413 if(mb_type > 25){ 4414 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y); 4415 return -1; 4416 } 4417 partition_count=0; 4418 cbp= i_mb_type_info[mb_type].cbp; 4419 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode; 4420 mb_type= i_mb_type_info[mb_type].type; 4421 } 4422 4423 if(MB_FIELD) 4424 mb_type |= MB_TYPE_INTERLACED; 4425 4426 h->slice_table[ mb_xy ]= h->slice_num; 4427 4428 if(IS_INTRA_PCM(mb_type)){ 4429 unsigned int x; 4430 4431 // We assume these blocks are very rare so we do not optimize it. 4432 align_get_bits(&s->gb); 4433 4434 // The pixels are stored in the same order as levels in h->mb array. 4435 for(x=0; x < (CHROMA ? 384 : 256); x++){ 4436 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8); 4437 } 4438 4439 // In deblocking, the quantizer is 0 4440 s->current_picture.qscale_table[mb_xy]= 0; 4441 // All coeffs are present 4442 memset(h->non_zero_count[mb_xy], 16, 16); 4443 4444 s->current_picture.mb_type[mb_xy]= mb_type; 4445 return 0; 4446 } 4447 4448 if(MB_MBAFF){ 4449 h->ref_count[0] <<= 1; 4450 h->ref_count[1] <<= 1; 4451 } 4452 4453 fill_caches(h, mb_type, 0); 4454 4455 //mb_pred 4456 if(IS_INTRA(mb_type)){ 4457 int pred_mode; 4458// init_top_left_availability(h); 4459 if(IS_INTRA4x4(mb_type)){ 4460 int i; 4461 int di = 1; 4462 if(dct8x8_allowed && get_bits1(&s->gb)){ 4463 mb_type |= MB_TYPE_8x8DCT; 4464 di = 4; 4465 } 4466 4467// fill_intra4x4_pred_table(h); 4468 for(i=0; i<16; i+=di){ 4469 int mode= pred_intra_mode(h, i); 4470 4471 if(!get_bits1(&s->gb)){ 4472 const int rem_mode= get_bits(&s->gb, 3); 4473 mode = rem_mode + (rem_mode >= mode); 4474 } 4475 4476 if(di==4) 4477 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); 
4478 else 4479 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode; 4480 } 4481 write_back_intra_pred_mode(h); 4482 if( check_intra4x4_pred_mode(h) < 0) 4483 return -1; 4484 }else{ 4485 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode); 4486 if(h->intra16x16_pred_mode < 0) 4487 return -1; 4488 } 4489 if(CHROMA){ 4490 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb)); 4491 if(pred_mode < 0) 4492 return -1; 4493 h->chroma_pred_mode= pred_mode; 4494 } 4495 }else if(partition_count==4){ 4496 int i, j, sub_partition_count[4], list, ref[2][4]; 4497 4498 if(h->slice_type_nos == FF_B_TYPE){ 4499 for(i=0; i<4; i++){ 4500 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb); 4501 if(h->sub_mb_type[i] >=13){ 4502 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y); 4503 return -1; 4504 } 4505 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; 4506 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; 4507 } 4508 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1]) 4509 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) { 4510 pred_direct_motion(h, &mb_type); 4511 h->ref_cache[0][scan8[4]] = 4512 h->ref_cache[1][scan8[4]] = 4513 h->ref_cache[0][scan8[12]] = 4514 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE; 4515 } 4516 }else{ 4517 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ? 4518 for(i=0; i<4; i++){ 4519 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb); 4520 if(h->sub_mb_type[i] >=4){ 4521 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y); 4522 return -1; 4523 } 4524 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; 4525 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type; 4526 } 4527 } 4528 4529 for(list=0; list<h->list_count; list++){ 4530 int ref_count= IS_REF0(mb_type) ? 
1 : h->ref_count[list]; 4531 for(i=0; i<4; i++){ 4532 if(IS_DIRECT(h->sub_mb_type[i])) continue; 4533 if(IS_DIR(h->sub_mb_type[i], 0, list)){ 4534 unsigned int tmp; 4535 if(ref_count == 1){ 4536 tmp= 0; 4537 }else if(ref_count == 2){ 4538 tmp= get_bits1(&s->gb)^1; 4539 }else{ 4540 tmp= get_ue_golomb_31(&s->gb); 4541 if(tmp>=ref_count){ 4542 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp); 4543 return -1; 4544 } 4545 } 4546 ref[list][i]= tmp; 4547 }else{ 4548 //FIXME 4549 ref[list][i] = -1; 4550 } 4551 } 4552 } 4553 4554 if(dct8x8_allowed) 4555 dct8x8_allowed = get_dct8x8_allowed(h); 4556 4557 for(list=0; list<h->list_count; list++){ 4558 for(i=0; i<4; i++){ 4559 if(IS_DIRECT(h->sub_mb_type[i])) { 4560 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ]; 4561 continue; 4562 } 4563 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]= 4564 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; 4565 4566 if(IS_DIR(h->sub_mb_type[i], 0, list)){ 4567 const int sub_mb_type= h->sub_mb_type[i]; 4568 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 
2 : 1; 4569 for(j=0; j<sub_partition_count[i]; j++){ 4570 int mx, my; 4571 const int index= 4*i + block_width*j; 4572 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ]; 4573 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my); 4574 mx += get_se_golomb(&s->gb); 4575 my += get_se_golomb(&s->gb); 4576 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 4577 4578 if(IS_SUB_8X8(sub_mb_type)){ 4579 mv_cache[ 1 ][0]= 4580 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; 4581 mv_cache[ 1 ][1]= 4582 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; 4583 }else if(IS_SUB_8X4(sub_mb_type)){ 4584 mv_cache[ 1 ][0]= mx; 4585 mv_cache[ 1 ][1]= my; 4586 }else if(IS_SUB_4X8(sub_mb_type)){ 4587 mv_cache[ 8 ][0]= mx; 4588 mv_cache[ 8 ][1]= my; 4589 } 4590 mv_cache[ 0 ][0]= mx; 4591 mv_cache[ 0 ][1]= my; 4592 } 4593 }else{ 4594 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0]; 4595 p[0] = p[1]= 4596 p[8] = p[9]= 0; 4597 } 4598 } 4599 } 4600 }else if(IS_DIRECT(mb_type)){ 4601 pred_direct_motion(h, &mb_type); 4602 dct8x8_allowed &= h->sps.direct_8x8_inference_flag; 4603 }else{ 4604 int list, mx, my, i; 4605 //FIXME we should set ref_idx_l? to 0 if we use that later ... 
4606 if(IS_16X16(mb_type)){ 4607 for(list=0; list<h->list_count; list++){ 4608 unsigned int val; 4609 if(IS_DIR(mb_type, 0, list)){ 4610 if(h->ref_count[list]==1){ 4611 val= 0; 4612 }else if(h->ref_count[list]==2){ 4613 val= get_bits1(&s->gb)^1; 4614 }else{ 4615 val= get_ue_golomb_31(&s->gb); 4616 if(val >= h->ref_count[list]){ 4617 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); 4618 return -1; 4619 } 4620 } 4621 }else 4622 val= LIST_NOT_USED&0xFF; 4623 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1); 4624 } 4625 for(list=0; list<h->list_count; list++){ 4626 unsigned int val; 4627 if(IS_DIR(mb_type, 0, list)){ 4628 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my); 4629 mx += get_se_golomb(&s->gb); 4630 my += get_se_golomb(&s->gb); 4631 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 4632 4633 val= pack16to32(mx,my); 4634 }else 4635 val=0; 4636 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4); 4637 } 4638 } 4639 else if(IS_16X8(mb_type)){ 4640 for(list=0; list<h->list_count; list++){ 4641 for(i=0; i<2; i++){ 4642 unsigned int val; 4643 if(IS_DIR(mb_type, i, list)){ 4644 if(h->ref_count[list] == 1){ 4645 val= 0; 4646 }else if(h->ref_count[list] == 2){ 4647 val= get_bits1(&s->gb)^1; 4648 }else{ 4649 val= get_ue_golomb_31(&s->gb); 4650 if(val >= h->ref_count[list]){ 4651 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); 4652 return -1; 4653 } 4654 } 4655 }else 4656 val= LIST_NOT_USED&0xFF; 4657 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1); 4658 } 4659 } 4660 for(list=0; list<h->list_count; list++){ 4661 for(i=0; i<2; i++){ 4662 unsigned int val; 4663 if(IS_DIR(mb_type, i, list)){ 4664 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my); 4665 mx += get_se_golomb(&s->gb); 4666 my += get_se_golomb(&s->gb); 4667 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 4668 4669 val= pack16to32(mx,my); 4670 }else 4671 val=0; 4672 
fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4); 4673 } 4674 } 4675 }else{ 4676 assert(IS_8X16(mb_type)); 4677 for(list=0; list<h->list_count; list++){ 4678 for(i=0; i<2; i++){ 4679 unsigned int val; 4680 if(IS_DIR(mb_type, i, list)){ //FIXME optimize 4681 if(h->ref_count[list]==1){ 4682 val= 0; 4683 }else if(h->ref_count[list]==2){ 4684 val= get_bits1(&s->gb)^1; 4685 }else{ 4686 val= get_ue_golomb_31(&s->gb); 4687 if(val >= h->ref_count[list]){ 4688 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val); 4689 return -1; 4690 } 4691 } 4692 }else 4693 val= LIST_NOT_USED&0xFF; 4694 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1); 4695 } 4696 } 4697 for(list=0; list<h->list_count; list++){ 4698 for(i=0; i<2; i++){ 4699 unsigned int val; 4700 if(IS_DIR(mb_type, i, list)){ 4701 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); 4702 mx += get_se_golomb(&s->gb); 4703 my += get_se_golomb(&s->gb); 4704 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 4705 4706 val= pack16to32(mx,my); 4707 }else 4708 val=0; 4709 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4); 4710 } 4711 } 4712 } 4713 } 4714 4715 if(IS_INTER(mb_type)) 4716 write_back_motion(h, mb_type); 4717 4718 if(!IS_INTRA16x16(mb_type)){ 4719 cbp= get_ue_golomb(&s->gb); 4720 if(cbp > 47){ 4721 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y); 4722 return -1; 4723 } 4724 4725 if(CHROMA){ 4726 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp]; 4727 else cbp= golomb_to_inter_cbp [cbp]; 4728 }else{ 4729 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp]; 4730 else cbp= golomb_to_inter_cbp_gray[cbp]; 4731 } 4732 } 4733 h->cbp = cbp; 4734 4735 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){ 4736 if(get_bits1(&s->gb)){ 4737 mb_type |= MB_TYPE_8x8DCT; 4738 h->cbp_table[mb_xy]= cbp; 4739 } 4740 } 4741 s->current_picture.mb_type[mb_xy]= mb_type; 4742 4743 if(cbp || 
IS_INTRA16x16(mb_type)){ 4744 int i8x8, i4x4, chroma_idx; 4745 int dquant; 4746 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; 4747 const uint8_t *scan, *scan8x8, *dc_scan; 4748 4749// fill_non_zero_count_cache(h); 4750 4751 if(IS_INTERLACED(mb_type)){ 4752 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; 4753 scan= s->qscale ? h->field_scan : h->field_scan_q0; 4754 dc_scan= luma_dc_field_scan; 4755 }else{ 4756 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; 4757 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; 4758 dc_scan= luma_dc_zigzag_scan; 4759 } 4760 4761 dquant= get_se_golomb(&s->gb); 4762 4763 if( dquant > 25 || dquant < -26 ){ 4764 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y); 4765 return -1; 4766 } 4767 4768 s->qscale += dquant; 4769 if(((unsigned)s->qscale) > 51){ 4770 if(s->qscale<0) s->qscale+= 52; 4771 else s->qscale-= 52; 4772 } 4773 4774 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale); 4775 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale); 4776 if(IS_INTRA16x16(mb_type)){ 4777 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ 4778 return -1; //FIXME continue if partitioned and other return -1 too 4779 } 4780 4781 assert((cbp&15) == 0 || (cbp&15) == 15); 4782 4783 if(cbp&15){ 4784 for(i8x8=0; i8x8<4; i8x8++){ 4785 for(i4x4=0; i4x4<4; i4x4++){ 4786 const int index= i4x4 + 4*i8x8; 4787 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ 4788 return -1; 4789 } 4790 } 4791 } 4792 }else{ 4793 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); 4794 } 4795 }else{ 4796 for(i8x8=0; i8x8<4; i8x8++){ 4797 if(cbp & (1<<i8x8)){ 4798 if(IS_8x8DCT(mb_type)){ 4799 DCTELEM *buf = &h->mb[64*i8x8]; 4800 uint8_t *nnz; 4801 for(i4x4=0; i4x4<4; i4x4++){ 4802 if( decode_residual(h, gb, 
buf, i4x4+4*i8x8, scan8x8+16*i4x4, 4803 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) 4804 return -1; 4805 } 4806 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; 4807 nnz[0] += nnz[1] + nnz[8] + nnz[9]; 4808 }else{ 4809 for(i4x4=0; i4x4<4; i4x4++){ 4810 const int index= i4x4 + 4*i8x8; 4811 4812 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){ 4813 return -1; 4814 } 4815 } 4816 } 4817 }else{ 4818 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; 4819 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; 4820 } 4821 } 4822 } 4823 4824 if(cbp&0x30){ 4825 for(chroma_idx=0; chroma_idx<2; chroma_idx++) 4826 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){ 4827 return -1; 4828 } 4829 } 4830 4831 if(cbp&0x20){ 4832 for(chroma_idx=0; chroma_idx<2; chroma_idx++){ 4833 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 
0:3)][h->chroma_qp[chroma_idx]]; 4834 for(i4x4=0; i4x4<4; i4x4++){ 4835 const int index= 16 + 4*chroma_idx + i4x4; 4836 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){ 4837 return -1; 4838 } 4839 } 4840 } 4841 }else{ 4842 uint8_t * const nnz= &h->non_zero_count_cache[0]; 4843 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = 4844 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; 4845 } 4846 }else{ 4847 uint8_t * const nnz= &h->non_zero_count_cache[0]; 4848 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); 4849 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = 4850 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; 4851 } 4852 s->current_picture.qscale_table[mb_xy]= s->qscale; 4853 write_back_non_zero_count(h); 4854 4855 if(MB_MBAFF){ 4856 h->ref_count[0] >>= 1; 4857 h->ref_count[1] >>= 1; 4858 } 4859 4860 return 0; 4861} 4862 /** Decodes the field decoding flag of the current macroblock pair (mb_y is rounded down to the top row of the pair). The context is cabac_state[70 + ctx], where ctx counts how many of the left pair (mba_xy) and the pair above (mbb_xy) belong to the current slice and are interlaced. @return the bin returned by get_cabac_noinline() */ 4863static int decode_cabac_field_decoding_flag(H264Context *h) { 4864 MpegEncContext * const s = &h->s; 4865 const int mb_x = s->mb_x; 4866 const int mb_y = s->mb_y & ~1; 4867 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride; 4868 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride; 4869 4870 unsigned int ctx = 0; 4871 4872 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) { 4873 ctx += 1; 4874 } 4875 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) { 4876 ctx += 1; 4877 } 4878 4879 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] ); 4880} 4881 /** Decodes an intra macroblock type. @param ctx_base index of the first context to use in h->cabac_state @param intra_slice non-zero selects the intra-slice layout: the context of the first bin is then chosen by counting the left/top neighbours that are in the current slice and are not intra 4x4, and the indices of the later contexts depend on intra_slice as well @return 0 for I4x4, 25 for PCM, otherwise a value in 1..24 describing an I16x16 variant */ 4882static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) { 4883 uint8_t *state= &h->cabac_state[ctx_base]; 4884 int mb_type; 4885 4886 if(intra_slice){ 4887 MpegEncContext * const s = &h->s; 4888 const int mba_xy = h->left_mb_xy[0]; 4889 const int mbb_xy = h->top_mb_xy; 4890 int ctx=0; 4891 if(
h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) ) 4892 ctx++; 4893 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) ) 4894 ctx++; 4895 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 ) 4896 return 0; /* I4x4 */ 4897 state += 2; 4898 }else{ 4899 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 ) 4900 return 0; /* I4x4 */ 4901 } 4902 4903 if( get_cabac_terminate( &h->cabac ) ) 4904 return 25; /* PCM */ 4905 /* I16x16: the remaining bins are accumulated into the type index */ 4906 mb_type = 1; /* I16x16 */ 4907 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */ 4908 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */ 4909 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] ); 4910 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] ); 4911 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] ); 4912 return mb_type; 4913} 4914 /** Decodes the macroblock type of a macroblock in a B slice. The context of the first bin (27 + ctx) counts the left/top neighbours that are in the current slice and are not direct. @return 0 for B_Direct_16x16, an inter type in 1..22, or 23 plus the value of decode_cabac_intra_mb_type() for an intra macroblock */ 4915static int decode_cabac_mb_type_b( H264Context *h ) { 4916 MpegEncContext * const s = &h->s; 4917 4918 const int mba_xy = h->left_mb_xy[0]; 4919 const int mbb_xy = h->top_mb_xy; 4920 int ctx = 0; 4921 int bits; 4922 assert(h->slice_type_nos == FF_B_TYPE); 4923 4924 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) 4925 ctx++; 4926 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) 4927 ctx++; 4928 4929 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) ) 4930 return 0; /* B_Direct_16x16 */ 4931 4932 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) { 4933 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */ 4934 } 4935 /* collect four more bins, most significant first */ 4936 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3; 4937 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2; 4938 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1; 4939 bits|= get_cabac_noinline(
&h->cabac, &h->cabac_state[27+5] ); 4940 if( bits < 8 ) 4941 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */ 4942 else if( bits == 13 ) { 4943 return decode_cabac_intra_mb_type(h, 32, 0) + 23; 4944 } else if( bits == 14 ) 4945 return 11; /* B_L1_L0_8x16 */ 4946 else if( bits == 15 ) 4947 return 22; /* B_8x8 */ 4948 /* bits is 8..12 here: one more bin extends it to a 5 bit code */ 4949 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); 4950 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */ 4951} 4952 /** Decodes the skip flag of the macroblock at (mb_x, mb_y). The context counts the left (mba_xy) and top (mbb_xy) neighbours that are in the current slice and are not skipped; B slices add 13 to the context index. With FRAME_MBAFF the neighbour addresses are derived from the macroblock pair and the field/frame state, otherwise from h->mb_xy with the top row distance doubled when FIELD_PICTURE is set. @return the bin returned by get_cabac_noinline() */ 4953static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) { 4954 MpegEncContext * const s = &h->s; 4955 int mba_xy, mbb_xy; 4956 int ctx = 0; 4957 4958 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches? 4959 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride; 4960 mba_xy = mb_xy - 1; 4961 if( (mb_y&1) 4962 && h->slice_table[mba_xy] == h->slice_num 4963 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) 4964 mba_xy += s->mb_stride; 4965 if( MB_FIELD ){ 4966 mbb_xy = mb_xy - s->mb_stride; 4967 if( !(mb_y&1) 4968 && h->slice_table[mbb_xy] == h->slice_num 4969 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) 4970 mbb_xy -= s->mb_stride; 4971 }else 4972 mbb_xy = mb_x + (mb_y-1)*s->mb_stride; 4973 }else{ 4974 int mb_xy = h->mb_xy; 4975 mba_xy = mb_xy - 1; 4976 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE); 4977 } 4978 4979 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )) 4980 ctx++; 4981 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )) 4982 ctx++; 4983 4984 if( h->slice_type_nos == FF_B_TYPE ) 4985 ctx += 13; 4986 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] ); 4987} 4988 /** Decodes an intra 4x4 prediction mode. @param pred_mode the predicted mode @return pred_mode if the first bin (context 68) is set; otherwise a 3 bit value read with context 69, least significant bit first, incremented by one when it is greater than or equal to pred_mode */ 4989static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) { 4990 int mode = 0; 4991 4992 if( get_cabac( &h->cabac, &h->cabac_state[68] ) ) 4993 return pred_mode; 4994 4995 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] ); 4996 mode += 2 * get_cabac(
&h->cabac, &h->cabac_state[69] ); 4997 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] ); 4998 /* skip over the predicted mode, which is signalled separately */ 4999 if( mode >= pred_mode ) 5000 return mode + 1; 5001 else 5002 return mode; 5003} 5004 /** Decodes the intra chroma prediction mode. The context of the first bin (64 + ctx) counts the left/top neighbours that are in the current slice and have a non-zero entry in chroma_pred_mode_table; the following bins use context 64+3. @return a value in 0..3 */ 5005static int decode_cabac_mb_chroma_pre_mode( H264Context *h) { 5006 const int mba_xy = h->left_mb_xy[0]; 5007 const int mbb_xy = h->top_mb_xy; 5008 5009 int ctx = 0; 5010 5011 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */ 5012 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 ) 5013 ctx++; 5014 5015 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 ) 5016 ctx++; 5017 5018 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) 5019 return 0; 5020 5021 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 ) 5022 return 1; 5023 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 ) 5024 return 2; 5025 else 5026 return 3; 5027} 5028 /** Decodes the four luma bits of the coded block pattern, one bin per bit starting with bit 0. The context of each bin (73 + ctx) is built from two already known cbp bits, taken from h->left_cbp / h->top_cbp or from the bits decoded so far; a neighbour outside the current slice is replaced by -1, i.e. all bits set. @return the 4 bit luma cbp */ 5029static int decode_cabac_mb_cbp_luma( H264Context *h) { 5030 int cbp_b, cbp_a, ctx, cbp = 0; 5031 5032 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1; 5033 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ?
h->top_cbp : -1; 5034 5035 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04); 5036 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]); 5037 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08); 5038 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1; 5039 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01); 5040 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2; 5041 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02); 5042 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3; 5043 return cbp; 5044} /** Decodes the chroma part of the coded block pattern. The neighbour values are bits 4..5 of h->left_cbp and h->top_cbp; the first bin uses context 77 + ctx with ctx built from which neighbours are non-zero, the second uses 77 + 4 + ctx with ctx built from which neighbours equal 2. @return 0, 1 or 2 */ 5045static int decode_cabac_mb_cbp_chroma( H264Context *h) { 5046 int ctx; 5047 int cbp_a, cbp_b; 5048 5049 cbp_a = (h->left_cbp>>4)&0x03; 5050 cbp_b = (h-> top_cbp>>4)&0x03; 5051 5052 ctx = 0; 5053 if( cbp_a > 0 ) ctx++; 5054 if( cbp_b > 0 ) ctx += 2; 5055 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 ) 5056 return 0; 5057 5058 ctx = 4; 5059 if( cbp_a == 2 ) ctx++; 5060 if( cbp_b == 2 ) ctx += 2; 5061 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ); 5062} /** Decodes the quantizer delta of a macroblock. A unary count val is read (first context 60 + (last_qscale_diff != 0), then 62, then 63) and mapped to a signed value: odd counts give +(val+1)/2, even counts give -(val/2). @return the signed delta, or INT_MIN when more than 102 set bins are read */ 5063static int decode_cabac_mb_dqp( H264Context *h) { 5064 int ctx= h->last_qscale_diff != 0; 5065 int val = 0; 5066 5067 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) { 5068 ctx= 2+(ctx>>1); 5069 val++; 5070 if(val > 102) //prevent infinite loop 5071 return INT_MIN; 5072 } 5073 5074 if( val&0x01 ) 5075 return (val + 1)>>1 ; 5076 else 5077 return -((val + 1)>>1); 5078} /** Decodes the sub macroblock type of one 8x8 partition in a P macroblock using contexts 21..23. @return 0 (8x8), 1 (8x4), 2 (4x8) or 3 (4x4) */ 5079static int decode_cabac_p_mb_sub_type( H264Context *h ) { 5080 if( get_cabac( &h->cabac, &h->cabac_state[21] ) ) 5081 return 0; /* 8x8 */ 5082 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) ) 5083 return 1; /* 8x4 */ 5084 if( get_cabac( &h->cabac, &h->cabac_state[23] ) ) 5085 return 2; /* 4x8 */ 5086 return 3; /* 4x4 */ 5087} /** Decodes the sub macroblock type of one 8x8 partition in a B macroblock using contexts 36..39. @return a value in 0..12, where 0 is B_Direct_8x8 */ 5088static int decode_cabac_b_mb_sub_type( H264Context *h ) { 5089 int type; 5090 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) ) 5091 return 0; /* B_Direct_8x8 */ 5092 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) ) 5093 return
1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */ 5094 type = 3; 5095 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) { 5096 if( get_cabac( &h->cabac, &h->cabac_state[39] ) ) 5097 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */ 5098 type += 4; 5099 } /* two more bins select one of four types starting at 3 or 7 */ 5100 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] ); 5101 type += get_cabac( &h->cabac, &h->cabac_state[39] ); 5102 return type; 5103} 5104 /** Decodes the transform size flag of a macroblock using context 399 + h->neighbor_transform_size. @return the bin returned by get_cabac_noinline() */ 5105static inline int decode_cabac_mb_transform_size( H264Context *h ) { 5106 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); 5107} 5108 /** Decodes a reference index as a unary code. The context of the first bin (54 + ctx) is built from the left and top entries of h->ref_cache[list] around block n that are greater than 0; in B slices an entry is ignored when the matching h->direct_cache entry is set. @param list reference list @param n block index, translated through scan8[] @return the reference index in 0..31, or -1 when 32 set bins are read */ 5109static int decode_cabac_mb_ref( H264Context *h, int list, int n ) { 5110 int refa = h->ref_cache[list][scan8[n] - 1]; 5111 int refb = h->ref_cache[list][scan8[n] - 8]; 5112 int ref = 0; 5113 int ctx = 0; 5114 5115 if( h->slice_type_nos == FF_B_TYPE) { 5116 if( refa > 0 && !h->direct_cache[scan8[n] - 1] ) 5117 ctx++; 5118 if( refb > 0 && !h->direct_cache[scan8[n] - 8] ) 5119 ctx += 2; 5120 } else { 5121 if( refa > 0 ) 5122 ctx++; 5123 if( refb > 0 ) 5124 ctx += 2; 5125 } 5126 5127 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) { 5128 ref++; 5129 ctx = (ctx>>2)+4; 5130 if(ref >= 32 /*h->ref_list[list]*/){ 5131 return -1; 5132 } 5133 } 5134 return ref; 5135} 5136 /** Decodes one component of a motion vector difference. The context of the first bin depends on the sum of the absolute left and top entries of h->mvd_cache for the same component. Magnitudes 1..8 are coded in unary with contexts, larger ones continue with a bypass coded escape, and the sign is read last as a bypass bin. @param list reference list @param n block index, translated through scan8[] @param l component: 0 uses context base 40, any other value uses 47 @return the signed difference, 0 when the first bin is clear, or INT_MIN when the escape prefix exceeds its limit (an error is logged) */ 5137static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) { 5138 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) + 5139 abs( h->mvd_cache[list][scan8[n] - 8][l] ); 5140 int ctxbase = (l == 0) ?
40 : 47; 5141 int mvd; 5142 int ctx = (amvd>2) + (amvd>32); 5143 5144 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx])) 5145 return 0; 5146 /* unary part: up to 8 more bins, contexts 3..6 relative to ctxbase */ 5147 mvd= 1; 5148 ctx= 3; 5149 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) { 5150 mvd++; 5151 if( ctx < 6 ) 5152 ctx++; 5153 } 5154 /* escape: bypass coded prefix that grows k from 3, followed by k suffix bits */ 5155 if( mvd >= 9 ) { 5156 int k = 3; 5157 while( get_cabac_bypass( &h->cabac ) ) { 5158 mvd += 1 << k; 5159 k++; 5160 if(k>24){ 5161 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n"); 5162 return INT_MIN; 5163 } 5164 } 5165 while( k-- ) { 5166 if( get_cabac_bypass( &h->cabac ) ) 5167 mvd += 1 << k; 5168 } 5169 } 5170 return get_cabac_bypass_sign( &h->cabac, -mvd ); 5171} 5172 /** Computes the context index offset of the coded block flag of a block. For DC blocks the neighbour state comes from bits of h->left_cbp and h->top_cbp (bit 0x100 for cat 0, bit 6+idx otherwise); for other blocks it comes from the left and top entries of h->non_zero_count_cache around scan8[idx]. @param cat block category @param idx block index @param is_dc non-zero for DC blocks @return ctx + 4 * cat, where ctx is 0..3 (1 if the left neighbour value is positive, plus 2 if the top one is) */ 5173static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) { 5174 int nza, nzb; 5175 int ctx = 0; 5176 5177 if( is_dc ) { 5178 if( cat == 0 ) { 5179 nza = h->left_cbp&0x100; 5180 nzb = h-> top_cbp&0x100; 5181 } else { 5182 nza = (h->left_cbp>>(6+idx))&0x01; 5183 nzb = (h-> top_cbp>>(6+idx))&0x01; 5184 } 5185 } else { 5186 assert(cat == 1 || cat == 2 || cat == 4); 5187 nza = h->non_zero_count_cache[scan8[idx] - 1]; 5188 nzb = h->non_zero_count_cache[scan8[idx] - 8]; 5189 } 5190 5191 if( nza > 0 ) 5192 ctx++; 5193 5194 if( nzb > 0 ) 5195 ctx += 2; 5196 5197 return ctx + 4 * cat; 5198} 5199 /** Offset of the last-coefficient context for each of the 63 scan positions visited by the 8x8 significance scan in decode_cabac_residual_internal(). */ 5200DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = { 5201 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5202 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5203 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5204 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 5205}; 5206 /** Decodes the residual coefficients of one block: coded block flag, significance map, then the levels in reverse scan order. @param block destination; each decoded level is stored at block[scantable[pos]] @param cat block category 0..5 (listed in a comment in the body) @param n block index, its meaning depends on cat @param scantable maps a scan position to an index into block and qmul @param qmul dequantization factors, only read when is_dc is zero @param max_coeff presumably the number of coefficients of the block; the non-8x8 significance scan visits max_coeff - 1 positions @param is_dc non-zero for DC blocks, whose levels are stored without dequantization */ 5207static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) { 5208 static const int significant_coeff_flag_offset[2][6] = { 5209 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 }, 5210 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 } 5211 }; 5212
static const int last_coeff_flag_offset[2][6] = { 5213 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 }, 5214 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 } 5215 }; /* base of the level contexts in h->cabac_state, indexed by cat */ 5216 static const int coeff_abs_level_m1_offset[6] = { 5217 227+0, 227+10, 227+20, 227+30, 227+39, 426 5218 }; /* per scan position offset of the significance context for 8x8 blocks, first index is MB_FIELD */ 5219 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = { 5220 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, 5221 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7, 5222 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11, 5223 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 }, 5224 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, 5225 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11, 5226 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, 5227 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } 5228 }; 5229 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0). 5230 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). 5231 * map node ctx => cabac ctx for level=1 */ 5232 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; 5233 /* map node ctx => cabac ctx for level>1 */ 5234 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 }; 5235 static const uint8_t coeff_abs_level_transition[2][8] = { 5236 /* update node ctx after decoding a level=1 */ 5237 { 1, 2, 3, 3, 4, 5, 6, 7 }, 5238 /* update node ctx after decoding a level>1 */ 5239 { 4, 4, 4, 4, 5, 6, 7, 7 } 5240 }; 5241 /* scan positions of the significant coefficients, filled by the significance scan and consumed from the end by the level loop */ 5242 int index[64]; 5243 5244 int av_unused last; 5245 int coeff_count = 0; 5246 int node_ctx = 0; 5247 5248 uint8_t *significant_coeff_ctx_base; 5249 uint8_t *last_coeff_ctx_base; 5250 uint8_t *abs_level_m1_ctx_base; 5251 /* when ARCH_X86 is not set, range/low/bytestream of h->cabac are copied into a local CABACContext that CC refers to; the copy is written back before each return of this function */ 5252#if !ARCH_X86 5253#define CABAC_ON_STACK 5254#endif 5255#ifdef CABAC_ON_STACK 5256#define CC &cc 5257 CABACContext cc; 5258 cc.range = h->cabac.range; 5259 cc.low = h->cabac.low; 5260 cc.bytestream= h->cabac.bytestream; 5261#else 5262#define CC &h->cabac 5263#endif 5264 5265 5266 /* cat: 0-> DC 16x16 n = 0 5267 * 1-> AC 16x16 n = luma4x4idx 5268 * 2-> Luma4x4 n =
luma4x4idx 5269 * 3-> DC Chroma n = iCbCr 5270 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx 5271 * 5-> Luma8x8 n = 4 * luma8x8idx 5272 */ 5273 5274 /* read coded block flag */ /* no coded block flag is read for non-DC blocks of cat 5; a clear flag ends decoding of this block */ 5275 if( is_dc || cat != 5 ) { 5276 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) { 5277 if( !is_dc ) 5278 h->non_zero_count_cache[scan8[n]] = 0; 5279 5280#ifdef CABAC_ON_STACK 5281 h->cabac.range = cc.range ; 5282 h->cabac.low = cc.low ; 5283 h->cabac.bytestream= cc.bytestream; 5284#endif 5285 return; 5286 } 5287 } 5288 /* select the context bases for this category; the significance and last tables are also indexed by MB_FIELD */ 5289 significant_coeff_ctx_base = h->cabac_state 5290 + significant_coeff_flag_offset[MB_FIELD][cat]; 5291 last_coeff_ctx_base = h->cabac_state 5292 + last_coeff_flag_offset[MB_FIELD][cat]; 5293 abs_level_m1_ctx_base = h->cabac_state 5294 + coeff_abs_level_m1_offset[cat]; 5295 5296 if( !is_dc && cat == 5 ) { /* DECODE_SIGNIFICANCE walks scan positions 0..coefs-1: every position whose significance bin is set is appended to index[], and a set last-coefficient bin ends the scan (last is forced to max_coeff so that the trailing test fails). When the loop ends with last == max_coeff-1, that position is appended without reading a bin. */ 5297#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \ 5298 for(last= 0; last < coefs; last++) { \ 5299 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \ 5300 if( get_cabac( CC, sig_ctx )) { \ 5301 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \ 5302 index[coeff_count++] = last; \ 5303 if( get_cabac( CC, last_ctx ) ) { \ 5304 last= max_coeff; \ 5305 break; \ 5306 } \ 5307 } \ 5308 }\ 5309 if( last == max_coeff -1 ) {\ 5310 index[coeff_count++] = last;\ 5311 } 5312 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; 5313#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) 5314 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off); 5315 } else { 5316 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index); 5317#else 5318 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); 5319 } else { 5320 DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); 5321#endif 5322 } 5323 assert(coeff_count > 0); 5324 /* record that this block has coefficients: DC blocks set a bit in cbp_table, other blocks store the count in non_zero_count_cache */ 5325 if( is_dc ) { 5326 if( cat == 0 ) 5327 h->cbp_table[h->mb_xy] |= 0x100; 5328
else 5329 h->cbp_table[h->mb_xy] |= 0x40 << n; 5330 } else { 5331 if( cat == 5 ) 5332 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1); 5333 else { 5334 assert( cat == 1 || cat == 2 || cat == 4 ); 5335 h->non_zero_count_cache[scan8[n]] = coeff_count; 5336 } 5337 } 5338 /* decode the levels, starting with the last recorded significant position */ 5339 do { 5340 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base; 5341 5342 int j= scantable[index[--coeff_count]]; 5343 /* a clear first bin means the magnitude is 1; only the sign remains to be read */ 5344 if( get_cabac( CC, ctx ) == 0 ) { 5345 node_ctx = coeff_abs_level_transition[0][node_ctx]; 5346 if( is_dc ) { 5347 block[j] = get_cabac_bypass_sign( CC, -1); 5348 }else{ 5349 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; 5350 } 5351 } else { 5352 int coeff_abs = 2; 5353 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; 5354 node_ctx = coeff_abs_level_transition[1][node_ctx]; 5355 5356 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { 5357 coeff_abs++; 5358 } 5359 /* escape for magnitudes of 15 and above: a bypass coded unary prefix of length j followed by j suffix bits. The inner j shadows the coefficient index j only inside this block. NOTE(review): the prefix loop has no upper bound on j, so the later accumulation into coeff_abs looks able to overflow on a corrupt stream - confirm whether a limit is needed. */ 5360 if( coeff_abs >= 15 ) { 5361 int j = 0; 5362 while( get_cabac_bypass( CC ) ) { 5363 j++; 5364 } 5365 5366 coeff_abs=1; 5367 while( j-- ) { 5368 coeff_abs += coeff_abs + get_cabac_bypass( CC ); 5369 } 5370 coeff_abs+= 14; 5371 } 5372 5373 if( is_dc ) { 5374 block[j] = get_cabac_bypass_sign( CC, -coeff_abs ); 5375 }else{ 5376 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6; 5377 } 5378 } 5379 } while( coeff_count ); 5380#ifdef CABAC_ON_STACK 5381 h->cabac.range = cc.range ; 5382 h->cabac.low = cc.low ; 5383 h->cabac.bytestream= cc.bytestream; 5384#endif 5385 5386} 5387 /* Two wrappers, compiled only when CONFIG_SMALL is not set, that call the always-inline decoder with a constant is_dc. */ 5388#if !CONFIG_SMALL /** Calls decode_cabac_residual_internal() with is_dc = 1. */ 5389static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { 5390 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1); 5391} 5392 /** Calls decode_cabac_residual_internal() with is_dc = 0. */ 5393static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { 5394
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0); 5395} 5396#endif 5397 /** Decodes the residual of one block, treating categories 0 and 3 as DC blocks. With CONFIG_SMALL the internal decoder is called directly with a computed is_dc; otherwise the call goes through the decode_cabac_residual_dc() / decode_cabac_residual_nondc() wrappers. The parameters are passed through unchanged to decode_cabac_residual_internal(). */ 5398static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { 5399#if CONFIG_SMALL 5400 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3); 5401#else 5402 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff); 5403 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff); 5404#endif 5405} 5406 /** Sets h->top_mb_xy and h->left_mb_xy[0] for the macroblock at h->mb_xy. The defaults are one row up and one column left. With FRAME_MBAFF the top address moves up one more row when the current macroblock is a field macroblock and is either the bottom one of its pair or has an interlaced pair above, and the left address becomes the top macroblock of the left pair when the test on the left pair's interlaced flag and the current field flag holds. Without FRAME_MBAFF but with FIELD_PICTURE the top address moves up one more row. */ 5407static inline void compute_mb_neighbors(H264Context *h) 5408{ 5409 MpegEncContext * const s = &h->s; 5410 const int mb_xy = h->mb_xy; 5411 h->top_mb_xy = mb_xy - s->mb_stride; 5412 h->left_mb_xy[0] = mb_xy - 1; 5413 if(FRAME_MBAFF){ 5414 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; 5415 const int top_pair_xy = pair_xy - s->mb_stride; 5416 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); 5417 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); 5418 const int curr_mb_field_flag = MB_FIELD; 5419 const int bottom = (s->mb_y & 1); 5420 5421 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){ 5422 h->top_mb_xy -= s->mb_stride; 5423 } /* NOTE(review): this compares the logical negation of the left flag with the current flag, which presumably means that the two field states differ - confirm that MB_FIELD is always 0 or 1 */ 5424 if (!left_mb_field_flag == curr_mb_field_flag) { 5425 h->left_mb_xy[0] = pair_xy - 1; 5426 } 5427 } else if (FIELD_PICTURE) { 5428 h->top_mb_xy -= s->mb_stride; 5429 } 5430 return; 5431} 5432 5433/** 5434 * decodes a macroblock 5435 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed 5436 */ 5437static int decode_mb_cabac(H264Context *h) { 5438 MpegEncContext * const s = &h->s; 5439 int mb_xy; 5440 int mb_type, partition_count, cbp = 0; 5441 int dct8x8_allowed= h->pps.transform_8x8_mode; 5442 5443 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride; 5444 5445 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5446 if( h->slice_type_nos != FF_I_TYPE ) { 5447 int skip; 5448 /* a skipped mb needs the aff flag from the following mb */ 5449 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 ) 5450 predict_field_decoding_flag(h); 5451 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped ) 5452 skip = h->next_mb_skipped; 5453 else 5454 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y ); 5455 /* read skip flags */ 5456 if( skip ) { 5457 if( FRAME_MBAFF && (s->mb_y&1)==0 ){ 5458 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP; 5459 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 ); 5460 if(!h->next_mb_skipped) 5461 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h); 5462 } 5463 5464 decode_mb_skip(h); 5465 5466 h->cbp_table[mb_xy] = 0; 5467 h->chroma_pred_mode_table[mb_xy] = 0; 5468 h->last_qscale_diff = 0; 5469 5470 return 0; 5471 5472 } 5473 } 5474 if(FRAME_MBAFF){ 5475 if( (s->mb_y&1) == 0 ) 5476 h->mb_mbaff = 5477 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h); 5478 } 5479 5480 h->prev_mb_skipped = 0; 5481 5482 compute_mb_neighbors(h); 5483 5484 if( h->slice_type_nos == FF_B_TYPE ) { 5485 mb_type = decode_cabac_mb_type_b( h ); 5486 if( mb_type < 23 ){ 5487 partition_count= b_mb_type_info[mb_type].partition_count; 5488 mb_type= b_mb_type_info[mb_type].type; 5489 }else{ 5490 mb_type -= 23; 5491 goto decode_intra_mb; 5492 } 5493 } else if( h->slice_type_nos == FF_P_TYPE ) { 5494 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) { 5495 /* P-type */ 5496 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) { 5497 /* P_L0_D16x16, P_8x8 */ 5498 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] ); 5499 } else { 5500 /* P_L0_D8x16, P_L0_D16x8 */ 5501 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] ); 5502 } 5503 partition_count= p_mb_type_info[mb_type].partition_count; 5504 mb_type= p_mb_type_info[mb_type].type; 5505 } else { 5506 mb_type= decode_cabac_intra_mb_type(h, 
17, 0); 5507 goto decode_intra_mb; 5508 } 5509 } else { 5510 mb_type= decode_cabac_intra_mb_type(h, 3, 1); 5511 if(h->slice_type == FF_SI_TYPE && mb_type) 5512 mb_type--; 5513 assert(h->slice_type_nos == FF_I_TYPE); 5514decode_intra_mb: 5515 partition_count = 0; 5516 cbp= i_mb_type_info[mb_type].cbp; 5517 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode; 5518 mb_type= i_mb_type_info[mb_type].type; 5519 } 5520 if(MB_FIELD) 5521 mb_type |= MB_TYPE_INTERLACED; 5522 5523 h->slice_table[ mb_xy ]= h->slice_num; 5524 5525 if(IS_INTRA_PCM(mb_type)) { 5526 const uint8_t *ptr; 5527 5528 // We assume these blocks are very rare so we do not optimize it. 5529 // FIXME The two following lines get the bitstream position in the cabac 5530 // decode, I think it should be done by a function in cabac.h (or cabac.c). 5531 ptr= h->cabac.bytestream; 5532 if(h->cabac.low&0x1) ptr--; 5533 if(CABAC_BITS==16){ 5534 if(h->cabac.low&0x1FF) ptr--; 5535 } 5536 5537 // The pixels are stored in the same order as levels in h->mb array. 
5538 memcpy(h->mb, ptr, 256); ptr+=256; 5539 if(CHROMA){ 5540 memcpy(h->mb+128, ptr, 128); ptr+=128; 5541 } 5542 5543 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr); 5544 5545 // All blocks are present 5546 h->cbp_table[mb_xy] = 0x1ef; 5547 h->chroma_pred_mode_table[mb_xy] = 0; 5548 // In deblocking, the quantizer is 0 5549 s->current_picture.qscale_table[mb_xy]= 0; 5550 // All coeffs are present 5551 memset(h->non_zero_count[mb_xy], 16, 16); 5552 s->current_picture.mb_type[mb_xy]= mb_type; 5553 h->last_qscale_diff = 0; 5554 return 0; 5555 } 5556 5557 if(MB_MBAFF){ 5558 h->ref_count[0] <<= 1; 5559 h->ref_count[1] <<= 1; 5560 } 5561 5562 fill_caches(h, mb_type, 0); 5563 5564 if( IS_INTRA( mb_type ) ) { 5565 int i, pred_mode; 5566 if( IS_INTRA4x4( mb_type ) ) { 5567 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) { 5568 mb_type |= MB_TYPE_8x8DCT; 5569 for( i = 0; i < 16; i+=4 ) { 5570 int pred = pred_intra_mode( h, i ); 5571 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred ); 5572 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); 5573 } 5574 } else { 5575 for( i = 0; i < 16; i++ ) { 5576 int pred = pred_intra_mode( h, i ); 5577 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred ); 5578 5579 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] ); 5580 } 5581 } 5582 write_back_intra_pred_mode(h); 5583 if( check_intra4x4_pred_mode(h) < 0 ) return -1; 5584 } else { 5585 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode ); 5586 if( h->intra16x16_pred_mode < 0 ) return -1; 5587 } 5588 if(CHROMA){ 5589 h->chroma_pred_mode_table[mb_xy] = 5590 pred_mode = decode_cabac_mb_chroma_pre_mode( h ); 5591 5592 pred_mode= check_intra_pred_mode( h, pred_mode ); 5593 if( pred_mode < 0 ) return -1; 5594 h->chroma_pred_mode= pred_mode; 5595 } 5596 } else if( partition_count == 4 ) { 5597 int i, j, 
sub_partition_count[4], list, ref[2][4]; 5598 5599 if( h->slice_type_nos == FF_B_TYPE ) { 5600 for( i = 0; i < 4; i++ ) { 5601 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h ); 5602 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; 5603 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; 5604 } 5605 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] | 5606 h->sub_mb_type[2] | h->sub_mb_type[3]) ) { 5607 pred_direct_motion(h, &mb_type); 5608 h->ref_cache[0][scan8[4]] = 5609 h->ref_cache[1][scan8[4]] = 5610 h->ref_cache[0][scan8[12]] = 5611 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE; 5612 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) { 5613 for( i = 0; i < 4; i++ ) 5614 if( IS_DIRECT(h->sub_mb_type[i]) ) 5615 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 ); 5616 } 5617 } 5618 } else { 5619 for( i = 0; i < 4; i++ ) { 5620 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h ); 5621 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; 5622 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type; 5623 } 5624 } 5625 5626 for( list = 0; list < h->list_count; list++ ) { 5627 for( i = 0; i < 4; i++ ) { 5628 if(IS_DIRECT(h->sub_mb_type[i])) continue; 5629 if(IS_DIR(h->sub_mb_type[i], 0, list)){ 5630 if( h->ref_count[list] > 1 ){ 5631 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i ); 5632 if(ref[list][i] >= (unsigned)h->ref_count[list]){ 5633 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]); 5634 return -1; 5635 } 5636 }else 5637 ref[list][i] = 0; 5638 } else { 5639 ref[list][i] = -1; 5640 } 5641 h->ref_cache[list][ scan8[4*i]+1 ]= 5642 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; 5643 } 5644 } 5645 5646 if(dct8x8_allowed) 5647 dct8x8_allowed = get_dct8x8_allowed(h); 5648 5649 for(list=0; list<h->list_count; list++){ 5650 for(i=0; i<4; i++){ 5651 h->ref_cache[list][ scan8[4*i] 
]=h->ref_cache[list][ scan8[4*i]+1 ]; 5652 if(IS_DIRECT(h->sub_mb_type[i])){ 5653 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4); 5654 continue; 5655 } 5656 5657 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){ 5658 const int sub_mb_type= h->sub_mb_type[i]; 5659 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1; 5660 for(j=0; j<sub_partition_count[i]; j++){ 5661 int mpx, mpy; 5662 int mx, my; 5663 const int index= 4*i + block_width*j; 5664 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ]; 5665 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ]; 5666 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy); 5667 5668 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 ); 5669 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 ); 5670 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 5671 5672 if(IS_SUB_8X8(sub_mb_type)){ 5673 mv_cache[ 1 ][0]= 5674 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; 5675 mv_cache[ 1 ][1]= 5676 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; 5677 5678 mvd_cache[ 1 ][0]= 5679 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx; 5680 mvd_cache[ 1 ][1]= 5681 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy; 5682 }else if(IS_SUB_8X4(sub_mb_type)){ 5683 mv_cache[ 1 ][0]= mx; 5684 mv_cache[ 1 ][1]= my; 5685 5686 mvd_cache[ 1 ][0]= mx - mpx; 5687 mvd_cache[ 1 ][1]= my - mpy; 5688 }else if(IS_SUB_4X8(sub_mb_type)){ 5689 mv_cache[ 8 ][0]= mx; 5690 mv_cache[ 8 ][1]= my; 5691 5692 mvd_cache[ 8 ][0]= mx - mpx; 5693 mvd_cache[ 8 ][1]= my - mpy; 5694 } 5695 mv_cache[ 0 ][0]= mx; 5696 mv_cache[ 0 ][1]= my; 5697 5698 mvd_cache[ 0 ][0]= mx - mpx; 5699 mvd_cache[ 0 ][1]= my - mpy; 5700 } 5701 }else{ 5702 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0]; 5703 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0]; 5704 p[0] = p[1] = p[8] = p[9] = 0; 5705 pd[0]= pd[1]= pd[8]= pd[9]= 0; 5706 } 5707 } 5708 } 5709 } else if( IS_DIRECT(mb_type) ) { 5710 
pred_direct_motion(h, &mb_type); 5711 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); 5712 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4); 5713 dct8x8_allowed &= h->sps.direct_8x8_inference_flag; 5714 } else { 5715 int list, mx, my, i, mpx, mpy; 5716 if(IS_16X16(mb_type)){ 5717 for(list=0; list<h->list_count; list++){ 5718 if(IS_DIR(mb_type, 0, list)){ 5719 int ref; 5720 if(h->ref_count[list] > 1){ 5721 ref= decode_cabac_mb_ref(h, list, 0); 5722 if(ref >= (unsigned)h->ref_count[list]){ 5723 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]); 5724 return -1; 5725 } 5726 }else 5727 ref=0; 5728 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1); 5729 }else 5730 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too 5731 } 5732 for(list=0; list<h->list_count; list++){ 5733 if(IS_DIR(mb_type, 0, list)){ 5734 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy); 5735 5736 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 ); 5737 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 ); 5738 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 5739 5740 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4); 5741 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); 5742 }else 5743 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4); 5744 } 5745 } 5746 else if(IS_16X8(mb_type)){ 5747 for(list=0; list<h->list_count; list++){ 5748 for(i=0; i<2; i++){ 5749 if(IS_DIR(mb_type, i, list)){ 5750 int ref; 5751 if(h->ref_count[list] > 1){ 5752 ref= decode_cabac_mb_ref( h, list, 8*i ); 5753 if(ref >= (unsigned)h->ref_count[list]){ 5754 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]); 5755 return -1; 5756 } 5757 }else 5758 ref=0; 5759 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1); 5760 }else 5761 
fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1); 5762 } 5763 } 5764 for(list=0; list<h->list_count; list++){ 5765 for(i=0; i<2; i++){ 5766 if(IS_DIR(mb_type, i, list)){ 5767 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy); 5768 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 ); 5769 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 ); 5770 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 5771 5772 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4); 5773 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); 5774 }else{ 5775 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); 5776 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); 5777 } 5778 } 5779 } 5780 }else{ 5781 assert(IS_8X16(mb_type)); 5782 for(list=0; list<h->list_count; list++){ 5783 for(i=0; i<2; i++){ 5784 if(IS_DIR(mb_type, i, list)){ //FIXME optimize 5785 int ref; 5786 if(h->ref_count[list] > 1){ 5787 ref= decode_cabac_mb_ref( h, list, 4*i ); 5788 if(ref >= (unsigned)h->ref_count[list]){ 5789 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]); 5790 return -1; 5791 } 5792 }else 5793 ref=0; 5794 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1); 5795 }else 5796 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1); 5797 } 5798 } 5799 for(list=0; list<h->list_count; list++){ 5800 for(i=0; i<2; i++){ 5801 if(IS_DIR(mb_type, i, list)){ 5802 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy); 5803 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 ); 5804 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 ); 5805 5806 tprintf(s->avctx, "final mv:%d %d\n", mx, my); 5807 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4); 5808 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 
pack16to32(mx,my), 4); 5809 }else{ 5810 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); 5811 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); 5812 } 5813 } 5814 } 5815 } 5816 } 5817 5818 if( IS_INTER( mb_type ) ) { 5819 h->chroma_pred_mode_table[mb_xy] = 0; 5820 write_back_motion( h, mb_type ); 5821 } 5822 5823 if( !IS_INTRA16x16( mb_type ) ) { 5824 cbp = decode_cabac_mb_cbp_luma( h ); 5825 if(CHROMA) 5826 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4; 5827 } 5828 5829 h->cbp_table[mb_xy] = h->cbp = cbp; 5830 5831 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) { 5832 if( decode_cabac_mb_transform_size( h ) ) 5833 mb_type |= MB_TYPE_8x8DCT; 5834 } 5835 s->current_picture.mb_type[mb_xy]= mb_type; 5836 5837 if( cbp || IS_INTRA16x16( mb_type ) ) { 5838 const uint8_t *scan, *scan8x8, *dc_scan; 5839 const uint32_t *qmul; 5840 int dqp; 5841 5842 if(IS_INTERLACED(mb_type)){ 5843 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0; 5844 scan= s->qscale ? h->field_scan : h->field_scan_q0; 5845 dc_scan= luma_dc_field_scan; 5846 }else{ 5847 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; 5848 scan= s->qscale ? 
h->zigzag_scan : h->zigzag_scan_q0; 5849 dc_scan= luma_dc_zigzag_scan; 5850 } 5851 5852 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h ); 5853 if( dqp == INT_MIN ){ 5854 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y); 5855 return -1; 5856 } 5857 s->qscale += dqp; 5858 if(((unsigned)s->qscale) > 51){ 5859 if(s->qscale<0) s->qscale+= 52; 5860 else s->qscale-= 52; 5861 } 5862 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); 5863 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); 5864 5865 if( IS_INTRA16x16( mb_type ) ) { 5866 int i; 5867 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); 5868 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16); 5869 5870 if( cbp&15 ) { 5871 qmul = h->dequant4_coeff[0][s->qscale]; 5872 for( i = 0; i < 16; i++ ) { 5873 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); 5874 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15); 5875 } 5876 } else { 5877 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); 5878 } 5879 } else { 5880 int i8x8, i4x4; 5881 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { 5882 if( cbp & (1<<i8x8) ) { 5883 if( IS_8x8DCT(mb_type) ) { 5884 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, 5885 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64); 5886 } else { 5887 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 
0:3][s->qscale]; 5888 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { 5889 const int index = 4*i8x8 + i4x4; 5890 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); 5891//START_TIMER 5892 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16); 5893//STOP_TIMER("decode_residual") 5894 } 5895 } 5896 } else { 5897 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; 5898 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; 5899 } 5900 } 5901 } 5902 5903 if( cbp&0x30 ){ 5904 int c; 5905 for( c = 0; c < 2; c++ ) { 5906 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); 5907 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4); 5908 } 5909 } 5910 5911 if( cbp&0x20 ) { 5912 int c, i; 5913 for( c = 0; c < 2; c++ ) { 5914 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; 5915 for( i = 0; i < 4; i++ ) { 5916 const int index = 16 + 4 * c + i; 5917 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); 5918 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15); 5919 } 5920 } 5921 } else { 5922 uint8_t * const nnz= &h->non_zero_count_cache[0]; 5923 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = 5924 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; 5925 } 5926 } else { 5927 uint8_t * const nnz= &h->non_zero_count_cache[0]; 5928 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); 5929 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = 5930 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; 5931 h->last_qscale_diff = 0; 5932 } 5933 5934 s->current_picture.qscale_table[mb_xy]= s->qscale; 5935 write_back_non_zero_count(h); 5936 5937 if(MB_MBAFF){ 5938 h->ref_count[0] >>= 1; 5939 h->ref_count[1] >>= 1; 5940 } 5941 5942 return 0; 5943} 5944 5945 /** Deblock one vertical luma edge starting at pix. alpha and beta are looked up from qp plus the slice alpha/beta offsets. If bS[0] < 4 the four tc values are read from tc0_table for bS[0..3] and dsp.h264_h_loop_filter_luma is called; otherwise dsp.h264_h_loop_filter_luma_intra is called. Only bS[0] selects between the two paths. */ 5946static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { 5947 const int
index_a = qp + h->slice_alpha_c0_offset; /* tables are addressed through a +52 bias; presumably so that negative qp+offset sums stay in range -- TODO confirm against the table definitions */ 5948 const int alpha = (alpha_table+52)[index_a]; 5949 const int beta = (beta_table+52)[qp + h->slice_beta_offset]; 5950 5951 if( bS[0] < 4 ) { 5952 int8_t tc[4]; 5953 tc[0] = (tc0_table+52)[index_a][bS[0]]; 5954 tc[1] = (tc0_table+52)[index_a][bS[1]]; 5955 tc[2] = (tc0_table+52)[index_a][bS[2]]; 5956 tc[3] = (tc0_table+52)[index_a][bS[3]]; 5957 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); 5958 } else { 5959 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); 5960 } 5961} /** Chroma counterpart of filter_mb_edgev: same alpha/beta lookup, but every tc value is the tc0_table entry plus one, and the dsp.h264_h_loop_filter_chroma / _chroma_intra routines are used. */ 5962static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { 5963 const int index_a = qp + h->slice_alpha_c0_offset; 5964 const int alpha = (alpha_table+52)[index_a]; 5965 const int beta = (beta_table+52)[qp + h->slice_beta_offset]; 5966 5967 if( bS[0] < 4 ) { 5968 int8_t tc[4]; 5969 tc[0] = (tc0_table+52)[index_a][bS[0]]+1; 5970 tc[1] = (tc0_table+52)[index_a][bS[1]]+1; 5971 tc[2] = (tc0_table+52)[index_a][bS[2]]+1; 5972 tc[3] = (tc0_table+52)[index_a][bS[3]]+1; 5973 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); 5974 } else { 5975 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); 5976 } 5977} 5978 /** Luma filter for the first vertical edge in the MBAFF case, done in plain C one row at a time over 16 rows. Each row picks its own strength from bS[8] and its own quantizer from qp[2]; rows whose strength is 0 are left untouched. Pixels are modified in place around pix[-1] / pix[0]. */ 5979static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { 5980 int i; 5981 for( i = 0; i < 16; i++, pix += stride) { 5982 int index_a; 5983 int alpha; 5984 int beta; 5985 5986 int qp_index; 5987 int bS_index = (i >> 1); 5988 if (!MB_FIELD) { 5989 bS_index &= ~1; 5990 bS_index |= (i & 1); /* frame MB: the index sequence over the rows becomes 0,1,0,1,2,3,2,3,... instead of 0,0,1,1,2,2,... */ 5991 } 5992 5993 if( bS[bS_index] == 0 ) { 5994 continue; 5995 } 5996 5997 qp_index = MB_FIELD ?
(i >> 3) : (i & 1); /* field MB: qp[0] for rows 0-7 and qp[1] for rows 8-15; frame MB: qp alternates with row parity */ 5998 index_a = qp[qp_index] + h->slice_alpha_c0_offset; 5999 alpha = (alpha_table+52)[index_a]; 6000 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset]; 6001 6002 if( bS[bS_index] < 4 ) { 6003 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]]; 6004 const int p0 = pix[-1]; 6005 const int p1 = pix[-2]; 6006 const int p2 = pix[-3]; 6007 const int q0 = pix[0]; 6008 const int q1 = pix[1]; 6009 const int q2 = pix[2]; 6010 6011 if( FFABS( p0 - q0 ) < alpha && 6012 FFABS( p1 - p0 ) < beta && 6013 FFABS( q1 - q0 ) < beta ) { 6014 int tc = tc0; 6015 int i_delta; 6016 /* each side that also passes the p2/q2 beta test gets its second sample corrected (clipped to +-tc0) and widens the clip range of the p0/q0 delta by one */ 6017 if( FFABS( p2 - p0 ) < beta ) { 6018 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); 6019 tc++; 6020 } 6021 if( FFABS( q2 - q0 ) < beta ) { 6022 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); 6023 tc++; 6024 } 6025 6026 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); 6027 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ 6028 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ 6029 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); 6030 } 6031 }else{ /* strength >= 4: no tc clipping; up to three samples per side are replaced by rounded averages */ 6032 const int p0 = pix[-1]; 6033 const int p1 = pix[-2]; 6034 const int p2 = pix[-3]; 6035 6036 const int q0 = pix[0]; 6037 const int q1 = pix[1]; 6038 const int q2 = pix[2]; 6039 6040 if( FFABS( p0 - q0 ) < alpha && 6041 FFABS( p1 - p0 ) < beta && 6042 FFABS( q1 - q0 ) < beta ) { 6043 6044 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ 6045 if( FFABS( p2 - p0 ) < beta) 6046 { 6047 const int p3 = pix[-4]; 6048 /* p0', p1', p2' */ 6049 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; 6050 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; 6051 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6052 } else { 6053 /* p0' */ 6054 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; 6055 } 6056 if( FFABS( q2 - q0 ) < beta) 6057 { 6058 const int q3 = pix[3]; 6059 /* q0', q1', q2' */ 6060 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; 6061 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; 6062 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; 6063 } else { 6064 /* q0' */ 6065 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; 6066 } 6067 }else{ 6068 /* p0', q0' */ 6069 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; 6070 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; 6071 } 6072 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); 6073 } 6074 } 6075 } 6076} /** Chroma filter for the first vertical edge in the MBAFF case: 8 rows, row i uses bS[i] directly and a quantizer chosen from qp[2]. Rows with strength 0 are skipped; only pix[-1] and pix[0] are ever rewritten. */ 6077static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { 6078 int i; 6079 for( i = 0; i < 8; i++, pix += stride) { 6080 int index_a; 6081 int alpha; 6082 int beta; 6083 6084 int qp_index; 6085 int bS_index = i; 6086 6087 if( bS[bS_index] == 0 ) { 6088 continue; 6089 } 6090 6091 qp_index = MB_FIELD ?
(i >> 2) : (i & 1); /* field MB: qp[0] for rows 0-3 and qp[1] for rows 4-7; frame MB: qp alternates with row parity */ 6092 index_a = qp[qp_index] + h->slice_alpha_c0_offset; 6093 alpha = (alpha_table+52)[index_a]; 6094 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset]; 6095 6096 if( bS[bS_index] < 4 ) { /* chroma clip range is the tc0_table entry plus one */ 6097 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1; 6098 const int p0 = pix[-1]; 6099 const int p1 = pix[-2]; 6100 const int q0 = pix[0]; 6101 const int q1 = pix[1]; 6102 6103 if( FFABS( p0 - q0 ) < alpha && 6104 FFABS( p1 - p0 ) < beta && 6105 FFABS( q1 - q0 ) < beta ) { 6106 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); 6107 6108 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ 6109 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ 6110 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); 6111 } 6112 }else{ 6113 const int p0 = pix[-1]; 6114 const int p1 = pix[-2]; 6115 const int q0 = pix[0]; 6116 const int q1 = pix[1]; 6117 6118 if( FFABS( p0 - q0 ) < alpha && 6119 FFABS( p1 - p0 ) < beta && 6120 FFABS( q1 - q0 ) < beta ) { 6121 6122 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ 6123 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ 6124 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); 6125 } 6126 } 6127 } 6128} 6129 /** Deblock one horizontal luma edge starting at pix. alpha and beta are looked up from qp plus the slice alpha/beta offsets. If bS[0] < 4 the four tc values are read from tc0_table for bS[0..3] and dsp.h264_v_loop_filter_luma is called; otherwise dsp.h264_v_loop_filter_luma_intra is called. Only bS[0] selects between the two paths. */ 6130static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { 6131 const int index_a = qp + h->slice_alpha_c0_offset; 6132 const int alpha = (alpha_table+52)[index_a]; 6133 const int beta = (beta_table+52)[qp + h->slice_beta_offset]; 6134 6135 if( bS[0] < 4 ) { 6136 int8_t tc[4]; 6137 tc[0] = (tc0_table+52)[index_a][bS[0]]; 6138 tc[1] =
(tc0_table+52)[index_a][bS[1]]; 6139 tc[2] = (tc0_table+52)[index_a][bS[2]]; 6140 tc[3] = (tc0_table+52)[index_a][bS[3]]; 6141 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); 6142 } else { 6143 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); 6144 } 6145} 6146 /** Deblock one horizontal chroma edge: alpha/beta come from qp plus the slice offsets, every tc value is the tc0_table entry plus one, and dsp.h264_v_loop_filter_chroma (bS[0] < 4) or dsp.h264_v_loop_filter_chroma_intra (otherwise) does the work. */ 6147static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { 6148 const int index_a = qp + h->slice_alpha_c0_offset; 6149 const int alpha = (alpha_table+52)[index_a]; 6150 const int beta = (beta_table+52)[qp + h->slice_beta_offset]; 6151 6152 if( bS[0] < 4 ) { 6153 int8_t tc[4]; 6154 tc[0] = (tc0_table+52)[index_a][bS[0]]+1; 6155 tc[1] = (tc0_table+52)[index_a][bS[1]]+1; 6156 tc[2] = (tc0_table+52)[index_a][bS[2]]+1; 6157 tc[3] = (tc0_table+52)[index_a][bS[3]]+1; 6158 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); 6159 } else { 6160 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); 6161 } 6162} 6163 /** Shortcut deblocking of the macroblock at (mb_x, mb_y). Hands over to filter_mb() for the first column, the first row, when no dsp.h264_loop_filter_strength is available, when pps.chroma_qp_diff is set, when CODEC_FLAG2_FAST is not set, or when deblocking_filter == 2 and the top or left neighbour lies in another slice. Otherwise it filters the luma and both chroma planes in place using a single chroma qp mapping (index 0) for Cb and Cr. */ 6164static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 6165 MpegEncContext * const s = &h->s; 6166 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD; /* 1 for a bottom-field picture, else 0 */ 6167 int mb_xy, mb_type; 6168 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; 6169 6170 mb_xy = h->mb_xy; 6171 6172 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || 6173 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST 6174 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] || 6175 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) { 6176 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); 6177 return; 6178 } 6179 assert(!FRAME_MBAFF); 6180 6181 mb_type = s->current_picture.mb_type[mb_xy]; 6182 qp = s->current_picture.qscale_table[mb_xy]; 6183 qp0 =
s->current_picture.qscale_table[mb_xy-1]; 6184 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; 6185 qpc = get_chroma_qp( h, 0, qp ); 6186 qpc0 = get_chroma_qp( h, 0, qp0 ); 6187 qpc1 = get_chroma_qp( h, 0, qp1 ); /* from here on qp0/qpc0 are the rounded averages with the left neighbour and qp1/qpc1 with the top neighbour */ 6188 qp0 = (qp + qp0 + 1) >> 1; 6189 qp1 = (qp + qp1 + 1) >> 1; 6190 qpc0 = (qpc + qpc0 + 1) >> 1; 6191 qpc1 = (qpc + qpc1 + 1) >> 1; 6192 qp_thresh = 15 - h->slice_alpha_c0_offset; /* nothing is filtered when every quantizer involved is at or below this threshold */ 6193 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh && 6194 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh) 6195 return; 6196 6197 if( IS_INTRA(mb_type) ) { /* intra MB: constant strengths, 4 on the left edge, 3 inside, and 3 or 4 on the top edge depending on FIELD_PICTURE */ 6198 int16_t bS4[4] = {4,4,4,4}; 6199 int16_t bS3[4] = {3,3,3,3}; 6200 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4; 6201 if( IS_8x8DCT(mb_type) ) { 6202 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 ); 6203 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp ); 6204 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 ); 6205 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp ); 6206 } else { 6207 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 ); 6208 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp ); 6209 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp ); 6210 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp ); 6211 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 ); 6212 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp ); 6213 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp ); 6214 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp ); 6215 } /* chroma: only the outer edge and the middle edge of each plane */ 6216 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 ); 6217 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc ); 6218 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 ); 6219 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc ); 6220 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 ); 6221 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc ); 6222 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6223 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc ); 6224 return; 6225 } else { /* bS[dir][edge][i]: dir 0 is used with the vertical-edge filters, dir 1 with the horizontal-edge filters */ 6226 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]); /* bSv aliases each group of four 16-bit strengths as one 64-bit word so a whole edge can be set or tested at once */ 6227 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; 6228 int edges; 6229 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { 6230 edges = 4; 6231 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL; 6232 } else { 6233 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 6234 (mb_type & MB_TYPE_16x8) ? 1 : 0; 6235 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) 6236 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16)) 6237 ? 3 : 0; 6238 int step = IS_8x8DCT(mb_type) ? 2 : 1; 6239 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; 6240 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, 6241 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); 6242 } /* an intra neighbour overrides whatever strength was computed for the shared outer edge */ 6243 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) ) 6244 bSv[0][0] = 0x0004000400040004ULL; 6245 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) ) 6246 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL; 6247 6248#define FILTER(hv,dir,edge)\ 6249 if(bSv[dir][edge]) {\ 6250 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\ 6251 if(!(edge&1)) {\ 6252 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\ 6253 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ?
qpc : qpc##dir );\ 6254 }\ 6255 } 6256 if( edges == 1 ) { 6257 FILTER(v,0,0); 6258 FILTER(h,1,0); 6259 } else if( IS_8x8DCT(mb_type) ) { 6260 FILTER(v,0,0); 6261 FILTER(v,0,2); 6262 FILTER(h,1,0); 6263 FILTER(h,1,2); 6264 } else { 6265 FILTER(v,0,0); 6266 FILTER(v,0,1); 6267 FILTER(v,0,2); 6268 FILTER(v,0,3); 6269 FILTER(h,1,0); 6270 FILTER(h,1,1); 6271 FILTER(h,1,2); 6272 FILTER(h,1,3); 6273 } 6274#undef FILTER 6275 } 6276} 6277 6278 /** Deblock all edges of one macroblock in a single direction. dir == 0 walks the vertical edges (filter_mb_edgev / filter_mb_edgecv, neighbour = left MB), dir == 1 the horizontal edges (filter_mb_edgeh / filter_mb_edgech, neighbour = h->top_mb_xy). For each edge a 4-entry bS is derived from intra status, non-zero coefficient counts, reference frames and motion vectors (mvy_limit is the threshold for the vertical mv difference); edges whose bS is all zero are skipped. A non-zero first_vertical_edge_done makes the loop start at edge 1. */ 6279static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { 6280 MpegEncContext * const s = &h->s; 6281 int edge; 6282 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; 6283 const int mbm_type = s->current_picture.mb_type[mbm_xy]; /* ref2frm is the mapping for the current slice, ref2frmm the one for the slice of the neighbouring MB */ 6284 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); 6285 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); /* edge 0 is skipped when the neighbour's slice_table entry is 0xFFFF (presumably the marker for a macroblock that is not available -- confirm where slice_table is initialised) */ 6286 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0; 6287 /* a skipped 16x16 MB only has its outer edge considered */ 6288 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) 6289 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; 6290 // how often to recheck mv-based bS when iterating between edges 6291 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 : 6292 (mb_type & (MB_TYPE_8x16 >> dir)) ?
1 : 0; 6293 // how often to recheck mv-based bS when iterating along each edge 6294 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); 6295 6296 if (first_vertical_edge_done) { 6297 start = 1; 6298 } 6299 6300 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy]) 6301 start = 1; 6302 6303 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0 6304 && !IS_INTERLACED(mb_type) 6305 && IS_INTERLACED(mbm_type) 6306 ) { 6307 // This is a special case in the norm where the filtering must 6308 // be done twice (one each of the field) even if we are in a 6309 // frame macroblock. 6310 // 6311 static const int nnz_idx[4] = {4,5,6,3}; 6312 unsigned int tmp_linesize = 2 * linesize; 6313 unsigned int tmp_uvlinesize = 2 * uvlinesize; 6314 int mbn_xy = mb_xy - 2 * s->mb_stride; 6315 int qp; 6316 int i, j; 6317 int16_t bS[4]; 6318 6319 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ 6320 if( IS_INTRA(mb_type) || 6321 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) { 6322 bS[0] = bS[1] = bS[2] = bS[3] = 3; 6323 } else { 6324 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy]; 6325 for( i = 0; i < 4; i++ ) { 6326 if( h->non_zero_count_cache[scan8[0]+i] != 0 || 6327 mbn_nnz[nnz_idx[i]] != 0 ) 6328 bS[i] = 2; 6329 else 6330 bS[i] = 1; 6331 } 6332 } 6333 // Do not use s->qscale as luma quantizer because it has not the same 6334 // value in IPCM macroblocks.
6335 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; /* NOTE(review): edge has not been assigned yet when the trace call below runs (its first assignment is in the edge loop further down) -- only matters when tprintf is active */ 6336 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); 6337 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } /* pass j starts on row j and uses a doubled stride, so each pass touches every other row */ 6338 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp ); 6339 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, 6340 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1); 6341 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, 6342 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1); 6343 } 6344 6345 start = 1; 6346 } 6347 6348 /* Calculate bS */ 6349 for( edge = start; edge < edges; edge++ ) { 6350 /* mbn_xy: neighbor macroblock */ 6351 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; 6352 const int mbn_type = s->current_picture.mb_type[mbn_xy]; 6353 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm; 6354 int16_t bS[4]; 6355 int qp; 6356 /* with the 8x8 transform the odd edges are not filtered */ 6357 if( (edge&1) && IS_8x8DCT(mb_type) ) 6358 continue; 6359 6360 if( IS_INTRA(mb_type) || 6361 IS_INTRA(mbn_type) ) { 6362 int value; 6363 if (edge == 0) { 6364 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type)) 6365 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) 6366 ) { 6367 value = 4; 6368 } else { 6369 value = 3; 6370 } 6371 } else { 6372 value = 3; 6373 } 6374 bS[0] = bS[1] = bS[2] = bS[3] = value; 6375 } else { 6376 int i, l; 6377 int mv_done; /* mv_done == 1 means bS[] already holds the motion-based result for the whole edge, so the per-block loop below only has to apply the coefficient test */ 6378 6379 if( edge & mask_edge ) { 6380 bS[0] = bS[1] = bS[2] = bS[3] = 0; 6381 mv_done = 1; 6382 } 6383 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) { 6384 bS[0] = bS[1] = bS[2] = bS[3] = 1; 6385 mv_done = 1; 6386 } 6387 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { 6388 int b_idx= 8 + 4 + edge * (dir ?
8:1); 6389 int bn_idx= b_idx - (dir ? 8:1); 6390 int v = 0; 6391 /* same-list comparison: v becomes 1 as soon as the reference frames differ or an mv component differs by at least the limit */ 6392 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) { 6393 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] || 6394 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || 6395 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; 6396 } 6397 /* B slices: a mismatch is only kept if the crossed pairing (list l against list 1-l of the neighbour) mismatches as well */ 6398 if(h->slice_type_nos == FF_B_TYPE && v){ 6399 v=0; 6400 for( l = 0; !v && l < 2; l++ ) { 6401 int ln= 1-l; 6402 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] || 6403 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 || 6404 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit; 6405 } 6406 } 6407 6408 bS[0] = bS[1] = bS[2] = bS[3] = v; 6409 mv_done = 1; 6410 } 6411 else 6412 mv_done = 0; 6413 6414 for( i = 0; i < 4; i++ ) { 6415 int x = dir == 0 ? edge : i; 6416 int y = dir == 0 ? i : edge; 6417 int b_idx= 8 + 4 + x + 8*y; 6418 int bn_idx= b_idx - (dir ?
8:1); 6419 /* non-zero coefficients on either side of the edge force strength 2 */ 6420 if( h->non_zero_count_cache[b_idx] | 6421 h->non_zero_count_cache[bn_idx] ) { 6422 bS[i] = 2; 6423 } 6424 else if(!mv_done) 6425 { 6426 bS[i] = 0; 6427 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) { 6428 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] || 6429 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || 6430 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { 6431 bS[i] = 1; 6432 break; 6433 } 6434 } 6435 6436 if(h->slice_type_nos == FF_B_TYPE && bS[i]){ 6437 bS[i] = 0; 6438 for( l = 0; l < 2; l++ ) { 6439 int ln= 1-l; 6440 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] || 6441 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 || 6442 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) { 6443 bS[i] = 1; 6444 break; 6445 } 6446 } 6447 } 6448 } 6449 } 6450 /* nothing to filter on this edge */ 6451 if(bS[0]+bS[1]+bS[2]+bS[3] == 0) 6452 continue; 6453 } 6454 6455 /* Filter edge */ 6456 // Do not use s->qscale as luma quantizer because it has not the same 6457 // value in IPCM macroblocks.
6458 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; 6459 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]); 6460 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); 6461 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 6462 if( dir == 0 ) { 6463 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp ); 6464 if( (edge&1) == 0 ) { 6465 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, 6466 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1); 6467 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, 6468 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1); 6469 } 6470 } else { 6471 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp ); 6472 if( (edge&1) == 0 ) { 6473 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, 6474 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1); 6475 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, 6476 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1); 6477 } 6478 } 6479 } 6480} 6481 /** General deblocking entry point for the macroblock at (mb_x, mb_y); filters img_y / img_cb / img_cr in place. Steps: (1) outside MBAFF frames, return early when the quantizers involved are low enough that filtering would change nothing; (2) for CAVLC streams with transform_8x8_mode, rewrite the non_zero_count_cache entries of 8x8-transform macroblocks from the cbp bits; (3) in MBAFF frames, filter the first vertical edge separately when the current and left macroblocks differ in interlacing; (4) run filter_mb_dir() for direction 0 and then direction 1. */ 6482static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 6483 MpegEncContext * const s = &h->s; 6484 const int mb_xy= mb_x + mb_y*s->mb_stride; 6485 const int mb_type = s->current_picture.mb_type[mb_xy]; 6486 const int mvy_limit = IS_INTERLACED(mb_type) ?
2 : 4; /* vertical mv difference threshold handed to filter_mb_dir: 2 for interlaced macroblocks, 4 otherwise */ 6487 int first_vertical_edge_done = 0; 6488 av_unused int dir; 6489 6490 //for sufficiently low qp, filtering wouldn't do anything 6491 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp 6492 if(!FRAME_MBAFF){ 6493 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]); 6494 int qp = s->current_picture.qscale_table[mb_xy]; 6495 if(qp <= qp_thresh 6496 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh) 6497 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ 6498 return; 6499 } 6500 } 6501 6502 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs 6503 if(!h->pps.cabac && h->pps.transform_8x8_mode){ 6504 int top_type, left_type[2]; 6505 top_type = s->current_picture.mb_type[h->top_mb_xy] ; 6506 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]]; 6507 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]]; 6508 /* neighbour entries of the cache are replaced by the matching cbp bit of the neighbouring macroblock */ 6509 if(IS_8x8DCT(top_type)){ 6510 h->non_zero_count_cache[4+8*0]= 6511 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4; 6512 h->non_zero_count_cache[6+8*0]= 6513 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8; 6514 } 6515 if(IS_8x8DCT(left_type[0])){ 6516 h->non_zero_count_cache[3+8*1]= 6517 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF 6518 } 6519 if(IS_8x8DCT(left_type[1])){ 6520 h->non_zero_count_cache[3+8*3]= 6521 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF 6522 } 6523 /* current macroblock: each group of four cache entries takes one luma bit of h->cbp */ 6524 if(IS_8x8DCT(mb_type)){ 6525 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= 6526 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; 6527 6528 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= 6529 h->non_zero_count_cache[scan8[2+ 4]]=
h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; 6530 6531 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= 6532 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; 6533 6534 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= 6535 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; 6536 } 6537 } 6538 6539 if (FRAME_MBAFF 6540 // left mb is in picture 6541 && h->slice_table[mb_xy-1] != 0xFFFF 6542 // and current and left pair do not have the same interlaced type 6543 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1])) 6544 // and left mb is in the same slice if deblocking_filter == 2 6545 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) { 6546 /* First vertical edge is different in MBAFF frames 6547 * There are 8 different bS to compute and 2 different Qp 6548 */ 6549 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; /* left_mb_xy[0] / [1]: the two macroblocks of the pair to the left (row mb_y&~1 and the row below it) */ 6550 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; 6551 int16_t bS[8]; 6552 int qp[2]; 6553 int bqp[2]; 6554 int rqp[2]; 6555 int mb_qp, mbn0_qp, mbn1_qp; 6556 int i; 6557 first_vertical_edge_done = 1; 6558 6559 if( IS_INTRA(mb_type) ) 6560 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4; 6561 else { 6562 for( i = 0; i < 8; i++ ) { 6563 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1]; 6564 6565 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) 6566 bS[i] = 4; 6567 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 || 6568 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ? 6569 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) 6570 : 6571 h->non_zero_count[mbn_xy][MB_FIELD ?
i&3 : (i>>2)+(mb_y&1)*2])) 6572 bS[i] = 2; 6573 else 6574 bS[i] = 1; 6575 } 6576 } 6577 /* index 0 pairs the current macroblock with left_mb_xy[0], index 1 with left_mb_xy[1]; bqp and rqp use chroma qp mapping 0 and 1 respectively */ 6578 mb_qp = s->current_picture.qscale_table[mb_xy]; 6579 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]]; 6580 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]]; 6581 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1; 6582 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) + 6583 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1; 6584 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) + 6585 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1; 6586 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1; 6587 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) + 6588 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1; 6589 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) + 6590 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1; 6591 6592 /* Filter edge */ 6593 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize); 6594 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 6595 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp ); 6596 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp ); 6597 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp ); 6598 } 6599 /* both build variants make the same two calls: direction 0 receives first_vertical_edge_done, direction 1 always receives 0 */ 6600#if CONFIG_SMALL 6601 for( dir = 0; dir < 2; dir++ ) 6602 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir); 6603#else 6604 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0); 6605 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1); 6606#endif 6607} 6608 6609static int decode_slice(struct AVCodecContext *avctx, void *arg){ 6610 H264Context *h = *(void**)arg; 6611 MpegEncContext * const s = &h->s; 6612 const int part_mask= s->partitioned_frame ?
(AC_END|AC_ERROR) : 0x7F; 6613 6614 s->mb_skip_run= -1; 6615 6616 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 || 6617 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); 6618 6619 if( h->pps.cabac ) { 6620 int i; 6621 6622 /* realign */ 6623 align_get_bits( &s->gb ); 6624 6625 /* init cabac */ 6626 ff_init_cabac_states( &h->cabac); 6627 ff_init_cabac_decoder( &h->cabac, 6628 s->gb.buffer + get_bits_count(&s->gb)/8, 6629 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8); 6630 /* calculate pre-state */ 6631 for( i= 0; i < 460; i++ ) { 6632 int pre; 6633 if( h->slice_type_nos == FF_I_TYPE ) 6634 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 ); 6635 else 6636 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 ); 6637 6638 if( pre <= 63 ) 6639 h->cabac_state[i] = 2 * ( 63 - pre ) + 0; 6640 else 6641 h->cabac_state[i] = 2 * ( pre - 64 ) + 1; 6642 } 6643 6644 for(;;){ 6645//START_TIMER 6646 int ret = decode_mb_cabac(h); 6647 int eos; 6648//STOP_TIMER("decode_mb_cabac") 6649 6650 if(ret>=0) hl_decode_mb(h); 6651 6652 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? 
6653 s->mb_y++; 6654 6655 ret = decode_mb_cabac(h); 6656 6657 if(ret>=0) hl_decode_mb(h); 6658 s->mb_y--; 6659 } 6660 eos = get_cabac_terminate( &h->cabac ); 6661 6662 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) { 6663 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream); 6664 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 6665 return -1; 6666 } 6667 6668 if( ++s->mb_x >= s->mb_width ) { 6669 s->mb_x = 0; 6670 ff_draw_horiz_band(s, 16*s->mb_y, 16); 6671 ++s->mb_y; 6672 if(FIELD_OR_MBAFF_PICTURE) { 6673 ++s->mb_y; 6674 } 6675 } 6676 6677 if( eos || s->mb_y >= s->mb_height ) { 6678 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 6679 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 6680 return 0; 6681 } 6682 } 6683 6684 } else { 6685 for(;;){ 6686 int ret = decode_mb_cavlc(h); 6687 6688 if(ret>=0) hl_decode_mb(h); 6689 6690 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? 
6691 s->mb_y++; 6692 ret = decode_mb_cavlc(h); 6693 6694 if(ret>=0) hl_decode_mb(h); 6695 s->mb_y--; 6696 } 6697 6698 if(ret<0){ 6699 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); 6700 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 6701 6702 return -1; 6703 } 6704 6705 if(++s->mb_x >= s->mb_width){ 6706 s->mb_x=0; 6707 ff_draw_horiz_band(s, 16*s->mb_y, 16); 6708 ++s->mb_y; 6709 if(FIELD_OR_MBAFF_PICTURE) { 6710 ++s->mb_y; 6711 } 6712 if(s->mb_y >= s->mb_height){ 6713 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 6714 6715 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) { 6716 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 6717 6718 return 0; 6719 }else{ 6720 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 6721 6722 return -1; 6723 } 6724 } 6725 } 6726 6727 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ 6728 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 6729 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ 6730 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 6731 6732 return 0; 6733 }else{ 6734 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 6735 6736 return -1; 6737 } 6738 } 6739 } 6740 } 6741 6742#if 0 6743 for(;s->mb_y < s->mb_height; s->mb_y++){ 6744 for(;s->mb_x < s->mb_width; s->mb_x++){ 6745 int ret= decode_mb(h); 6746 6747 hl_decode_mb(h); 6748 6749 if(ret<0){ 6750 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); 6751 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 6752 6753 return -1; 6754 } 6755 6756 if(++s->mb_x >= 
s->mb_width){ 6757 s->mb_x=0; 6758 if(++s->mb_y >= s->mb_height){ 6759 if(get_bits_count(s->gb) == s->gb.size_in_bits){ 6760 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 6761 6762 return 0; 6763 }else{ 6764 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 6765 6766 return -1; 6767 } 6768 } 6769 } 6770 6771 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){ 6772 if(get_bits_count(s->gb) == s->gb.size_in_bits){ 6773 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 6774 6775 return 0; 6776 }else{ 6777 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 6778 6779 return -1; 6780 } 6781 } 6782 } 6783 s->mb_x=0; 6784 ff_draw_horiz_band(s, 16*s->mb_y, 16); 6785 } 6786#endif 6787 return -1; //not reached 6788} 6789 6790static int decode_picture_timing(H264Context *h){ 6791 MpegEncContext * const s = &h->s; 6792 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){ 6793 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length); 6794 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length); 6795 } 6796 if(h->sps.pic_struct_present_flag){ 6797 unsigned int i, num_clock_ts; 6798 h->sei_pic_struct = get_bits(&s->gb, 4); 6799 6800 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING) 6801 return -1; 6802 6803 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct]; 6804 6805 for (i = 0 ; i < num_clock_ts ; i++){ 6806 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */ 6807 unsigned int full_timestamp_flag; 6808 skip_bits(&s->gb, 2); /* ct_type */ 6809 skip_bits(&s->gb, 1); /* nuit_field_based_flag */ 6810 skip_bits(&s->gb, 5); /* counting_type */ 6811 full_timestamp_flag = get_bits(&s->gb, 1); 6812 skip_bits(&s->gb, 1); /* discontinuity_flag */ 6813 skip_bits(&s->gb, 1); /* 
cnt_dropped_flag */ 6814 skip_bits(&s->gb, 8); /* n_frames */ 6815 if(full_timestamp_flag){ 6816 skip_bits(&s->gb, 6); /* seconds_value 0..59 */ 6817 skip_bits(&s->gb, 6); /* minutes_value 0..59 */ 6818 skip_bits(&s->gb, 5); /* hours_value 0..23 */ 6819 }else{ 6820 if(get_bits(&s->gb, 1)){ /* seconds_flag */ 6821 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */ 6822 if(get_bits(&s->gb, 1)){ /* minutes_flag */ 6823 skip_bits(&s->gb, 6); /* minutes_value 0..59 */ 6824 if(get_bits(&s->gb, 1)) /* hours_flag */ 6825 skip_bits(&s->gb, 5); /* hours_value 0..23 */ 6826 } 6827 } 6828 } 6829 if(h->sps.time_offset_length > 0) 6830 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */ 6831 } 6832 } 6833 } 6834 return 0; 6835} 6836 6837static int decode_unregistered_user_data(H264Context *h, int size){ 6838 MpegEncContext * const s = &h->s; 6839 uint8_t user_data[16+256]; 6840 int e, build, i; 6841 6842 if(size<16) 6843 return -1; 6844 6845 for(i=0; i<sizeof(user_data)-1 && i<size; i++){ 6846 user_data[i]= get_bits(&s->gb, 8); 6847 } 6848 6849 user_data[i]= 0; 6850 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build); 6851 if(e==1 && build>=0) 6852 h->x264_build= build; 6853 6854 if(s->avctx->debug & FF_DEBUG_BUGS) 6855 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16); 6856 6857 for(; i<size; i++) 6858 skip_bits(&s->gb, 8); 6859 6860 return 0; 6861} 6862 6863static int decode_recovery_point(H264Context *h){ 6864 MpegEncContext * const s = &h->s; 6865 6866 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb); 6867 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */ 6868 6869 return 0; 6870} 6871 6872static int decode_buffering_period(H264Context *h){ 6873 MpegEncContext * const s = &h->s; 6874 unsigned int sps_id; 6875 int sched_sel_idx; 6876 SPS *sps; 6877 6878 sps_id = get_ue_golomb_31(&s->gb); 6879 if(sps_id > 31 || 
!h->sps_buffers[sps_id]) { 6880 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id); 6881 return -1; 6882 } 6883 sps = h->sps_buffers[sps_id]; 6884 6885 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1 6886 if (sps->nal_hrd_parameters_present_flag) { 6887 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) { 6888 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length); 6889 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset 6890 } 6891 } 6892 if (sps->vcl_hrd_parameters_present_flag) { 6893 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) { 6894 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length); 6895 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset 6896 } 6897 } 6898 6899 h->sei_buffering_period_present = 1; 6900 return 0; 6901} 6902 6903int ff_h264_decode_sei(H264Context *h){ 6904 MpegEncContext * const s = &h->s; 6905 6906 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){ 6907 int size, type; 6908 6909 type=0; 6910 do{ 6911 type+= show_bits(&s->gb, 8); 6912 }while(get_bits(&s->gb, 8) == 255); 6913 6914 size=0; 6915 do{ 6916 size+= show_bits(&s->gb, 8); 6917 }while(get_bits(&s->gb, 8) == 255); 6918 6919 switch(type){ 6920 case SEI_TYPE_PIC_TIMING: // Picture timing SEI 6921 if(decode_picture_timing(h) < 0) 6922 return -1; 6923 break; 6924 case SEI_TYPE_USER_DATA_UNREGISTERED: 6925 if(decode_unregistered_user_data(h, size) < 0) 6926 return -1; 6927 break; 6928 case SEI_TYPE_RECOVERY_POINT: 6929 if(decode_recovery_point(h) < 0) 6930 return -1; 6931 break; 6932 case SEI_BUFFERING_PERIOD: 6933 if(decode_buffering_period(h) < 0) 6934 return -1; 6935 break; 6936 default: 6937 skip_bits(&s->gb, 8*size); 6938 } 6939 6940 //FIXME check bits here 6941 align_get_bits(&s->gb); 
6942 } 6943 6944 return 0; 6945} 6946 6947static inline int decode_hrd_parameters(H264Context *h, SPS *sps){ 6948 MpegEncContext * const s = &h->s; 6949 int cpb_count, i; 6950 cpb_count = get_ue_golomb_31(&s->gb) + 1; 6951 6952 if(cpb_count > 32U){ 6953 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count); 6954 return -1; 6955 } 6956 6957 get_bits(&s->gb, 4); /* bit_rate_scale */ 6958 get_bits(&s->gb, 4); /* cpb_size_scale */ 6959 for(i=0; i<cpb_count; i++){ 6960 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */ 6961 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */ 6962 get_bits1(&s->gb); /* cbr_flag */ 6963 } 6964 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1; 6965 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1; 6966 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1; 6967 sps->time_offset_length = get_bits(&s->gb, 5); 6968 sps->cpb_cnt = cpb_count; 6969 return 0; 6970} 6971 6972static inline int decode_vui_parameters(H264Context *h, SPS *sps){ 6973 MpegEncContext * const s = &h->s; 6974 int aspect_ratio_info_present_flag; 6975 unsigned int aspect_ratio_idc; 6976 6977 aspect_ratio_info_present_flag= get_bits1(&s->gb); 6978 6979 if( aspect_ratio_info_present_flag ) { 6980 aspect_ratio_idc= get_bits(&s->gb, 8); 6981 if( aspect_ratio_idc == EXTENDED_SAR ) { 6982 sps->sar.num= get_bits(&s->gb, 16); 6983 sps->sar.den= get_bits(&s->gb, 16); 6984 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){ 6985 sps->sar= pixel_aspect[aspect_ratio_idc]; 6986 }else{ 6987 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n"); 6988 return -1; 6989 } 6990 }else{ 6991 sps->sar.num= 6992 sps->sar.den= 0; 6993 } 6994// s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height); 6995 6996 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */ 6997 get_bits1(&s->gb); /* overscan_appropriate_flag */ 6998 } 6999 7000 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */ 7001 get_bits(&s->gb, 3); /* 
video_format */ 7002 get_bits1(&s->gb); /* video_full_range_flag */ 7003 if(get_bits1(&s->gb)){ /* colour_description_present_flag */ 7004 get_bits(&s->gb, 8); /* colour_primaries */ 7005 get_bits(&s->gb, 8); /* transfer_characteristics */ 7006 get_bits(&s->gb, 8); /* matrix_coefficients */ 7007 } 7008 } 7009 7010 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */ 7011 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */ 7012 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */ 7013 } 7014 7015 sps->timing_info_present_flag = get_bits1(&s->gb); 7016 if(sps->timing_info_present_flag){ 7017 sps->num_units_in_tick = get_bits_long(&s->gb, 32); 7018 sps->time_scale = get_bits_long(&s->gb, 32); 7019 if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){ 7020 av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick inavlid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick); 7021 return -1; 7022 } 7023 sps->fixed_frame_rate_flag = get_bits1(&s->gb); 7024 } 7025 7026 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb); 7027 if(sps->nal_hrd_parameters_present_flag) 7028 if(decode_hrd_parameters(h, sps) < 0) 7029 return -1; 7030 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb); 7031 if(sps->vcl_hrd_parameters_present_flag) 7032 if(decode_hrd_parameters(h, sps) < 0) 7033 return -1; 7034 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag) 7035 get_bits1(&s->gb); /* low_delay_hrd_flag */ 7036 sps->pic_struct_present_flag = get_bits1(&s->gb); 7037 7038 sps->bitstream_restriction_flag = get_bits1(&s->gb); 7039 if(sps->bitstream_restriction_flag){ 7040 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */ 7041 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */ 7042 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */ 7043 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */ 7044 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */ 
7045 sps->num_reorder_frames= get_ue_golomb(&s->gb); 7046 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/ 7047 7048 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){ 7049 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames); 7050 return -1; 7051 } 7052 } 7053 7054 return 0; 7055} 7056 7057static void decode_scaling_list(H264Context *h, uint8_t *factors, int size, 7058 const uint8_t *jvt_list, const uint8_t *fallback_list){ 7059 MpegEncContext * const s = &h->s; 7060 int i, last = 8, next = 8; 7061 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct; 7062 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */ 7063 memcpy(factors, fallback_list, size*sizeof(uint8_t)); 7064 else 7065 for(i=0;i<size;i++){ 7066 if(next) 7067 next = (last + get_se_golomb(&s->gb)) & 0xff; 7068 if(!i && !next){ /* matrix not written, we use the preset one */ 7069 memcpy(factors, jvt_list, size*sizeof(uint8_t)); 7070 break; 7071 } 7072 last = factors[scan[i]] = next ? next : last; 7073 } 7074} 7075 7076static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps, 7077 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){ 7078 MpegEncContext * const s = &h->s; 7079 int fallback_sps = !is_sps && sps->scaling_matrix_present; 7080 const uint8_t *fallback[4] = { 7081 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0], 7082 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1], 7083 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0], 7084 fallback_sps ? 
sps->scaling_matrix8[1] : default_scaling8[1] 7085 }; 7086 if(get_bits1(&s->gb)){ 7087 sps->scaling_matrix_present |= is_sps; 7088 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y 7089 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr 7090 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb 7091 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y 7092 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr 7093 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb 7094 if(is_sps || pps->transform_8x8_mode){ 7095 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y 7096 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y 7097 } 7098 } 7099} 7100 7101int ff_h264_decode_seq_parameter_set(H264Context *h){ 7102 MpegEncContext * const s = &h->s; 7103 int profile_idc, level_idc; 7104 unsigned int sps_id; 7105 int i; 7106 SPS *sps; 7107 7108 profile_idc= get_bits(&s->gb, 8); 7109 get_bits1(&s->gb); //constraint_set0_flag 7110 get_bits1(&s->gb); //constraint_set1_flag 7111 get_bits1(&s->gb); //constraint_set2_flag 7112 get_bits1(&s->gb); //constraint_set3_flag 7113 get_bits(&s->gb, 4); // reserved 7114 level_idc= get_bits(&s->gb, 8); 7115 sps_id= get_ue_golomb_31(&s->gb); 7116 7117 if(sps_id >= MAX_SPS_COUNT) { 7118 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id); 7119 return -1; 7120 } 7121 sps= av_mallocz(sizeof(SPS)); 7122 if(sps == NULL) 7123 return -1; 7124 7125 sps->profile_idc= profile_idc; 7126 sps->level_idc= level_idc; 7127 7128 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4)); 7129 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8)); 7130 sps->scaling_matrix_present = 0; 7131 7132 
if(sps->profile_idc >= 100){ //high profile 7133 sps->chroma_format_idc= get_ue_golomb_31(&s->gb); 7134 if(sps->chroma_format_idc == 3) 7135 sps->residual_color_transform_flag = get_bits1(&s->gb); 7136 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8; 7137 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8; 7138 sps->transform_bypass = get_bits1(&s->gb); 7139 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8); 7140 }else{ 7141 sps->chroma_format_idc= 1; 7142 } 7143 7144 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4; 7145 sps->poc_type= get_ue_golomb_31(&s->gb); 7146 7147 if(sps->poc_type == 0){ //FIXME #define 7148 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4; 7149 } else if(sps->poc_type == 1){//FIXME #define 7150 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb); 7151 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb); 7152 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb); 7153 sps->poc_cycle_length = get_ue_golomb(&s->gb); 7154 7155 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){ 7156 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length); 7157 goto fail; 7158 } 7159 7160 for(i=0; i<sps->poc_cycle_length; i++) 7161 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb); 7162 }else if(sps->poc_type != 2){ 7163 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type); 7164 goto fail; 7165 } 7166 7167 sps->ref_frame_count= get_ue_golomb_31(&s->gb); 7168 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){ 7169 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n"); 7170 goto fail; 7171 } 7172 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb); 7173 sps->mb_width = get_ue_golomb(&s->gb) + 1; 7174 sps->mb_height= get_ue_golomb(&s->gb) + 1; 7175 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 || 7176 avcodec_check_dimensions(NULL, 16*sps->mb_width, 
16*sps->mb_height)){ 7177 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n"); 7178 goto fail; 7179 } 7180 7181 sps->frame_mbs_only_flag= get_bits1(&s->gb); 7182 if(!sps->frame_mbs_only_flag) 7183 sps->mb_aff= get_bits1(&s->gb); 7184 else 7185 sps->mb_aff= 0; 7186 7187 sps->direct_8x8_inference_flag= get_bits1(&s->gb); 7188 7189#ifndef ALLOW_INTERLACE 7190 if(sps->mb_aff) 7191 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n"); 7192#endif 7193 sps->crop= get_bits1(&s->gb); 7194 if(sps->crop){ 7195 sps->crop_left = get_ue_golomb(&s->gb); 7196 sps->crop_right = get_ue_golomb(&s->gb); 7197 sps->crop_top = get_ue_golomb(&s->gb); 7198 sps->crop_bottom= get_ue_golomb(&s->gb); 7199 if(sps->crop_left || sps->crop_top){ 7200 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n"); 7201 } 7202 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){ 7203 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n"); 7204 } 7205 }else{ 7206 sps->crop_left = 7207 sps->crop_right = 7208 sps->crop_top = 7209 sps->crop_bottom= 0; 7210 } 7211 7212 sps->vui_parameters_present_flag= get_bits1(&s->gb); 7213 if( sps->vui_parameters_present_flag ) 7214 decode_vui_parameters(h, sps); 7215 7216 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ 7217 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n", 7218 sps_id, sps->profile_idc, sps->level_idc, 7219 sps->poc_type, 7220 sps->ref_frame_count, 7221 sps->mb_width, sps->mb_height, 7222 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"), 7223 sps->direct_8x8_inference_flag ? "8B8" : "", 7224 sps->crop_left, sps->crop_right, 7225 sps->crop_top, sps->crop_bottom, 7226 sps->vui_parameters_present_flag ? 
"VUI" : "", 7227 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc] 7228 ); 7229 } 7230 7231 av_free(h->sps_buffers[sps_id]); 7232 h->sps_buffers[sps_id]= sps; 7233 h->sps = *sps; 7234 return 0; 7235fail: 7236 av_free(sps); 7237 return -1; 7238} 7239 7240static void 7241build_qp_table(PPS *pps, int t, int index) 7242{ 7243 int i; 7244 for(i = 0; i < 52; i++) 7245 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)]; 7246} 7247 7248int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){ 7249 MpegEncContext * const s = &h->s; 7250 unsigned int pps_id= get_ue_golomb(&s->gb); 7251 PPS *pps; 7252 7253 if(pps_id >= MAX_PPS_COUNT) { 7254 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id); 7255 return -1; 7256 } 7257 7258 pps= av_mallocz(sizeof(PPS)); 7259 if(pps == NULL) 7260 return -1; 7261 pps->sps_id= get_ue_golomb_31(&s->gb); 7262 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){ 7263 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n"); 7264 goto fail; 7265 } 7266 7267 pps->cabac= get_bits1(&s->gb); 7268 pps->pic_order_present= get_bits1(&s->gb); 7269 pps->slice_group_count= get_ue_golomb(&s->gb) + 1; 7270 if(pps->slice_group_count > 1 ){ 7271 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb); 7272 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n"); 7273 switch(pps->mb_slice_group_map_type){ 7274 case 0: 7275#if 0 7276| for( i = 0; i <= num_slice_groups_minus1; i++ ) | | | 7277| run_length[ i ] |1 |ue(v) | 7278#endif 7279 break; 7280 case 2: 7281#if 0 7282| for( i = 0; i < num_slice_groups_minus1; i++ ) | | | 7283|{ | | | 7284| top_left_mb[ i ] |1 |ue(v) | 7285| bottom_right_mb[ i ] |1 |ue(v) | 7286| } | | | 7287#endif 7288 break; 7289 case 3: 7290 case 4: 7291 case 5: 7292#if 0 7293| slice_group_change_direction_flag |1 |u(1) | 7294| slice_group_change_rate_minus1 |1 |ue(v) | 7295#endif 7296 break; 7297 case 6: 7298#if 0 7299| slice_group_id_cnt_minus1 |1 
|ue(v) | 7300| for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | | 7301|) | | | 7302| slice_group_id[ i ] |1 |u(v) | 7303#endif 7304 break; 7305 } 7306 } 7307 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1; 7308 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1; 7309 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){ 7310 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n"); 7311 goto fail; 7312 } 7313 7314 pps->weighted_pred= get_bits1(&s->gb); 7315 pps->weighted_bipred_idc= get_bits(&s->gb, 2); 7316 pps->init_qp= get_se_golomb(&s->gb) + 26; 7317 pps->init_qs= get_se_golomb(&s->gb) + 26; 7318 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb); 7319 pps->deblocking_filter_parameters_present= get_bits1(&s->gb); 7320 pps->constrained_intra_pred= get_bits1(&s->gb); 7321 pps->redundant_pic_cnt_present = get_bits1(&s->gb); 7322 7323 pps->transform_8x8_mode= 0; 7324 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit 7325 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4)); 7326 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8)); 7327 7328 if(get_bits_count(&s->gb) < bit_length){ 7329 pps->transform_8x8_mode= get_bits1(&s->gb); 7330 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8); 7331 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset 7332 } else { 7333 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0]; 7334 } 7335 7336 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]); 7337 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]); 7338 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) 7339 h->pps.chroma_qp_diff= 1; 7340 7341 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ 7342 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n", 
7343 pps_id, pps->sps_id, 7344 pps->cabac ? "CABAC" : "CAVLC", 7345 pps->slice_group_count, 7346 pps->ref_count[0], pps->ref_count[1], 7347 pps->weighted_pred ? "weighted" : "", 7348 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1], 7349 pps->deblocking_filter_parameters_present ? "LPAR" : "", 7350 pps->constrained_intra_pred ? "CONSTR" : "", 7351 pps->redundant_pic_cnt_present ? "REDU" : "", 7352 pps->transform_8x8_mode ? "8x8DCT" : "" 7353 ); 7354 } 7355 7356 av_free(h->pps_buffers[pps_id]); 7357 h->pps_buffers[pps_id]= pps; 7358 return 0; 7359fail: 7360 av_free(pps); 7361 return -1; 7362} 7363 7364/** 7365 * Call decode_slice() for each context. 7366 * 7367 * @param h h264 master context 7368 * @param context_count number of contexts to execute 7369 */ 7370static void execute_decode_slices(H264Context *h, int context_count){ 7371 MpegEncContext * const s = &h->s; 7372 AVCodecContext * const avctx= s->avctx; 7373 H264Context *hx; 7374 int i; 7375 7376 if (s->avctx->hwaccel) 7377 return; 7378 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 7379 return; 7380 if(context_count == 1) { 7381 decode_slice(avctx, &h); 7382 } else { 7383 for(i = 1; i < context_count; i++) { 7384 hx = h->thread_context[i]; 7385 hx->s.error_recognition = avctx->error_recognition; 7386 hx->s.error_count = 0; 7387 } 7388 7389 avctx->execute(avctx, (void *)decode_slice, 7390 (void **)h->thread_context, NULL, context_count, sizeof(void*)); 7391 7392 /* pull back stuff from slices to master context */ 7393 hx = h->thread_context[context_count - 1]; 7394 s->mb_x = hx->s.mb_x; 7395 s->mb_y = hx->s.mb_y; 7396 s->dropable = hx->s.dropable; 7397 s->picture_structure = hx->s.picture_structure; 7398 for(i = 1; i < context_count; i++) 7399 h->s.error_count += h->thread_context[i]->s.error_count; 7400 } 7401} 7402 7403 7404static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ 7405 MpegEncContext * const s = &h->s; 7406 
AVCodecContext * const avctx= s->avctx; 7407 int buf_index=0; 7408 H264Context *hx; ///< thread context 7409 int context_count = 0; 7410 7411 h->max_contexts = avctx->thread_count; 7412#if 0 7413 int i; 7414 for(i=0; i<50; i++){ 7415 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); 7416 } 7417#endif 7418 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ 7419 h->current_slice = 0; 7420 if (!s->first_field) 7421 s->current_picture_ptr= NULL; 7422 reset_sei(h); 7423 } 7424 7425 for(;;){ 7426 int consumed; 7427 int dst_length; 7428 int bit_length; 7429 const uint8_t *ptr; 7430 int i, nalsize = 0; 7431 int err; 7432 7433 if(h->is_avc) { 7434 if(buf_index >= buf_size) break; 7435 nalsize = 0; 7436 for(i = 0; i < h->nal_length_size; i++) 7437 nalsize = (nalsize << 8) | buf[buf_index++]; 7438 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){ 7439 if(nalsize == 1){ 7440 buf_index++; 7441 continue; 7442 }else{ 7443 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); 7444 break; 7445 } 7446 } 7447 } else { 7448 // start code prefix search 7449 for(; buf_index + 3 < buf_size; buf_index++){ 7450 // This should always succeed in the first iteration. 7451 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1) 7452 break; 7453 } 7454 7455 if(buf_index+3 >= buf_size) break; 7456 7457 buf_index+=3; 7458 } 7459 7460 hx = h->thread_context[context_count]; 7461 7462 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index); 7463 if (ptr==NULL || dst_length < 0){ 7464 return -1; 7465 } 7466 while(ptr[dst_length - 1] == 0 && dst_length > 0) 7467 dst_length--; 7468 bit_length= !dst_length ? 
0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1)); 7469 7470 if(s->avctx->debug&FF_DEBUG_STARTCODE){ 7471 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length); 7472 } 7473 7474 if (h->is_avc && (nalsize != consumed)){ 7475 int i, debug_level = AV_LOG_DEBUG; 7476 for (i = consumed; i < nalsize; i++) 7477 if (buf[buf_index+i]) 7478 debug_level = AV_LOG_ERROR; 7479 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); 7480 consumed= nalsize; 7481 } 7482 7483 buf_index += consumed; 7484 7485 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id 7486 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) 7487 continue; 7488 7489 again: 7490 err = 0; 7491 switch(hx->nal_unit_type){ 7492 case NAL_IDR_SLICE: 7493 if (h->nal_unit_type != NAL_IDR_SLICE) { 7494 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices"); 7495 return -1; 7496 } 7497 idr(h); //FIXME ensure we don't loose some frames if there is reordering 7498 case NAL_SLICE: 7499 init_get_bits(&hx->s.gb, ptr, bit_length); 7500 hx->intra_gb_ptr= 7501 hx->inter_gb_ptr= &hx->s.gb; 7502 hx->s.data_partitioning = 0; 7503 7504 if((err = decode_slice_header(hx, h))) 7505 break; 7506 7507 if (s->avctx->hwaccel && h->current_slice == 1) { 7508 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) 7509 return -1; 7510 } 7511 7512 s->current_picture_ptr->key_frame |= 7513 (hx->nal_unit_type == NAL_IDR_SLICE) || 7514 (h->sei_recovery_frame_cnt >= 0); 7515 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5 7516 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) 7517 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) 7518 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) 7519 && avctx->skip_frame < AVDISCARD_ALL){ 7520 if(avctx->hwaccel) { 7521 if (avctx->hwaccel->decode_slice(avctx, 
&buf[buf_index - consumed], consumed) < 0) 7522 return -1; 7523 }else 7524 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){ 7525 static const uint8_t start_code[] = {0x00, 0x00, 0x01}; 7526 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code)); 7527 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed ); 7528 }else 7529 context_count++; 7530 } 7531 break; 7532 case NAL_DPA: 7533 init_get_bits(&hx->s.gb, ptr, bit_length); 7534 hx->intra_gb_ptr= 7535 hx->inter_gb_ptr= NULL; 7536 hx->s.data_partitioning = 1; 7537 7538 err = decode_slice_header(hx, h); 7539 break; 7540 case NAL_DPB: 7541 init_get_bits(&hx->intra_gb, ptr, bit_length); 7542 hx->intra_gb_ptr= &hx->intra_gb; 7543 break; 7544 case NAL_DPC: 7545 init_get_bits(&hx->inter_gb, ptr, bit_length); 7546 hx->inter_gb_ptr= &hx->inter_gb; 7547 7548 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning 7549 && s->context_initialized 7550 && s->hurry_up < 5 7551 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) 7552 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) 7553 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) 7554 && avctx->skip_frame < AVDISCARD_ALL) 7555 context_count++; 7556 break; 7557 case NAL_SEI: 7558 init_get_bits(&s->gb, ptr, bit_length); 7559 ff_h264_decode_sei(h); 7560 break; 7561 case NAL_SPS: 7562 init_get_bits(&s->gb, ptr, bit_length); 7563 ff_h264_decode_seq_parameter_set(h); 7564 7565 if(s->flags& CODEC_FLAG_LOW_DELAY) 7566 s->low_delay=1; 7567 7568 if(avctx->has_b_frames < 2) 7569 avctx->has_b_frames= !s->low_delay; 7570 break; 7571 case NAL_PPS: 7572 init_get_bits(&s->gb, ptr, bit_length); 7573 7574 ff_h264_decode_picture_parameter_set(h, bit_length); 7575 7576 break; 7577 case NAL_AUD: 7578 case NAL_END_SEQUENCE: 7579 case NAL_END_STREAM: 7580 case NAL_FILLER_DATA: 7581 case NAL_SPS_EXT: 7582 case NAL_AUXILIARY_SLICE: 7583 break; 7584 default: 7585 
av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length); 7586 } 7587 7588 if(context_count == h->max_contexts) { 7589 execute_decode_slices(h, context_count); 7590 context_count = 0; 7591 } 7592 7593 if (err < 0) 7594 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); 7595 else if(err == 1) { 7596 /* Slice could not be decoded in parallel mode, copy down 7597 * NAL unit stuff to context 0 and restart. Note that 7598 * rbsp_buffer is not transferred, but since we no longer 7599 * run in parallel mode this should not be an issue. */ 7600 h->nal_unit_type = hx->nal_unit_type; 7601 h->nal_ref_idc = hx->nal_ref_idc; 7602 hx = h; 7603 goto again; 7604 } 7605 } 7606 if(context_count) 7607 execute_decode_slices(h, context_count); 7608 return buf_index; 7609} 7610 7611/** 7612 * returns the number of bytes consumed for building the current frame 7613 */ 7614static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){ 7615 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...) 
7616 if(pos+10>buf_size) pos=buf_size; // oops ;) 7617 7618 return pos; 7619} 7620 7621static int decode_frame(AVCodecContext *avctx, 7622 void *data, int *data_size, 7623 const uint8_t *buf, int buf_size) 7624{ 7625 H264Context *h = avctx->priv_data; 7626 MpegEncContext *s = &h->s; 7627 AVFrame *pict = data; 7628 int buf_index; 7629 7630 s->flags= avctx->flags; 7631 s->flags2= avctx->flags2; 7632 7633 /* end of stream, output what is still in the buffers */ 7634 if (buf_size == 0) { 7635 Picture *out; 7636 int i, out_idx; 7637 7638//FIXME factorize this with the output code below 7639 out = h->delayed_pic[0]; 7640 out_idx = 0; 7641 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++) 7642 if(h->delayed_pic[i]->poc < out->poc){ 7643 out = h->delayed_pic[i]; 7644 out_idx = i; 7645 } 7646 7647 for(i=out_idx; h->delayed_pic[i]; i++) 7648 h->delayed_pic[i] = h->delayed_pic[i+1]; 7649 7650 if(out){ 7651 *data_size = sizeof(AVFrame); 7652 *pict= *(AVFrame*)out; 7653 } 7654 7655 return 0; 7656 } 7657 7658 if(h->is_avc && !h->got_avcC) { 7659 int i, cnt, nalsize; 7660 unsigned char *p = avctx->extradata; 7661 if(avctx->extradata_size < 7) { 7662 av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); 7663 return -1; 7664 } 7665 if(*p != 1) { 7666 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p); 7667 return -1; 7668 } 7669 /* sps and pps in the avcC always have length coded with 2 bytes, 7670 so put a fake nal_length_size = 2 while parsing them */ 7671 h->nal_length_size = 2; 7672 // Decode sps from avcC 7673 cnt = *(p+5) & 0x1f; // Number of sps 7674 p += 6; 7675 for (i = 0; i < cnt; i++) { 7676 nalsize = AV_RB16(p) + 2; 7677 if(decode_nal_units(h, p, nalsize) < 0) { 7678 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i); 7679 return -1; 7680 } 7681 p += nalsize; 7682 } 7683 // Decode pps from avcC 7684 cnt = *(p++); // Number of pps 7685 for (i = 0; i < cnt; i++) { 7686 nalsize = AV_RB16(p) + 2; 7687 
if(decode_nal_units(h, p, nalsize) != nalsize) { 7688 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i); 7689 return -1; 7690 } 7691 p += nalsize; 7692 } 7693 // Now store right nal length size, that will be use to parse all other nals 7694 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1; 7695 // Do not reparse avcC 7696 h->got_avcC = 1; 7697 } 7698 7699 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){ 7700 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0) 7701 return -1; 7702 h->got_avcC = 1; 7703 } 7704 7705 buf_index=decode_nal_units(h, buf, buf_size); 7706 if(buf_index < 0) 7707 return -1; 7708 7709 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){ 7710 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0; 7711 av_log(avctx, AV_LOG_ERROR, "no frame!\n"); 7712 return -1; 7713 } 7714 7715 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ 7716 Picture *out = s->current_picture_ptr; 7717 Picture *cur = s->current_picture_ptr; 7718 int i, pics, cross_idr, out_of_order, out_idx; 7719 7720 s->mb_y= 0; 7721 7722 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; 7723 s->current_picture_ptr->pict_type= s->pict_type; 7724 7725 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 7726 ff_vdpau_h264_set_reference_frames(s); 7727 7728 if(!s->dropable) { 7729 execute_ref_pic_marking(h, h->mmco, h->mmco_index); 7730 h->prev_poc_msb= h->poc_msb; 7731 h->prev_poc_lsb= h->poc_lsb; 7732 } 7733 h->prev_frame_num_offset= h->frame_num_offset; 7734 h->prev_frame_num= h->frame_num; 7735 7736 if (avctx->hwaccel) { 7737 if (avctx->hwaccel->end_frame(avctx) < 0) 7738 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n"); 7739 } 7740 7741 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 7742 ff_vdpau_h264_picture_complete(s); 7743 7744 /* 7745 * 
FIXME: Error handling code does not seem to support interlaced 7746 * when slices span multiple rows 7747 * The ff_er_add_slice calls don't work right for bottom 7748 * fields; they cause massive erroneous error concealing 7749 * Error marking covers both fields (top and bottom). 7750 * This causes a mismatched s->error_count 7751 * and a bad error table. Further, the error count goes to 7752 * INT_MAX when called for bottom field, because mb_y is 7753 * past end by one (callers fault) and resync_mb_y != 0 7754 * causes problems for the first MB line, too. 7755 */ 7756 if (!FIELD_PICTURE) 7757 ff_er_frame_end(s); 7758 7759 MPV_frame_end(s); 7760 7761 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { 7762 /* Wait for second field. */ 7763 *data_size = 0; 7764 7765 } else { 7766 cur->repeat_pict = 0; 7767 7768 /* Signal interlacing information externally. */ 7769 /* Prioritize picture timing SEI information over used decoding process if it exists. */ 7770 if(h->sps.pic_struct_present_flag){ 7771 switch (h->sei_pic_struct) 7772 { 7773 case SEI_PIC_STRUCT_FRAME: 7774 cur->interlaced_frame = 0; 7775 break; 7776 case SEI_PIC_STRUCT_TOP_FIELD: 7777 case SEI_PIC_STRUCT_BOTTOM_FIELD: 7778 case SEI_PIC_STRUCT_TOP_BOTTOM: 7779 case SEI_PIC_STRUCT_BOTTOM_TOP: 7780 cur->interlaced_frame = 1; 7781 break; 7782 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: 7783 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: 7784 // Signal the possibility of telecined film externally (pic_struct 5,6) 7785 // From these hints, let the applications decide if they apply deinterlacing. 7786 cur->repeat_pict = 1; 7787 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE; 7788 break; 7789 case SEI_PIC_STRUCT_FRAME_DOUBLING: 7790 // Force progressive here, as doubling interlaced frame is a bad idea. 
7791 cur->interlaced_frame = 0; 7792 cur->repeat_pict = 2; 7793 break; 7794 case SEI_PIC_STRUCT_FRAME_TRIPLING: 7795 cur->interlaced_frame = 0; 7796 cur->repeat_pict = 4; 7797 break; 7798 } 7799 }else{ 7800 /* Derive interlacing flag from used decoding process. */ 7801 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE; 7802 } 7803 7804 if (cur->field_poc[0] != cur->field_poc[1]){ 7805 /* Derive top_field_first from field pocs. */ 7806 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1]; 7807 }else{ 7808 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){ 7809 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */ 7810 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM 7811 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) 7812 cur->top_field_first = 1; 7813 else 7814 cur->top_field_first = 0; 7815 }else{ 7816 /* Most likely progressive */ 7817 cur->top_field_first = 0; 7818 } 7819 } 7820 7821 //FIXME do something with unavailable reference frames 7822 7823 /* Sort B-frames into display order */ 7824 7825 if(h->sps.bitstream_restriction_flag 7826 && s->avctx->has_b_frames < h->sps.num_reorder_frames){ 7827 s->avctx->has_b_frames = h->sps.num_reorder_frames; 7828 s->low_delay = 0; 7829 } 7830 7831 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT 7832 && !h->sps.bitstream_restriction_flag){ 7833 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT; 7834 s->low_delay= 0; 7835 } 7836 7837 pics = 0; 7838 while(h->delayed_pic[pics]) pics++; 7839 7840 assert(pics <= MAX_DELAYED_PIC_COUNT); 7841 7842 h->delayed_pic[pics++] = cur; 7843 if(cur->reference == 0) 7844 cur->reference = DELAYED_PIC_REF; 7845 7846 out = h->delayed_pic[0]; 7847 out_idx = 0; 7848 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++) 7849 if(h->delayed_pic[i]->poc < out->poc){ 7850 out = h->delayed_pic[i]; 7851 out_idx = i; 7852 } 7853 cross_idr = !h->delayed_pic[0]->poc || 
!!h->delayed_pic[i] || h->delayed_pic[0]->key_frame; 7854 7855 out_of_order = !cross_idr && out->poc < h->outputed_poc; 7856 7857 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) 7858 { } 7859 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) 7860 || (s->low_delay && 7861 ((!cross_idr && out->poc > h->outputed_poc + 2) 7862 || cur->pict_type == FF_B_TYPE))) 7863 { 7864 s->low_delay = 0; 7865 s->avctx->has_b_frames++; 7866 } 7867 7868 if(out_of_order || pics > s->avctx->has_b_frames){ 7869 out->reference &= ~DELAYED_PIC_REF; 7870 for(i=out_idx; h->delayed_pic[i]; i++) 7871 h->delayed_pic[i] = h->delayed_pic[i+1]; 7872 } 7873 if(!out_of_order && pics > s->avctx->has_b_frames){ 7874 *data_size = sizeof(AVFrame); 7875 7876 h->outputed_poc = out->poc; 7877 *pict= *(AVFrame*)out; 7878 }else{ 7879 av_log(avctx, AV_LOG_DEBUG, "no picture\n"); 7880 } 7881 } 7882 } 7883 7884 assert(pict->data[0] || !*data_size); 7885 ff_print_debug_info(s, pict); 7886//printf("out %d\n", (int)pict->data[0]); 7887#if 0 //? 
7888 7889 /* Return the Picture timestamp as the frame number */ 7890 /* we subtract 1 because it is added on utils.c */ 7891 avctx->frame_number = s->picture_number - 1; 7892#endif 7893 return get_consumed_bytes(s, buf_index, buf_size); 7894} 7895#if 0 7896static inline void fill_mb_avail(H264Context *h){ 7897 MpegEncContext * const s = &h->s; 7898 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; 7899 7900 if(s->mb_y){ 7901 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num; 7902 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num; 7903 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num; 7904 }else{ 7905 h->mb_avail[0]= 7906 h->mb_avail[1]= 7907 h->mb_avail[2]= 0; 7908 } 7909 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num; 7910 h->mb_avail[4]= 1; //FIXME move out 7911 h->mb_avail[5]= 0; //FIXME move out 7912} 7913#endif 7914 7915#ifdef TEST 7916#undef printf 7917#undef random 7918#define COUNT 8000 7919#define SIZE (COUNT*40) 7920int main(void){ 7921 int i; 7922 uint8_t temp[SIZE]; 7923 PutBitContext pb; 7924 GetBitContext gb; 7925// int int_temp[10000]; 7926 DSPContext dsp; 7927 AVCodecContext avctx; 7928 7929 dsputil_init(&dsp, &avctx); 7930 7931 init_put_bits(&pb, temp, SIZE); 7932 printf("testing unsigned exp golomb\n"); 7933 for(i=0; i<COUNT; i++){ 7934 START_TIMER 7935 set_ue_golomb(&pb, i); 7936 STOP_TIMER("set_ue_golomb"); 7937 } 7938 flush_put_bits(&pb); 7939 7940 init_get_bits(&gb, temp, 8*SIZE); 7941 for(i=0; i<COUNT; i++){ 7942 int j, s; 7943 7944 s= show_bits(&gb, 24); 7945 7946 START_TIMER 7947 j= get_ue_golomb(&gb); 7948 if(j != i){ 7949 printf("mismatch! 
at %d (%d should be %d) bits:%6X\n", i, j, i, s); 7950// return -1; 7951 } 7952 STOP_TIMER("get_ue_golomb"); 7953 } 7954 7955 7956 init_put_bits(&pb, temp, SIZE); 7957 printf("testing signed exp golomb\n"); 7958 for(i=0; i<COUNT; i++){ 7959 START_TIMER 7960 set_se_golomb(&pb, i - COUNT/2); 7961 STOP_TIMER("set_se_golomb"); 7962 } 7963 flush_put_bits(&pb); 7964 7965 init_get_bits(&gb, temp, 8*SIZE); 7966 for(i=0; i<COUNT; i++){ 7967 int j, s; 7968 7969 s= show_bits(&gb, 24); 7970 7971 START_TIMER 7972 j= get_se_golomb(&gb); 7973 if(j != i - COUNT/2){ 7974 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s); 7975// return -1; 7976 } 7977 STOP_TIMER("get_se_golomb"); 7978 } 7979 7980#if 0 7981 printf("testing 4x4 (I)DCT\n"); 7982 7983 DCTELEM block[16]; 7984 uint8_t src[16], ref[16]; 7985 uint64_t error= 0, max_error=0; 7986 7987 for(i=0; i<COUNT; i++){ 7988 int j; 7989// printf("%d %d %d\n", r1, r2, (r2-r1)*16); 7990 for(j=0; j<16; j++){ 7991 ref[j]= random()%255; 7992 src[j]= random()%255; 7993 } 7994 7995 h264_diff_dct_c(block, src, ref, 4); 7996 7997 //normalize 7998 for(j=0; j<16; j++){ 7999// printf("%d ", block[j]); 8000 block[j]= block[j]*4; 8001 if(j&1) block[j]= (block[j]*4 + 2)/5; 8002 if(j&4) block[j]= (block[j]*4 + 2)/5; 8003 } 8004// printf("\n"); 8005 8006 s->dsp.h264_idct_add(ref, block, 4); 8007/* for(j=0; j<16; j++){ 8008 printf("%d ", ref[j]); 8009 } 8010 printf("\n");*/ 8011 8012 for(j=0; j<16; j++){ 8013 int diff= FFABS(src[j] - ref[j]); 8014 8015 error+= diff*diff; 8016 max_error= FFMAX(max_error, diff); 8017 } 8018 } 8019 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error ); 8020 printf("testing quantizer\n"); 8021 for(qp=0; qp<52; qp++){ 8022 for(i=0; i<16; i++) 8023 src1_block[i]= src2_block[i]= random()%255; 8024 8025 } 8026 printf("Testing NAL layer\n"); 8027 8028 uint8_t bitstream[COUNT]; 8029 uint8_t nal[COUNT*2]; 8030 H264Context h; 8031 memset(&h, 0, sizeof(H264Context)); 8032 8033 for(i=0; i<COUNT; 
i++){ 8034 int zeros= i; 8035 int nal_length; 8036 int consumed; 8037 int out_length; 8038 uint8_t *out; 8039 int j; 8040 8041 for(j=0; j<COUNT; j++){ 8042 bitstream[j]= (random() % 255) + 1; 8043 } 8044 8045 for(j=0; j<zeros; j++){ 8046 int pos= random() % COUNT; 8047 while(bitstream[pos] == 0){ 8048 pos++; 8049 pos %= COUNT; 8050 } 8051 bitstream[pos]=0; 8052 } 8053 8054 START_TIMER 8055 8056 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2); 8057 if(nal_length<0){ 8058 printf("encoding failed\n"); 8059 return -1; 8060 } 8061 8062 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length); 8063 8064 STOP_TIMER("NAL") 8065 8066 if(out_length != COUNT){ 8067 printf("incorrect length %d %d\n", out_length, COUNT); 8068 return -1; 8069 } 8070 8071 if(consumed != nal_length){ 8072 printf("incorrect consumed length %d %d\n", nal_length, consumed); 8073 return -1; 8074 } 8075 8076 if(memcmp(bitstream, out, COUNT)){ 8077 printf("mismatch\n"); 8078 return -1; 8079 } 8080 } 8081#endif 8082 8083 printf("Testing RBSP\n"); 8084 8085 8086 return 0; 8087} 8088#endif /* TEST */ 8089 8090 8091static av_cold int decode_end(AVCodecContext *avctx) 8092{ 8093 H264Context *h = avctx->priv_data; 8094 MpegEncContext *s = &h->s; 8095 int i; 8096 8097 av_freep(&h->rbsp_buffer[0]); 8098 av_freep(&h->rbsp_buffer[1]); 8099 free_tables(h); //FIXME cleanup init stuff perhaps 8100 8101 for(i = 0; i < MAX_SPS_COUNT; i++) 8102 av_freep(h->sps_buffers + i); 8103 8104 for(i = 0; i < MAX_PPS_COUNT; i++) 8105 av_freep(h->pps_buffers + i); 8106 8107 MPV_common_end(s); 8108 8109// memset(h, 0, sizeof(H264Context)); 8110 8111 return 0; 8112} 8113 8114 8115AVCodec h264_decoder = { 8116 "h264", 8117 CODEC_TYPE_VIDEO, 8118 CODEC_ID_H264, 8119 sizeof(H264Context), 8120 decode_init, 8121 NULL, 8122 decode_end, 8123 decode_frame, 8124 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY, 8125 .flush= flush_dpb, 8126 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / 
MPEG-4 part 10"), 8127 .pix_fmts= ff_hwaccel_pixfmt_list_420, 8128}; 8129 8130#if CONFIG_H264_VDPAU_DECODER 8131AVCodec h264_vdpau_decoder = { 8132 "h264_vdpau", 8133 CODEC_TYPE_VIDEO, 8134 CODEC_ID_H264, 8135 sizeof(H264Context), 8136 decode_init, 8137 NULL, 8138 decode_end, 8139 decode_frame, 8140 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU, 8141 .flush= flush_dpb, 8142 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"), 8143}; 8144#endif 8145 8146#if CONFIG_SVQ3_DECODER 8147#include "svq3.c" 8148#endif 8149