1/* 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * H.264 / AVC / MPEG4 part10 codec. 25 * @author Michael Niedermayer <michaelni@gmx.at> 26 */ 27 28#include "internal.h" 29#include "dsputil.h" 30#include "avcodec.h" 31#include "mpegvideo.h" 32#include "h264.h" 33#include "h264data.h" 34#include "h264_mvpred.h" 35#include "h264_parser.h" 36#include "golomb.h" 37#include "mathops.h" 38#include "rectangle.h" 39#include "vdpau_internal.h" 40 41#include "cabac.h" 42 43//#undef NDEBUG 44#include <assert.h> 45 46static const uint8_t rem6[52]={ 470, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 48}; 49 50static const uint8_t div6[52]={ 510, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 52}; 53 54void ff_h264_write_back_intra_pred_mode(H264Context *h){ 55 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; 56 57 AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4); 58 mode[4]= 
h->intra4x4_pred_mode_cache[7+8*3]; 59 mode[5]= h->intra4x4_pred_mode_cache[7+8*2]; 60 mode[6]= h->intra4x4_pred_mode_cache[7+8*1]; 61} 62 63/** 64 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. 65 */ 66int ff_h264_check_intra4x4_pred_mode(H264Context *h){ 67 MpegEncContext * const s = &h->s; 68 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; 69 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; 70 int i; 71 72 if(!(h->top_samples_available&0x8000)){ 73 for(i=0; i<4; i++){ 74 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ]; 75 if(status<0){ 76 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); 77 return -1; 78 } else if(status){ 79 h->intra4x4_pred_mode_cache[scan8[0] + i]= status; 80 } 81 } 82 } 83 84 if((h->left_samples_available&0x8888)!=0x8888){ 85 static const int mask[4]={0x8000,0x2000,0x80,0x20}; 86 for(i=0; i<4; i++){ 87 if(!(h->left_samples_available&mask[i])){ 88 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; 89 if(status<0){ 90 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); 91 return -1; 92 } else if(status){ 93 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; 94 } 95 } 96 } 97 } 98 99 return 0; 100} //FIXME cleanup like ff_h264_check_intra_pred_mode 101 102/** 103 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. 
 *
 * @param mode requested prediction mode; values above 6 (or negative) are rejected
 * @return the (possibly replaced) prediction mode, or -1 if mode is out of
 *         range or needs a neighbour that is not available
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* indexed by mode: replacement mode when the top / left neighbour is
     * missing, -1 when the mode cannot be used without that neighbour */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    /* unsigned comparison: also catches negative values */
    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        /* only one of the two left availability bits is set */
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}

/**
 * Parses the one-byte NAL header and returns the payload with the
 * 00 00 03 escape sequences removed.
 *
 * h->nal_ref_idc and h->nal_unit_type are set from the first byte of src.
 * The payload is then scanned for a 00 00 0x (x <= 3) sequence: a sequence
 * with x != 3 is treated as the next start code and ends the NAL there.
 * If no escape is found the returned pointer aliases src + 1; otherwise the
 * data is copied into h->rbsp_buffer[] with every 00 00 03 replaced by 00 00.
 *
 * @param src        start of the NAL unit (header byte included)
 * @param dst_length receives the length of the returned payload
 * @param consumed   receives the number of bytes of src used, header included
 * @param length     number of bytes available at src
 * @return pointer to the payload, or NULL if the escape buffer could not be
 *         allocated
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;                //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;
#if 0
    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
#endif

    /* Locate the first 00 00 0x sequence. The three preprocessor variants
     * share the loop tail below; RS undoes the part of the stride that must
     * not be skipped after a candidate turned out not to be a match. */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        /* skip 8 bytes at once when the word-wide test finds no candidate */
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        /* skip 4 bytes at once when the word-wide test finds no candidate */
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    /* everything before the first escape candidate is copied unchanged */
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            /* src[si+2] cannot end a 00 00 0x sequence: copy two bytes */
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    /* fewer than three bytes left: no further escape possible */
    while(si<length)
        dst[di++]= src[si++];
nsc:

    /* zero the padding that follows the unescaped data */
    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}

/**
 * Finds the position of the lowest set bit in the byte at src.
 *
 * @return 1-based position (1..8) of the lowest set bit, or 0 if the byte
 *         is zero
 */
int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
    int v= *src;
    int r;

    tprintf(h->s.avctx, "rbsp trailing %X\n", v);

    for(r=1; r<9; r++){
        if(v&1) return r;
        v>>=1;
    }
    return 0;
}

/**
 * IDCT transforms the 16 dc values and dequantizes them.
242 * @param qp quantization parameter 243 */ 244static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ 245#define stride 16 246 int i; 247 int temp[16]; //FIXME check if this is a good idea 248 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; 249 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; 250 251//memset(block, 64, 2*256); 252//return; 253 for(i=0; i<4; i++){ 254 const int offset= y_offset[i]; 255 const int z0= block[offset+stride*0] + block[offset+stride*4]; 256 const int z1= block[offset+stride*0] - block[offset+stride*4]; 257 const int z2= block[offset+stride*1] - block[offset+stride*5]; 258 const int z3= block[offset+stride*1] + block[offset+stride*5]; 259 260 temp[4*i+0]= z0+z3; 261 temp[4*i+1]= z1+z2; 262 temp[4*i+2]= z1-z2; 263 temp[4*i+3]= z0-z3; 264 } 265 266 for(i=0; i<4; i++){ 267 const int offset= x_offset[i]; 268 const int z0= temp[4*0+i] + temp[4*2+i]; 269 const int z1= temp[4*0+i] - temp[4*2+i]; 270 const int z2= temp[4*1+i] - temp[4*3+i]; 271 const int z3= temp[4*1+i] + temp[4*3+i]; 272 273 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual 274 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); 275 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); 276 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); 277 } 278} 279 280#if 0 281/** 282 * DCT transforms the 16 dc values. 283 * @param qp quantization parameter ??? 
FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif

#undef xStride
#undef stride

/**
 * 2x2 butterfly over the four chroma DC values followed by scaling.
 * The four values are read from and written back to block[] at
 * stride*{0,1} + xStride*{0,1}; every result is multiplied by qmul and
 * shifted right by 7.
 * @param qp not read by this implementation
 */
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    /* horizontal sums/differences, reusing a, b, c as temporaries */
    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
}

#if 0
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif

/**
 * Motion-compensates one partition from a single reference picture:
 * luma through qpix_op[], then (unless gray-only decoding is active) both
 * chroma planes through chroma_op.
 *
 * @param pic           reference picture to read from
 * @param n             block index; the motion vector is h->mv_cache[list][scan8[n]]
 * @param square        if zero, a second luma call is issued at offset delta
 * @param chroma_height height passed to chroma_op
 * @param delta         byte offset of the second luma call (see square)
 * @param src_x_offset,src_y_offset position of the partition, multiplied by 8
 *                      and added to the motion vector
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    /* low two bits of each component select the interpolation function */
    const int luma_xy= (mx&3) + ((my&3)<<2);
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    /* source area reaches outside the allowed border: build it in
     * s->edge_emu_buffer and read from there instead */
    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    /* the emulation buffer is reused, so each plane is prepared right
     * before it is consumed */
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}

/**
 * Unweighted prediction of one partition. The first list used is rendered
 * with the "put" functions; if list0 was rendered, list1 is rendered with
 * the "avg" functions on top of it.
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* move the destinations to the partition, then make the offsets
     * picture-relative for the source lookup */
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);

        /* a following list1 prediction is averaged onto this result */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);
    }
}

/**
 * Weighted prediction of one partition.
 * With both lists, list1 is rendered into s->obmc_scratchpad and combined
 * with the list0 result by the biweight functions (implicit weights when
 * h->use_weight == 2, explicit weights otherwise). With a single list the
 * result is rendered in place and scaled by the weight functions.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weights: the pair sums to 64, log2 denominator 5 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit weights; the offsets of both references are summed */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}

/**
 * Predicts one partition, dispatching to the weighted or the standard path.
 * The weighted path is taken when h->use_weight == 1, or when
 * h->use_weight == 2, both lists are used and the implicit weight of the
 * reference pair differs from 32.
 * weight_op[0]/weight_avg[0] are used for luma, [3] for chroma.
 */
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                           int list0, int list1){
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
    else
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
}

/**
 * Issues prefetches into the reference picture of the given list, at a
 * position derived from the motion vector of block 0. Does nothing when
 * that block has no valid reference (refn < 0).
 */
static inline void prefetch_motion(H264Context *h, int list){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src= h->ref_list[list][refn].data;
        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
        /* stride src[2]-src[1]: the two prefetched lines are one per chroma plane */
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}

/**
 * Inter prediction of the current macroblock.
 * Walks the partitioning given by the macroblock type (16x16, 16x8, 8x16,
 * or 8x8 with per-quadrant sub types) and calls mc_part() once per
 * partition with the matching function-table entries and offsets.
 */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* one 8x8 quadrant per iteration, each with its own sub type */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                }
            }
        }
    }

    prefetch_motion(h, 1);
}


/**
 * Frees the tables allocated by ff_h264_alloc_tables() and the per-thread
 * buffers of every thread context. Thread contexts other than index 0 are
 * freed themselves as well.
 */
static void free_tables(H264Context *h){
    int i;
    H264Context *hx;
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    /* slice_table points into slice_table_base, which is gone now */
    h->slice_table= NULL;
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    for(i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if(!hx) continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
        av_freep(&hx->rbsp_buffer[1]);
        av_freep(&hx->rbsp_buffer[0]);
        hx->rbsp_buffer_size[0] = 0;
        hx->rbsp_buffer_size[1] = 0;
        /* context 0 is not freed here (ff_h264_decode_init sets it to h itself) */
        if (i) av_freep(&h->thread_context[i]);
    }
}

/**
 * Builds the 8x8 dequantization tables for all 52 qp values from the PPS
 * scaling matrices. If both 8x8 scaling matrices are equal the second table
 * aliases the first. Coefficients are stored transposed when the 8x8 IDCT
 * in use is not the C reference function.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,q,x;
    const int transpose = (h->h264dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
    h->dequant8_coeff[0] = h->dequant8_buffer[0];
    h->dequant8_coeff[1] = h->dequant8_buffer[1];

    for(i=0; i<2; i++ ){
        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
            h->dequant8_coeff[1] = h->dequant8_buffer[0];
            break;
        }

        for(q=0; q<52; q++){
            int shift = div6[q];
            int idx = rem6[q];
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}

/**
 * Builds the six 4x4 dequantization tables for all 52 qp values from the
 * PPS scaling matrices. A table whose scaling matrix equals an earlier one
 * aliases that earlier table instead of being recomputed. Coefficients are
 * stored transposed when the 4x4 IDCT in use is not the C reference function.
 */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    const int transpose = (h->h264dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        /* j<i: the loop above found a duplicate and broke out early */
        if(j<i)
            continue;

        for(q=0; q<52; q++){
            int shift = div6[q] + 2;
            int idx = rem6[q];
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}

/**
 * Initializes the 4x4 tables and, if the PPS enables the 8x8 transform, the
 * 8x8 tables. With SPS transform bypass the qp 0 entries are overwritten
 * with the constant 1<<6.
 */
static void init_dequant_tables(H264Context *h){
    int i,x;
    init_dequant4_coeff_table(h);
    if(h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);
    if(h->sps.transform_bypass){
        for(i=0; i<6; i++)
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][0][x] = 1<<6;
        if(h->pps.transform_8x8_mode)
            for(i=0; i<2; i++)
                for(x=0; x<64; x++)
                    h->dequant8_coeff[i][0][x] = 1<<6;
    }
}


/**
 * Allocates the per-picture tables of the decoder and fills the
 * macroblock-index translation tables mb2b_xy / mb2br_xy.
 * The dequant tables are initialized too if that has not happened yet.
 *
 * @return 0 on success, -1 on allocation failure (everything allocated so
 *         far is released through free_tables())
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* two macroblock rows per thread */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 32 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* all bytes 0xFF; slice_table starts two rows plus one entry into the
     * base so that negative neighbour offsets stay inside the allocation */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* without FMO the row tables wrap around every two macroblock rows */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h);
    return -1;
}

/**
 * Mimic alloc_tables(), but for every context thread.
790 */ 791static void clone_tables(H264Context *dst, H264Context *src, int i){ 792 MpegEncContext * const s = &src->s; 793 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride; 794 dst->non_zero_count = src->non_zero_count; 795 dst->slice_table = src->slice_table; 796 dst->cbp_table = src->cbp_table; 797 dst->mb2b_xy = src->mb2b_xy; 798 dst->mb2br_xy = src->mb2br_xy; 799 dst->chroma_pred_mode_table = src->chroma_pred_mode_table; 800 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride; 801 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride; 802 dst->direct_table = src->direct_table; 803 dst->list_counts = src->list_counts; 804 805 dst->s.obmc_scratchpad = NULL; 806 ff_h264_pred_init(&dst->hpc, src->s.codec_id); 807} 808 809/** 810 * Init context 811 * Allocate buffers which are not shared amongst multiple threads. 812 */ 813static int context_init(H264Context *h){ 814 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) 815 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) 816 817 h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] = 818 h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; 819 820 return 0; 821fail: 822 return -1; // free_tables will clean up for us 823} 824 825static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size); 826 827static av_cold void common_init(H264Context *h){ 828 MpegEncContext * const s = &h->s; 829 830 s->width = s->avctx->width; 831 s->height = s->avctx->height; 832 s->codec_id= s->avctx->codec->id; 833 834 ff_h264dsp_init(&h->h264dsp); 835 ff_h264_pred_init(&h->hpc, s->codec_id); 836 837 h->dequant_coeff_pps= -1; 838 s->unrestricted_mv=1; 839 s->decode=1; //FIXME 840 841 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early 842 843 
memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t)); 844 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t)); 845} 846 847av_cold int ff_h264_decode_init(AVCodecContext *avctx){ 848 H264Context *h= avctx->priv_data; 849 MpegEncContext * const s = &h->s; 850 851 MPV_decode_defaults(s); 852 853 s->avctx = avctx; 854 common_init(h); 855 856 s->out_format = FMT_H264; 857 s->workaround_bugs= avctx->workaround_bugs; 858 859 // set defaults 860// s->decode_mb= ff_h263_decode_mb; 861 s->quarter_sample = 1; 862 if(!avctx->has_b_frames) 863 s->low_delay= 1; 864 865 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; 866 867 ff_h264_decode_init_vlc(); 868 869 h->thread_context[0] = h; 870 h->outputed_poc = INT_MIN; 871 h->prev_poc_msb= 1<<16; 872 h->x264_build = -1; 873 ff_h264_reset_sei(h); 874 if(avctx->codec_id == CODEC_ID_H264){ 875 if(avctx->ticks_per_frame == 1){ 876 s->avctx->time_base.den *=2; 877 } 878 avctx->ticks_per_frame = 2; 879 } 880 881 if(avctx->extradata_size > 0 && avctx->extradata && *(char *)avctx->extradata == 1){ 882 int i, cnt, nalsize; 883 unsigned char *p = avctx->extradata; 884 885 h->is_avc = 1; 886 887 if(avctx->extradata_size < 7) { 888 av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); 889 return -1; 890 } 891 /* sps and pps in the avcC always have length coded with 2 bytes, 892 so put a fake nal_length_size = 2 while parsing them */ 893 h->nal_length_size = 2; 894 // Decode sps from avcC 895 cnt = *(p+5) & 0x1f; // Number of sps 896 p += 6; 897 for (i = 0; i < cnt; i++) { 898 nalsize = AV_RB16(p) + 2; 899 if(decode_nal_units(h, p, nalsize) < 0) { 900 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i); 901 return -1; 902 } 903 p += nalsize; 904 } 905 // Decode pps from avcC 906 cnt = *(p++); // Number of pps 907 for (i = 0; i < cnt; i++) { 908 nalsize = AV_RB16(p) + 2; 909 if(decode_nal_units(h, p, nalsize) != nalsize) { 910 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i); 911 return -1; 912 } 913 p 
+= nalsize; 914 } 915 // Now store right nal length size, that will be use to parse all other nals 916 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1; 917 } else { 918 h->is_avc = 0; 919 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0) 920 return -1; 921 } 922 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){ 923 s->avctx->has_b_frames = h->sps.num_reorder_frames; 924 s->low_delay = 0; 925 } 926 927 return 0; 928} 929 930int ff_h264_frame_start(H264Context *h){ 931 MpegEncContext * const s = &h->s; 932 int i; 933 934 if(MPV_frame_start(s, s->avctx) < 0) 935 return -1; 936 ff_er_frame_start(s); 937 /* 938 * MPV_frame_start uses pict_type to derive key_frame. 939 * This is incorrect for H.264; IDR markings must be used. 940 * Zero here; IDR markings per slice in frame or fields are ORed in later. 941 * See decode_nal_units(). 942 */ 943 s->current_picture_ptr->key_frame= 0; 944 s->current_picture_ptr->mmco_reset= 0; 945 946 assert(s->linesize && s->uvlinesize); 947 948 for(i=0; i<16; i++){ 949 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); 950 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); 951 } 952 for(i=0; i<4; i++){ 953 h->block_offset[16+i]= 954 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); 955 h->block_offset[24+16+i]= 956 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); 957 } 958 959 /* can't be in alloc_tables because linesize isn't known there. 960 * FIXME: redo bipred weight to not require extra buffer? 
*/ 961 for(i = 0; i < s->avctx->thread_count; i++) 962 if(!h->thread_context[i]->s.obmc_scratchpad) 963 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); 964 965 /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/ 966 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table)); 967 968// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; 969 970 // We mark the current picture as non-reference after allocating it, so 971 // that if we break out due to an error it can be released automatically 972 // in the next MPV_frame_start(). 973 // SVQ3 as well as most other codecs have only last/next/current and thus 974 // get released even with set reference, besides SVQ3 and others do not 975 // mark frames as reference later "naturally". 976 if(s->codec_id != CODEC_ID_SVQ3) 977 s->current_picture_ptr->reference= 0; 978 979 s->current_picture_ptr->field_poc[0]= 980 s->current_picture_ptr->field_poc[1]= INT_MAX; 981 assert(s->current_picture_ptr->long_ref==0); 982 983 return 0; 984} 985 986static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ 987 MpegEncContext * const s = &h->s; 988 uint8_t *top_border; 989 int top_idx = 1; 990 991 src_y -= linesize; 992 src_cb -= uvlinesize; 993 src_cr -= uvlinesize; 994 995 if(!simple && FRAME_MBAFF){ 996 if(s->mb_y&1){ 997 if(!MB_MBAFF){ 998 top_border = h->top_borders[0][s->mb_x]; 999 AV_COPY128(top_border, src_y + 15*linesize); 1000 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 1001 AV_COPY64(top_border+16, src_cb+7*uvlinesize); 1002 AV_COPY64(top_border+24, src_cr+7*uvlinesize); 1003 } 1004 } 1005 }else if(MB_MBAFF){ 1006 top_idx = 0; 1007 }else 1008 return; 1009 } 1010 1011 top_border = h->top_borders[top_idx][s->mb_x]; 1012 // There are two lines 
saved, the line above the the top macroblock of a pair, 1013 // and the line above the bottom macroblock 1014 AV_COPY128(top_border, src_y + 16*linesize); 1015 1016 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 1017 AV_COPY64(top_border+16, src_cb+8*uvlinesize); 1018 AV_COPY64(top_border+24, src_cr+8*uvlinesize); 1019 } 1020} 1021 1022static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ 1023 MpegEncContext * const s = &h->s; 1024 int deblock_left; 1025 int deblock_top; 1026 int top_idx = 1; 1027 uint8_t *top_border_m1; 1028 uint8_t *top_border; 1029 1030 if(!simple && FRAME_MBAFF){ 1031 if(s->mb_y&1){ 1032 if(!MB_MBAFF) 1033 return; 1034 }else{ 1035 top_idx = MB_MBAFF ? 0 : 1; 1036 } 1037 } 1038 1039 if(h->deblocking_filter == 2) { 1040 deblock_left = h->left_type[0]; 1041 deblock_top = h->top_type; 1042 } else { 1043 deblock_left = (s->mb_x > 0); 1044 deblock_top = (s->mb_y > !!MB_FIELD); 1045 } 1046 1047 src_y -= linesize + 1; 1048 src_cb -= uvlinesize + 1; 1049 src_cr -= uvlinesize + 1; 1050 1051 top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; 1052 top_border = h->top_borders[top_idx][s->mb_x]; 1053 1054#define XCHG(a,b,xchg)\ 1055if (xchg) AV_SWAP64(b,a);\ 1056else AV_COPY64(b,a); 1057 1058 if(deblock_top){ 1059 if(deblock_left){ 1060 XCHG(top_border_m1+8, src_y -7, 1); 1061 } 1062 XCHG(top_border+0, src_y +1, xchg); 1063 XCHG(top_border+8, src_y +9, 1); 1064 if(s->mb_x+1 < s->mb_width){ 1065 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1); 1066 } 1067 } 1068 1069 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 1070 if(deblock_top){ 1071 if(deblock_left){ 1072 XCHG(top_border_m1+16, src_cb -7, 1); 1073 XCHG(top_border_m1+24, src_cr -7, 1); 1074 } 1075 XCHG(top_border+16, src_cb+1, 1); 1076 XCHG(top_border+24, src_cr+1, 1); 1077 } 1078 } 1079} 1080 1081static av_always_inline void hl_decode_mb_internal(H264Context *h, int 
simple){ 1082 MpegEncContext * const s = &h->s; 1083 const int mb_x= s->mb_x; 1084 const int mb_y= s->mb_y; 1085 const int mb_xy= h->mb_xy; 1086 const int mb_type= s->current_picture.mb_type[mb_xy]; 1087 uint8_t *dest_y, *dest_cb, *dest_cr; 1088 int linesize, uvlinesize /*dct_offset*/; 1089 int i; 1090 int *block_offset = &h->block_offset[0]; 1091 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass); 1092 /* is_h264 should always be true if SVQ3 is disabled. */ 1093 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; 1094 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); 1095 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); 1096 1097 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; 1098 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; 1099 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; 1100 1101 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4); 1102 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2); 1103 1104 h->list_counts[mb_xy]= h->list_count; 1105 1106 if (!simple && MB_FIELD) { 1107 linesize = h->mb_linesize = s->linesize * 2; 1108 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; 1109 block_offset = &h->block_offset[24]; 1110 if(mb_y&1){ //FIXME move out of this function? 
1111 dest_y -= s->linesize*15; 1112 dest_cb-= s->uvlinesize*7; 1113 dest_cr-= s->uvlinesize*7; 1114 } 1115 if(FRAME_MBAFF) { 1116 int list; 1117 for(list=0; list<h->list_count; list++){ 1118 if(!USES_LIST(mb_type, list)) 1119 continue; 1120 if(IS_16X16(mb_type)){ 1121 int8_t *ref = &h->ref_cache[list][scan8[0]]; 1122 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); 1123 }else{ 1124 for(i=0; i<16; i+=4){ 1125 int ref = h->ref_cache[list][scan8[i]]; 1126 if(ref >= 0) 1127 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1); 1128 } 1129 } 1130 } 1131 } 1132 } else { 1133 linesize = h->mb_linesize = s->linesize; 1134 uvlinesize = h->mb_uvlinesize = s->uvlinesize; 1135// dct_offset = s->linesize * 16; 1136 } 1137 1138 if (!simple && IS_INTRA_PCM(mb_type)) { 1139 for (i=0; i<16; i++) { 1140 memcpy(dest_y + i* linesize, h->mb + i*8, 16); 1141 } 1142 for (i=0; i<8; i++) { 1143 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); 1144 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); 1145 } 1146 } else { 1147 if(IS_INTRA(mb_type)){ 1148 if(h->deblocking_filter) 1149 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); 1150 1151 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ 1152 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); 1153 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); 1154 } 1155 1156 if(IS_INTRA4x4(mb_type)){ 1157 if(simple || !s->encoding){ 1158 if(IS_8x8DCT(mb_type)){ 1159 if(transform_bypass){ 1160 idct_dc_add = 1161 idct_add = s->dsp.add_pixels8; 1162 }else{ 1163 idct_dc_add = h->h264dsp.h264_idct8_dc_add; 1164 idct_add = h->h264dsp.h264_idct8_add; 1165 } 1166 for(i=0; i<16; i+=4){ 1167 uint8_t * const ptr= dest_y + block_offset[i]; 1168 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; 1169 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ 1170 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize); 1171 }else{ 1172 const int nnz = 
h->non_zero_count_cache[ scan8[i] ]; 1173 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, 1174 (h->topright_samples_available<<i)&0x4000, linesize); 1175 if(nnz){ 1176 if(nnz == 1 && h->mb[i*16]) 1177 idct_dc_add(ptr, h->mb + i*16, linesize); 1178 else 1179 idct_add (ptr, h->mb + i*16, linesize); 1180 } 1181 } 1182 } 1183 }else{ 1184 if(transform_bypass){ 1185 idct_dc_add = 1186 idct_add = s->dsp.add_pixels4; 1187 }else{ 1188 idct_dc_add = h->h264dsp.h264_idct_dc_add; 1189 idct_add = h->h264dsp.h264_idct_add; 1190 } 1191 for(i=0; i<16; i++){ 1192 uint8_t * const ptr= dest_y + block_offset[i]; 1193 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; 1194 1195 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ 1196 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize); 1197 }else{ 1198 uint8_t *topright; 1199 int nnz, tr; 1200 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ 1201 const int topright_avail= (h->topright_samples_available<<i)&0x8000; 1202 assert(mb_y || linesize <= block_offset[i]); 1203 if(!topright_avail){ 1204 tr= ptr[3 - linesize]*0x01010101; 1205 topright= (uint8_t*) &tr; 1206 }else 1207 topright= ptr + 4 - linesize; 1208 }else 1209 topright= NULL; 1210 1211 h->hpc.pred4x4[ dir ](ptr, topright, linesize); 1212 nnz = h->non_zero_count_cache[ scan8[i] ]; 1213 if(nnz){ 1214 if(is_h264){ 1215 if(nnz == 1 && h->mb[i*16]) 1216 idct_dc_add(ptr, h->mb + i*16, linesize); 1217 else 1218 idct_add (ptr, h->mb + i*16, linesize); 1219 }else 1220 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); 1221 } 1222 } 1223 } 1224 } 1225 } 1226 }else{ 1227 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); 1228 if(is_h264){ 1229 if(!transform_bypass) 1230 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]); 1231 }else 1232 ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); 1233 } 1234 if(h->deblocking_filter) 1235 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 
0, simple); 1236 }else if(is_h264){ 1237 hl_motion(h, dest_y, dest_cb, dest_cr, 1238 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, 1239 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, 1240 h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); 1241 } 1242 1243 1244 if(!IS_INTRA4x4(mb_type)){ 1245 if(is_h264){ 1246 if(IS_INTRA16x16(mb_type)){ 1247 if(transform_bypass){ 1248 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){ 1249 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); 1250 }else{ 1251 for(i=0; i<16; i++){ 1252 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) 1253 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize); 1254 } 1255 } 1256 }else{ 1257 h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); 1258 } 1259 }else if(h->cbp&15){ 1260 if(transform_bypass){ 1261 const int di = IS_8x8DCT(mb_type) ? 4 : 1; 1262 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; 1263 for(i=0; i<16; i+=di){ 1264 if(h->non_zero_count_cache[ scan8[i] ]){ 1265 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); 1266 } 1267 } 1268 }else{ 1269 if(IS_8x8DCT(mb_type)){ 1270 h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); 1271 }else{ 1272 h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache); 1273 } 1274 } 1275 } 1276 }else{ 1277 for(i=0; i<16; i++){ 1278 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below 1279 uint8_t * const ptr= dest_y + block_offset[i]; 1280 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 
1 : 0); 1281 } 1282 } 1283 } 1284 } 1285 1286 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){ 1287 uint8_t *dest[2] = {dest_cb, dest_cr}; 1288 if(transform_bypass){ 1289 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ 1290 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize); 1291 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize); 1292 }else{ 1293 idct_add = s->dsp.add_pixels4; 1294 for(i=16; i<16+8; i++){ 1295 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]) 1296 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); 1297 } 1298 } 1299 }else{ 1300 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); 1301 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); 1302 if(is_h264){ 1303 idct_add = h->h264dsp.h264_idct_add; 1304 idct_dc_add = h->h264dsp.h264_idct_dc_add; 1305 for(i=16; i<16+8; i++){ 1306 if(h->non_zero_count_cache[ scan8[i] ]) 1307 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); 1308 else if(h->mb[i*16]) 1309 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); 1310 } 1311 }else{ 1312 for(i=16; i<16+8; i++){ 1313 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ 1314 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; 1315 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2); 1316 } 1317 } 1318 } 1319 } 1320 } 1321 } 1322 if(h->cbp || IS_INTRA(mb_type)) 1323 s->dsp.clear_blocks(h->mb); 1324} 1325 1326/** 1327 * Process a macroblock; this case avoids checks for expensive uncommon cases. 
1328 */ 1329static void hl_decode_mb_simple(H264Context *h){ 1330 hl_decode_mb_internal(h, 1); 1331} 1332 1333/** 1334 * Process a macroblock; this handles edge cases, such as interlacing. 1335 */ 1336static void av_noinline hl_decode_mb_complex(H264Context *h){ 1337 hl_decode_mb_internal(h, 0); 1338} 1339 1340void ff_h264_hl_decode_mb(H264Context *h){ 1341 MpegEncContext * const s = &h->s; 1342 const int mb_xy= h->mb_xy; 1343 const int mb_type= s->current_picture.mb_type[mb_xy]; 1344 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; 1345 1346 if (is_complex) 1347 hl_decode_mb_complex(h); 1348 else hl_decode_mb_simple(h); 1349} 1350 1351static int pred_weight_table(H264Context *h){ 1352 MpegEncContext * const s = &h->s; 1353 int list, i; 1354 int luma_def, chroma_def; 1355 1356 h->use_weight= 0; 1357 h->use_weight_chroma= 0; 1358 h->luma_log2_weight_denom= get_ue_golomb(&s->gb); 1359 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb); 1360 luma_def = 1<<h->luma_log2_weight_denom; 1361 chroma_def = 1<<h->chroma_log2_weight_denom; 1362 1363 for(list=0; list<2; list++){ 1364 h->luma_weight_flag[list] = 0; 1365 h->chroma_weight_flag[list] = 0; 1366 for(i=0; i<h->ref_count[list]; i++){ 1367 int luma_weight_flag, chroma_weight_flag; 1368 1369 luma_weight_flag= get_bits1(&s->gb); 1370 if(luma_weight_flag){ 1371 h->luma_weight[i][list][0]= get_se_golomb(&s->gb); 1372 h->luma_weight[i][list][1]= get_se_golomb(&s->gb); 1373 if( h->luma_weight[i][list][0] != luma_def 1374 || h->luma_weight[i][list][1] != 0) { 1375 h->use_weight= 1; 1376 h->luma_weight_flag[list]= 1; 1377 } 1378 }else{ 1379 h->luma_weight[i][list][0]= luma_def; 1380 h->luma_weight[i][list][1]= 0; 1381 } 1382 1383 if(CHROMA){ 1384 chroma_weight_flag= get_bits1(&s->gb); 1385 if(chroma_weight_flag){ 1386 int j; 1387 for(j=0; j<2; j++){ 1388 h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb); 1389 h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb); 1390 if( 
h->chroma_weight[i][list][j][0] != chroma_def 1391 || h->chroma_weight[i][list][j][1] != 0) { 1392 h->use_weight_chroma= 1; 1393 h->chroma_weight_flag[list]= 1; 1394 } 1395 } 1396 }else{ 1397 int j; 1398 for(j=0; j<2; j++){ 1399 h->chroma_weight[i][list][j][0]= chroma_def; 1400 h->chroma_weight[i][list][j][1]= 0; 1401 } 1402 } 1403 } 1404 } 1405 if(h->slice_type_nos != FF_B_TYPE) break; 1406 } 1407 h->use_weight= h->use_weight || h->use_weight_chroma; 1408 return 0; 1409} 1410 1411/** 1412 * Initialize implicit_weight table. 1413 * @param field, 0/1 initialize the weight for interlaced MBAFF 1414 * -1 initializes the rest 1415 */ 1416static void implicit_weight_table(H264Context *h, int field){ 1417 MpegEncContext * const s = &h->s; 1418 int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; 1419 1420 for (i = 0; i < 2; i++) { 1421 h->luma_weight_flag[i] = 0; 1422 h->chroma_weight_flag[i] = 0; 1423 } 1424 1425 if(field < 0){ 1426 cur_poc = s->current_picture_ptr->poc; 1427 if( h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF 1428 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){ 1429 h->use_weight= 0; 1430 h->use_weight_chroma= 0; 1431 return; 1432 } 1433 ref_start= 0; 1434 ref_count0= h->ref_count[0]; 1435 ref_count1= h->ref_count[1]; 1436 }else{ 1437 cur_poc = s->current_picture_ptr->field_poc[field]; 1438 ref_start= 16; 1439 ref_count0= 16+2*h->ref_count[0]; 1440 ref_count1= 16+2*h->ref_count[1]; 1441 } 1442 1443 h->use_weight= 2; 1444 h->use_weight_chroma= 2; 1445 h->luma_log2_weight_denom= 5; 1446 h->chroma_log2_weight_denom= 5; 1447 1448 for(ref0=ref_start; ref0 < ref_count0; ref0++){ 1449 int poc0 = h->ref_list[0][ref0].poc; 1450 for(ref1=ref_start; ref1 < ref_count1; ref1++){ 1451 int poc1 = h->ref_list[1][ref1].poc; 1452 int td = av_clip(poc1 - poc0, -128, 127); 1453 int w= 32; 1454 if(td){ 1455 int tb = av_clip(cur_poc - poc0, -128, 127); 1456 int tx = (16384 + (FFABS(td) >> 1)) / td; 1457 int dist_scale_factor = (tb*tx + 
32) >> 8; 1458 if(dist_scale_factor >= -64 && dist_scale_factor <= 128) 1459 w = 64 - dist_scale_factor; 1460 } 1461 if(field<0){ 1462 h->implicit_weight[ref0][ref1][0]= 1463 h->implicit_weight[ref0][ref1][1]= w; 1464 }else{ 1465 h->implicit_weight[ref0][ref1][field]=w; 1466 } 1467 } 1468 } 1469} 1470 1471/** 1472 * instantaneous decoder refresh. 1473 */ 1474static void idr(H264Context *h){ 1475 ff_h264_remove_all_refs(h); 1476 h->prev_frame_num= 0; 1477 h->prev_frame_num_offset= 0; 1478 h->prev_poc_msb= 1479 h->prev_poc_lsb= 0; 1480} 1481 1482/* forget old pics after a seek */ 1483static void flush_dpb(AVCodecContext *avctx){ 1484 H264Context *h= avctx->priv_data; 1485 int i; 1486 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) { 1487 if(h->delayed_pic[i]) 1488 h->delayed_pic[i]->reference= 0; 1489 h->delayed_pic[i]= NULL; 1490 } 1491 h->outputed_poc= INT_MIN; 1492 h->prev_interlaced_frame = 1; 1493 idr(h); 1494 if(h->s.current_picture_ptr) 1495 h->s.current_picture_ptr->reference= 0; 1496 h->s.first_field= 0; 1497 ff_h264_reset_sei(h); 1498 ff_mpeg_flush(avctx); 1499} 1500 1501static int init_poc(H264Context *h){ 1502 MpegEncContext * const s = &h->s; 1503 const int max_frame_num= 1<<h->sps.log2_max_frame_num; 1504 int field_poc[2]; 1505 Picture *cur = s->current_picture_ptr; 1506 1507 h->frame_num_offset= h->prev_frame_num_offset; 1508 if(h->frame_num < h->prev_frame_num) 1509 h->frame_num_offset += max_frame_num; 1510 1511 if(h->sps.poc_type==0){ 1512 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb; 1513 1514 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2) 1515 h->poc_msb = h->prev_poc_msb + max_poc_lsb; 1516 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2) 1517 h->poc_msb = h->prev_poc_msb - max_poc_lsb; 1518 else 1519 h->poc_msb = h->prev_poc_msb; 1520//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb); 1521 field_poc[0] = 1522 field_poc[1] = h->poc_msb + h->poc_lsb; 1523 if(s->picture_structure 
== PICT_FRAME) 1524 field_poc[1] += h->delta_poc_bottom; 1525 }else if(h->sps.poc_type==1){ 1526 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; 1527 int i; 1528 1529 if(h->sps.poc_cycle_length != 0) 1530 abs_frame_num = h->frame_num_offset + h->frame_num; 1531 else 1532 abs_frame_num = 0; 1533 1534 if(h->nal_ref_idc==0 && abs_frame_num > 0) 1535 abs_frame_num--; 1536 1537 expected_delta_per_poc_cycle = 0; 1538 for(i=0; i < h->sps.poc_cycle_length; i++) 1539 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse 1540 1541 if(abs_frame_num > 0){ 1542 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; 1543 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; 1544 1545 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; 1546 for(i = 0; i <= frame_num_in_poc_cycle; i++) 1547 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ]; 1548 } else 1549 expectedpoc = 0; 1550 1551 if(h->nal_ref_idc == 0) 1552 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; 1553 1554 field_poc[0] = expectedpoc + h->delta_poc[0]; 1555 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; 1556 1557 if(s->picture_structure == PICT_FRAME) 1558 field_poc[1] += h->delta_poc[1]; 1559 }else{ 1560 int poc= 2*(h->frame_num_offset + h->frame_num); 1561 1562 if(!h->nal_ref_idc) 1563 poc--; 1564 1565 field_poc[0]= poc; 1566 field_poc[1]= poc; 1567 } 1568 1569 if(s->picture_structure != PICT_BOTTOM_FIELD) 1570 s->current_picture_ptr->field_poc[0]= field_poc[0]; 1571 if(s->picture_structure != PICT_TOP_FIELD) 1572 s->current_picture_ptr->field_poc[1]= field_poc[1]; 1573 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]); 1574 1575 return 0; 1576} 1577 1578 1579/** 1580 * initialize scan tables 1581 */ 1582static void init_scan_tables(H264Context *h){ 1583 int i; 1584 if(h->h264dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly 1585 memcpy(h->zigzag_scan, 
zigzag_scan, 16*sizeof(uint8_t)); 1586 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); 1587 }else{ 1588 for(i=0; i<16; i++){ 1589#define T(x) (x>>2) | ((x<<2) & 0xF) 1590 h->zigzag_scan[i] = T(zigzag_scan[i]); 1591 h-> field_scan[i] = T( field_scan[i]); 1592#undef T 1593 } 1594 } 1595 if(h->h264dsp.h264_idct8_add == ff_h264_idct8_add_c){ 1596 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t)); 1597 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); 1598 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t)); 1599 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t)); 1600 }else{ 1601 for(i=0; i<64; i++){ 1602#define T(x) (x>>3) | ((x&7)<<3) 1603 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]); 1604 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); 1605 h->field_scan8x8[i] = T(field_scan8x8[i]); 1606 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); 1607#undef T 1608 } 1609 } 1610 if(h->sps.transform_bypass){ //FIXME same ugly 1611 h->zigzag_scan_q0 = zigzag_scan; 1612 h->zigzag_scan8x8_q0 = ff_zigzag_direct; 1613 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; 1614 h->field_scan_q0 = field_scan; 1615 h->field_scan8x8_q0 = field_scan8x8; 1616 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc; 1617 }else{ 1618 h->zigzag_scan_q0 = h->zigzag_scan; 1619 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; 1620 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; 1621 h->field_scan_q0 = h->field_scan; 1622 h->field_scan8x8_q0 = h->field_scan8x8; 1623 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; 1624 } 1625} 1626 1627static void field_end(H264Context *h){ 1628 MpegEncContext * const s = &h->s; 1629 AVCodecContext * const avctx= s->avctx; 1630 s->mb_y= 0; 1631 1632 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; 1633 s->current_picture_ptr->pict_type= s->pict_type; 1634 1635 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 1636 
ff_vdpau_h264_set_reference_frames(s); 1637 1638 if(!s->dropable) { 1639 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index); 1640 h->prev_poc_msb= h->poc_msb; 1641 h->prev_poc_lsb= h->poc_lsb; 1642 } 1643 h->prev_frame_num_offset= h->frame_num_offset; 1644 h->prev_frame_num= h->frame_num; 1645 1646 if (avctx->hwaccel) { 1647 if (avctx->hwaccel->end_frame(avctx) < 0) 1648 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n"); 1649 } 1650 1651 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 1652 ff_vdpau_h264_picture_complete(s); 1653 1654 /* 1655 * FIXME: Error handling code does not seem to support interlaced 1656 * when slices span multiple rows 1657 * The ff_er_add_slice calls don't work right for bottom 1658 * fields; they cause massive erroneous error concealing 1659 * Error marking covers both fields (top and bottom). 1660 * This causes a mismatched s->error_count 1661 * and a bad error table. Further, the error count goes to 1662 * INT_MAX when called for bottom field, because mb_y is 1663 * past end by one (callers fault) and resync_mb_y != 0 1664 * causes problems for the first MB line, too. 1665 */ 1666 if (!FIELD_PICTURE) 1667 ff_er_frame_end(s); 1668 1669 MPV_frame_end(s); 1670 1671 h->current_slice=0; 1672} 1673 1674/** 1675 * Replicates H264 "master" context to thread contexts. 
1676 */ 1677static void clone_slice(H264Context *dst, H264Context *src) 1678{ 1679 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset)); 1680 dst->s.current_picture_ptr = src->s.current_picture_ptr; 1681 dst->s.current_picture = src->s.current_picture; 1682 dst->s.linesize = src->s.linesize; 1683 dst->s.uvlinesize = src->s.uvlinesize; 1684 dst->s.first_field = src->s.first_field; 1685 1686 dst->prev_poc_msb = src->prev_poc_msb; 1687 dst->prev_poc_lsb = src->prev_poc_lsb; 1688 dst->prev_frame_num_offset = src->prev_frame_num_offset; 1689 dst->prev_frame_num = src->prev_frame_num; 1690 dst->short_ref_count = src->short_ref_count; 1691 1692 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref)); 1693 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref)); 1694 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list)); 1695 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list)); 1696 1697 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); 1698 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff)); 1699} 1700 1701/** 1702 * decodes a slice header. 1703 * This will also call MPV_common_init() and frame_start() as needed. 
1704 * 1705 * @param h h264context 1706 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding) 1707 * 1708 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded 1709 */ 1710static int decode_slice_header(H264Context *h, H264Context *h0){ 1711 MpegEncContext * const s = &h->s; 1712 MpegEncContext * const s0 = &h0->s; 1713 unsigned int first_mb_in_slice; 1714 unsigned int pps_id; 1715 int num_ref_idx_active_override_flag; 1716 unsigned int slice_type, tmp, i, j; 1717 int default_ref_list_done = 0; 1718 int last_pic_structure; 1719 1720 s->dropable= h->nal_ref_idc == 0; 1721 1722 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){ 1723 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; 1724 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; 1725 }else{ 1726 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; 1727 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab; 1728 } 1729 1730 first_mb_in_slice= get_ue_golomb(&s->gb); 1731 1732 if(first_mb_in_slice == 0){ //FIXME better field boundary detection 1733 if(h0->current_slice && FIELD_PICTURE){ 1734 field_end(h); 1735 } 1736 1737 h0->current_slice = 0; 1738 if (!s0->first_field) 1739 s->current_picture_ptr= NULL; 1740 } 1741 1742 slice_type= get_ue_golomb_31(&s->gb); 1743 if(slice_type > 9){ 1744 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y); 1745 return -1; 1746 } 1747 if(slice_type > 4){ 1748 slice_type -= 5; 1749 h->slice_type_fixed=1; 1750 }else 1751 h->slice_type_fixed=0; 1752 1753 slice_type= golomb_to_pict_type[ slice_type ]; 1754 if (slice_type == FF_I_TYPE 1755 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) { 1756 default_ref_list_done = 1; 1757 } 1758 h->slice_type= slice_type; 1759 h->slice_type_nos= slice_type & 3; 1760 1761 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though 1762 1763 pps_id= get_ue_golomb(&s->gb); 1764 
if(pps_id>=MAX_PPS_COUNT){ 1765 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); 1766 return -1; 1767 } 1768 if(!h0->pps_buffers[pps_id]) { 1769 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id); 1770 return -1; 1771 } 1772 h->pps= *h0->pps_buffers[pps_id]; 1773 1774 if(!h0->sps_buffers[h->pps.sps_id]) { 1775 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id); 1776 return -1; 1777 } 1778 h->sps = *h0->sps_buffers[h->pps.sps_id]; 1779 1780 s->avctx->profile = h->sps.profile_idc; 1781 s->avctx->level = h->sps.level_idc; 1782 s->avctx->refs = h->sps.ref_frame_count; 1783 1784 if(h == h0 && h->dequant_coeff_pps != pps_id){ 1785 h->dequant_coeff_pps = pps_id; 1786 init_dequant_tables(h); 1787 } 1788 1789 s->mb_width= h->sps.mb_width; 1790 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); 1791 1792 h->b_stride= s->mb_width*4; 1793 1794 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7); 1795 if(h->sps.frame_mbs_only_flag) 1796 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7); 1797 else 1798 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3); 1799 1800 if (s->context_initialized 1801 && ( s->width != s->avctx->width || s->height != s->avctx->height 1802 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) { 1803 if(h != h0) 1804 return -1; // width / height changed during parallelized decoding 1805 free_tables(h); 1806 flush_dpb(s->avctx); 1807 MPV_common_end(s); 1808 } 1809 if (!s->context_initialized) { 1810 if(h != h0) 1811 return -1; // we cant (re-)initialize context during parallel decoding 1812 1813 avcodec_set_dimensions(s->avctx, s->width, s->height); 1814 s->avctx->sample_aspect_ratio= h->sps.sar; 1815 if(!s->avctx->sample_aspect_ratio.den) 1816 s->avctx->sample_aspect_ratio.den = 1; 1817 1818 if(h->sps.video_signal_type_present_flag){ 1819 s->avctx->color_range = h->sps.full_range ? 
AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; 1820 if(h->sps.colour_description_present_flag){ 1821 s->avctx->color_primaries = h->sps.color_primaries; 1822 s->avctx->color_trc = h->sps.color_trc; 1823 s->avctx->colorspace = h->sps.colorspace; 1824 } 1825 } 1826 1827 if(h->sps.timing_info_present_flag){ 1828 int64_t den= h->sps.time_scale; 1829 if(h->x264_build < 44U) 1830 den *= 2; 1831 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, 1832 h->sps.num_units_in_tick, den, 1<<30); 1833 } 1834 s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts); 1835 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt); 1836 1837 if (MPV_common_init(s) < 0) 1838 return -1; 1839 s->first_field = 0; 1840 h->prev_interlaced_frame = 1; 1841 1842 init_scan_tables(h); 1843 ff_h264_alloc_tables(h); 1844 1845 for(i = 1; i < s->avctx->thread_count; i++) { 1846 H264Context *c; 1847 c = h->thread_context[i] = av_malloc(sizeof(H264Context)); 1848 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); 1849 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); 1850 c->h264dsp = h->h264dsp; 1851 c->sps = h->sps; 1852 c->pps = h->pps; 1853 init_scan_tables(c); 1854 clone_tables(c, h, i); 1855 } 1856 1857 for(i = 0; i < s->avctx->thread_count; i++) 1858 if(context_init(h->thread_context[i]) < 0) 1859 return -1; 1860 } 1861 1862 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); 1863 1864 h->mb_mbaff = 0; 1865 h->mb_aff_frame = 0; 1866 last_pic_structure = s0->picture_structure; 1867 if(h->sps.frame_mbs_only_flag){ 1868 s->picture_structure= PICT_FRAME; 1869 }else{ 1870 if(get_bits1(&s->gb)) { //field_pic_flag 1871 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag 1872 } else { 1873 s->picture_structure= PICT_FRAME; 1874 h->mb_aff_frame = h->sps.mb_aff; 1875 } 1876 } 1877 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME; 1878 1879 if(h0->current_slice == 0){ 1880 while(h->frame_num != 
h->prev_frame_num && 1881 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){ 1882 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num); 1883 if (ff_h264_frame_start(h) < 0) 1884 return -1; 1885 h->prev_frame_num++; 1886 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num; 1887 s->current_picture_ptr->frame_num= h->prev_frame_num; 1888 ff_h264_execute_ref_pic_marking(h, NULL, 0); 1889 } 1890 1891 /* See if we have a decoded first field looking for a pair... */ 1892 if (s0->first_field) { 1893 assert(s0->current_picture_ptr); 1894 assert(s0->current_picture_ptr->data[0]); 1895 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF); 1896 1897 /* figure out if we have a complementary field pair */ 1898 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) { 1899 /* 1900 * Previous field is unmatched. Don't display it, but let it 1901 * remain for reference if marked as such. 1902 */ 1903 s0->current_picture_ptr = NULL; 1904 s0->first_field = FIELD_PICTURE; 1905 1906 } else { 1907 if (h->nal_ref_idc && 1908 s0->current_picture_ptr->reference && 1909 s0->current_picture_ptr->frame_num != h->frame_num) { 1910 /* 1911 * This and previous field were reference, but had 1912 * different frame_nums. Consider this field first in 1913 * pair. Throw away previous field except for reference 1914 * purposes. 
1915 */ 1916 s0->first_field = 1; 1917 s0->current_picture_ptr = NULL; 1918 1919 } else { 1920 /* Second field in complementary pair */ 1921 s0->first_field = 0; 1922 } 1923 } 1924 1925 } else { 1926 /* Frame or first field in a potentially complementary pair */ 1927 assert(!s0->current_picture_ptr); 1928 s0->first_field = FIELD_PICTURE; 1929 } 1930 1931 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) { 1932 s0->first_field = 0; 1933 return -1; 1934 } 1935 } 1936 if(h != h0) 1937 clone_slice(h, h0); 1938 1939 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup 1940 1941 assert(s->mb_num == s->mb_width * s->mb_height); 1942 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num || 1943 first_mb_in_slice >= s->mb_num){ 1944 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n"); 1945 return -1; 1946 } 1947 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; 1948 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE; 1949 if (s->picture_structure == PICT_BOTTOM_FIELD) 1950 s->resync_mb_y = s->mb_y = s->mb_y + 1; 1951 assert(s->mb_y < s->mb_height); 1952 1953 if(s->picture_structure==PICT_FRAME){ 1954 h->curr_pic_num= h->frame_num; 1955 h->max_pic_num= 1<< h->sps.log2_max_frame_num; 1956 }else{ 1957 h->curr_pic_num= 2*h->frame_num + 1; 1958 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1); 1959 } 1960 1961 if(h->nal_unit_type == NAL_IDR_SLICE){ 1962 get_ue_golomb(&s->gb); /* idr_pic_id */ 1963 } 1964 1965 if(h->sps.poc_type==0){ 1966 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb); 1967 1968 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){ 1969 h->delta_poc_bottom= get_se_golomb(&s->gb); 1970 } 1971 } 1972 1973 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){ 1974 h->delta_poc[0]= get_se_golomb(&s->gb); 1975 1976 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME) 1977 h->delta_poc[1]= get_se_golomb(&s->gb); 
1978 } 1979 1980 init_poc(h); 1981 1982 if(h->pps.redundant_pic_cnt_present){ 1983 h->redundant_pic_count= get_ue_golomb(&s->gb); 1984 } 1985 1986 //set defaults, might be overridden a few lines later 1987 h->ref_count[0]= h->pps.ref_count[0]; 1988 h->ref_count[1]= h->pps.ref_count[1]; 1989 1990 if(h->slice_type_nos != FF_I_TYPE){ 1991 if(h->slice_type_nos == FF_B_TYPE){ 1992 h->direct_spatial_mv_pred= get_bits1(&s->gb); 1993 } 1994 num_ref_idx_active_override_flag= get_bits1(&s->gb); 1995 1996 if(num_ref_idx_active_override_flag){ 1997 h->ref_count[0]= get_ue_golomb(&s->gb) + 1; 1998 if(h->slice_type_nos==FF_B_TYPE) 1999 h->ref_count[1]= get_ue_golomb(&s->gb) + 1; 2000 2001 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){ 2002 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n"); 2003 h->ref_count[0]= h->ref_count[1]= 1; 2004 return -1; 2005 } 2006 } 2007 if(h->slice_type_nos == FF_B_TYPE) 2008 h->list_count= 2; 2009 else 2010 h->list_count= 1; 2011 }else 2012 h->list_count= 0; 2013 2014 if(!default_ref_list_done){ 2015 ff_h264_fill_default_ref_list(h); 2016 } 2017 2018 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0) 2019 return -1; 2020 2021 if(h->slice_type_nos!=FF_I_TYPE){ 2022 s->last_picture_ptr= &h->ref_list[0][0]; 2023 ff_copy_picture(&s->last_picture, s->last_picture_ptr); 2024 } 2025 if(h->slice_type_nos==FF_B_TYPE){ 2026 s->next_picture_ptr= &h->ref_list[1][0]; 2027 ff_copy_picture(&s->next_picture, s->next_picture_ptr); 2028 } 2029 2030 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE ) 2031 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) ) 2032 pred_weight_table(h); 2033 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){ 2034 implicit_weight_table(h, -1); 2035 }else { 2036 h->use_weight = 0; 2037 for (i = 0; i < 2; i++) { 2038 h->luma_weight_flag[i] = 0; 2039 h->chroma_weight_flag[i] = 0; 2040 } 2041 } 2042 2043 if(h->nal_ref_idc) 2044 
ff_h264_decode_ref_pic_marking(h0, &s->gb); 2045 2046 if(FRAME_MBAFF){ 2047 ff_h264_fill_mbaff_ref_list(h); 2048 2049 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){ 2050 implicit_weight_table(h, 0); 2051 implicit_weight_table(h, 1); 2052 } 2053 } 2054 2055 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred) 2056 ff_h264_direct_dist_scale_factor(h); 2057 ff_h264_direct_ref_list_init(h); 2058 2059 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){ 2060 tmp = get_ue_golomb_31(&s->gb); 2061 if(tmp > 2){ 2062 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n"); 2063 return -1; 2064 } 2065 h->cabac_init_idc= tmp; 2066 } 2067 2068 h->last_qscale_diff = 0; 2069 tmp = h->pps.init_qp + get_se_golomb(&s->gb); 2070 if(tmp>51){ 2071 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp); 2072 return -1; 2073 } 2074 s->qscale= tmp; 2075 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); 2076 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); 2077 //FIXME qscale / qp ... 
stuff 2078 if(h->slice_type == FF_SP_TYPE){ 2079 get_bits1(&s->gb); /* sp_for_switch_flag */ 2080 } 2081 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){ 2082 get_se_golomb(&s->gb); /* slice_qs_delta */ 2083 } 2084 2085 h->deblocking_filter = 1; 2086 h->slice_alpha_c0_offset = 52; 2087 h->slice_beta_offset = 52; 2088 if( h->pps.deblocking_filter_parameters_present ) { 2089 tmp= get_ue_golomb_31(&s->gb); 2090 if(tmp > 2){ 2091 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp); 2092 return -1; 2093 } 2094 h->deblocking_filter= tmp; 2095 if(h->deblocking_filter < 2) 2096 h->deblocking_filter^= 1; // 1<->0 2097 2098 if( h->deblocking_filter ) { 2099 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1; 2100 h->slice_beta_offset += get_se_golomb(&s->gb) << 1; 2101 if( h->slice_alpha_c0_offset > 104U 2102 || h->slice_beta_offset > 104U){ 2103 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset); 2104 return -1; 2105 } 2106 } 2107 } 2108 2109 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL 2110 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE) 2111 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE) 2112 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) 2113 h->deblocking_filter= 0; 2114 2115 if(h->deblocking_filter == 1 && h0->max_contexts > 1) { 2116 if(s->avctx->flags2 & CODEC_FLAG2_FAST) { 2117 /* Cheat slightly for speed: 2118 Do not bother to deblock across slices. 
*/ 2119 h->deblocking_filter = 2; 2120 } else { 2121 h0->max_contexts = 1; 2122 if(!h0->single_decode_warning) { 2123 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n"); 2124 h0->single_decode_warning = 1; 2125 } 2126 if(h != h0) 2127 return 1; // deblocking switched inside frame 2128 } 2129 } 2130 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]); 2131 2132#if 0 //FMO 2133 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) 2134 slice_group_change_cycle= get_bits(&s->gb, ?); 2135#endif 2136 2137 h0->last_slice_type = slice_type; 2138 h->slice_num = ++h0->current_slice; 2139 if(h->slice_num >= MAX_SLICES){ 2140 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n"); 2141 } 2142 2143 for(j=0; j<2; j++){ 2144 int id_list[16]; 2145 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j]; 2146 for(i=0; i<16; i++){ 2147 id_list[i]= 60; 2148 if(h->ref_list[j][i].data[0]){ 2149 int k; 2150 uint8_t *base= h->ref_list[j][i].base[0]; 2151 for(k=0; k<h->short_ref_count; k++) 2152 if(h->short_ref[k]->base[0] == base){ 2153 id_list[i]= k; 2154 break; 2155 } 2156 for(k=0; k<h->long_ref_count; k++) 2157 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){ 2158 id_list[i]= h->short_ref_count + k; 2159 break; 2160 } 2161 } 2162 } 2163 2164 ref2frm[0]= 2165 ref2frm[1]= -1; 2166 for(i=0; i<16; i++) 2167 ref2frm[i+2]= 4*id_list[i] 2168 +(h->ref_list[j][i].reference&3); 2169 ref2frm[18+0]= 2170 ref2frm[18+1]= -1; 2171 for(i=16; i<48; i++) 2172 ref2frm[i+4]= 4*id_list[(i-16)>>1] 2173 +(h->ref_list[j][i].reference&3); 2174 } 2175 2176 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; 2177 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 
0 : h->emu_edge_width; 2178 2179 if(s->avctx->debug&FF_DEBUG_PICT_INFO){ 2180 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", 2181 h->slice_num, 2182 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"), 2183 first_mb_in_slice, 2184 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "", 2185 pps_id, h->frame_num, 2186 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1], 2187 h->ref_count[0], h->ref_count[1], 2188 s->qscale, 2189 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26, 2190 h->use_weight, 2191 h->use_weight==1 && h->use_weight_chroma ? "c" : "", 2192 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "" 2193 ); 2194 } 2195 2196 return 0; 2197} 2198 2199int ff_h264_get_slice_type(const H264Context *h) 2200{ 2201 switch (h->slice_type) { 2202 case FF_P_TYPE: return 0; 2203 case FF_B_TYPE: return 1; 2204 case FF_I_TYPE: return 2; 2205 case FF_SP_TYPE: return 3; 2206 case FF_SI_TYPE: return 4; 2207 default: return -1; 2208 } 2209} 2210 2211/** 2212 * 2213 * @return non zero if the loop filter can be skiped 2214 */ 2215static int fill_filter_caches(H264Context *h, int mb_type){ 2216 MpegEncContext * const s = &h->s; 2217 const int mb_xy= h->mb_xy; 2218 int top_xy, left_xy[2]; 2219 int top_type, left_type[2]; 2220 2221 top_xy = mb_xy - (s->mb_stride << MB_FIELD); 2222 2223 //FIXME deblocking could skip the intra and nnz parts. 2224 2225 /* Wow, what a mess, why didn't they simplify the interlacing & intra 2226 * stuff, I can't imagine that these complex rules are worth it. 
*/ 2227 2228 left_xy[1] = left_xy[0] = mb_xy-1; 2229 if(FRAME_MBAFF){ 2230 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); 2231 const int curr_mb_field_flag = IS_INTERLACED(mb_type); 2232 if(s->mb_y&1){ 2233 if (left_mb_field_flag != curr_mb_field_flag) { 2234 left_xy[0] -= s->mb_stride; 2235 } 2236 }else{ 2237 if(curr_mb_field_flag){ 2238 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); 2239 } 2240 if (left_mb_field_flag != curr_mb_field_flag) { 2241 left_xy[1] += s->mb_stride; 2242 } 2243 } 2244 } 2245 2246 h->top_mb_xy = top_xy; 2247 h->left_mb_xy[0] = left_xy[0]; 2248 h->left_mb_xy[1] = left_xy[1]; 2249 { 2250 //for sufficiently low qp, filtering wouldn't do anything 2251 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp 2252 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice 2253 int qp = s->current_picture.qscale_table[mb_xy]; 2254 if(qp <= qp_thresh 2255 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh) 2256 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ 2257 if(!FRAME_MBAFF) 2258 return 1; 2259 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh) 2260 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) 2261 return 1; 2262 } 2263 } 2264 2265 top_type = s->current_picture.mb_type[top_xy] ; 2266 left_type[0] = s->current_picture.mb_type[left_xy[0]]; 2267 left_type[1] = s->current_picture.mb_type[left_xy[1]]; 2268 if(h->deblocking_filter == 2){ 2269 if(h->slice_table[top_xy ] != h->slice_num) top_type= 0; 2270 if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0; 2271 }else{ 2272 if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0; 2273 if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= 
left_type[1] =0; 2274 } 2275 h->top_type = top_type ; 2276 h->left_type[0]= left_type[0]; 2277 h->left_type[1]= left_type[1]; 2278 2279 if(IS_INTRA(mb_type)) 2280 return 0; 2281 2282 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); 2283 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); 2284 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); 2285 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); 2286 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); 2287 2288 h->cbp= h->cbp_table[mb_xy]; 2289 2290 { 2291 int list; 2292 for(list=0; list<h->list_count; list++){ 2293 int8_t *ref; 2294 int y, b_stride; 2295 int16_t (*mv_dst)[2]; 2296 int16_t (*mv_src)[2]; 2297 2298 if(!USES_LIST(mb_type, list)){ 2299 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); 2300 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 2301 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 2302 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 2303 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); 2304 continue; 2305 } 2306 2307 ref = &s->current_picture.ref_index[list][4*mb_xy]; 2308 { 2309 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 
20 : 2); 2310 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 2311 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 2312 ref += 2; 2313 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 2314 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); 2315 } 2316 2317 b_stride = h->b_stride; 2318 mv_dst = &h->mv_cache[list][scan8[0]]; 2319 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; 2320 for(y=0; y<4; y++){ 2321 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); 2322 } 2323 2324 } 2325 } 2326 2327 2328/* 23290 . T T. T T T T 23301 L . .L . . . . 23312 L . .L . . . . 23323 . T TL . . . . 23334 L . .L . . . . 23345 L . .. . . . . 2335*/ 2336//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) 2337 if(top_type){ 2338 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); 2339 } 2340 2341 if(left_type[0]){ 2342 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; 2343 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; 2344 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; 2345 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; 2346 } 2347 2348 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs 2349 if(!CABAC && h->pps.transform_8x8_mode){ 2350 if(IS_8x8DCT(top_type)){ 2351 h->non_zero_count_cache[4+8*0]= 2352 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4; 2353 h->non_zero_count_cache[6+8*0]= 2354 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8; 2355 } 2356 if(IS_8x8DCT(left_type[0])){ 2357 h->non_zero_count_cache[3+8*1]= 2358 h->non_zero_count_cache[3+8*2]= 
h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF 2359 } 2360 if(IS_8x8DCT(left_type[1])){ 2361 h->non_zero_count_cache[3+8*3]= 2362 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF 2363 } 2364 2365 if(IS_8x8DCT(mb_type)){ 2366 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= 2367 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; 2368 2369 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= 2370 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; 2371 2372 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= 2373 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; 2374 2375 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= 2376 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; 2377 } 2378 } 2379 2380 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ 2381 int list; 2382 for(list=0; list<h->list_count; list++){ 2383 if(USES_LIST(top_type, list)){ 2384 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; 2385 const int b8_xy= 4*top_xy + 2; 2386 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 
20 : 2); 2387 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); 2388 h->ref_cache[list][scan8[0] + 0 - 1*8]= 2389 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; 2390 h->ref_cache[list][scan8[0] + 2 - 1*8]= 2391 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; 2392 }else{ 2393 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); 2394 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); 2395 } 2396 2397 if(!IS_INTERLACED(mb_type^left_type[0])){ 2398 if(USES_LIST(left_type[0], list)){ 2399 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; 2400 const int b8_xy= 4*left_xy[0] + 1; 2401 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); 2402 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]); 2403 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]); 2404 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]); 2405 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]); 2406 h->ref_cache[list][scan8[0] - 1 + 0 ]= 2407 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]]; 2408 h->ref_cache[list][scan8[0] - 1 +16 ]= 2409 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]]; 2410 }else{ 2411 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); 2412 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); 2413 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); 2414 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); 2415 h->ref_cache[list][scan8[0] - 1 + 0 ]= 2416 h->ref_cache[list][scan8[0] - 1 + 8 ]= 2417 h->ref_cache[list][scan8[0] - 1 + 16 ]= 2418 
h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; 2419 } 2420 } 2421 } 2422 } 2423 2424 return 0; 2425} 2426 2427static void loop_filter(H264Context *h){ 2428 MpegEncContext * const s = &h->s; 2429 uint8_t *dest_y, *dest_cb, *dest_cr; 2430 int linesize, uvlinesize, mb_x, mb_y; 2431 const int end_mb_y= s->mb_y + FRAME_MBAFF; 2432 const int old_slice_type= h->slice_type; 2433 2434 if(h->deblocking_filter) { 2435 for(mb_x= 0; mb_x<s->mb_width; mb_x++){ 2436 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){ 2437 int mb_xy, mb_type; 2438 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride; 2439 h->slice_num= h->slice_table[mb_xy]; 2440 mb_type= s->current_picture.mb_type[mb_xy]; 2441 h->list_count= h->list_counts[mb_xy]; 2442 2443 if(FRAME_MBAFF) 2444 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type); 2445 2446 s->mb_x= mb_x; 2447 s->mb_y= mb_y; 2448 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; 2449 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; 2450 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; 2451 //FIXME simplify above 2452 2453 if (MB_FIELD) { 2454 linesize = h->mb_linesize = s->linesize * 2; 2455 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; 2456 if(mb_y&1){ //FIXME move out of this function? 
2457 dest_y -= s->linesize*15; 2458 dest_cb-= s->uvlinesize*7; 2459 dest_cr-= s->uvlinesize*7; 2460 } 2461 } else { 2462 linesize = h->mb_linesize = s->linesize; 2463 uvlinesize = h->mb_uvlinesize = s->uvlinesize; 2464 } 2465 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); 2466 if(fill_filter_caches(h, mb_type)) 2467 continue; 2468 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]); 2469 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]); 2470 2471 if (FRAME_MBAFF) { 2472 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 2473 } else { 2474 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); 2475 } 2476 } 2477 } 2478 } 2479 h->slice_type= old_slice_type; 2480 s->mb_x= 0; 2481 s->mb_y= end_mb_y - FRAME_MBAFF; 2482 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale); 2483 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale); 2484} 2485 2486static void predict_field_decoding_flag(H264Context *h){ 2487 MpegEncContext * const s = &h->s; 2488 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; 2489 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) 2490 ? s->current_picture.mb_type[mb_xy-1] 2491 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) 2492 ? s->current_picture.mb_type[mb_xy-s->mb_stride] 2493 : 0; 2494 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; 2495} 2496 2497static int decode_slice(struct AVCodecContext *avctx, void *arg){ 2498 H264Context *h = *(void**)arg; 2499 MpegEncContext * const s = &h->s; 2500 const int part_mask= s->partitioned_frame ? 
(AC_END|AC_ERROR) : 0x7F; 2501 2502 s->mb_skip_run= -1; 2503 2504 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 || 2505 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); 2506 2507 if( h->pps.cabac ) { 2508 /* realign */ 2509 align_get_bits( &s->gb ); 2510 2511 /* init cabac */ 2512 ff_init_cabac_states( &h->cabac); 2513 ff_init_cabac_decoder( &h->cabac, 2514 s->gb.buffer + get_bits_count(&s->gb)/8, 2515 (get_bits_left(&s->gb) + 7)/8); 2516 2517 ff_h264_init_cabac_states(h); 2518 2519 for(;;){ 2520//START_TIMER 2521 int ret = ff_h264_decode_mb_cabac(h); 2522 int eos; 2523//STOP_TIMER("decode_mb_cabac") 2524 2525 if(ret>=0) ff_h264_hl_decode_mb(h); 2526 2527 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? 2528 s->mb_y++; 2529 2530 ret = ff_h264_decode_mb_cabac(h); 2531 2532 if(ret>=0) ff_h264_hl_decode_mb(h); 2533 s->mb_y--; 2534 } 2535 eos = get_cabac_terminate( &h->cabac ); 2536 2537 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){ 2538 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 2539 return 0; 2540 } 2541 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) { 2542 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream); 2543 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 2544 return -1; 2545 } 2546 2547 if( ++s->mb_x >= s->mb_width ) { 2548 s->mb_x = 0; 2549 loop_filter(h); 2550 ff_draw_horiz_band(s, 16*s->mb_y, 16); 2551 ++s->mb_y; 2552 if(FIELD_OR_MBAFF_PICTURE) { 2553 ++s->mb_y; 2554 if(FRAME_MBAFF && s->mb_y < s->mb_height) 2555 predict_field_decoding_flag(h); 2556 } 2557 } 2558 2559 if( eos || s->mb_y >= s->mb_height ) { 2560 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), 
s->gb.size_in_bits); 2561 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 2562 return 0; 2563 } 2564 } 2565 2566 } else { 2567 for(;;){ 2568 int ret = ff_h264_decode_mb_cavlc(h); 2569 2570 if(ret>=0) ff_h264_hl_decode_mb(h); 2571 2572 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? 2573 s->mb_y++; 2574 ret = ff_h264_decode_mb_cavlc(h); 2575 2576 if(ret>=0) ff_h264_hl_decode_mb(h); 2577 s->mb_y--; 2578 } 2579 2580 if(ret<0){ 2581 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); 2582 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 2583 2584 return -1; 2585 } 2586 2587 if(++s->mb_x >= s->mb_width){ 2588 s->mb_x=0; 2589 loop_filter(h); 2590 ff_draw_horiz_band(s, 16*s->mb_y, 16); 2591 ++s->mb_y; 2592 if(FIELD_OR_MBAFF_PICTURE) { 2593 ++s->mb_y; 2594 if(FRAME_MBAFF && s->mb_y < s->mb_height) 2595 predict_field_decoding_flag(h); 2596 } 2597 if(s->mb_y >= s->mb_height){ 2598 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 2599 2600 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) { 2601 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 2602 2603 return 0; 2604 }else{ 2605 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 2606 2607 return -1; 2608 } 2609 } 2610 } 2611 2612 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ 2613 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); 2614 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ 2615 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 2616 2617 return 0; 2618 }else{ 2619 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 2620 2621 
return -1; 2622 } 2623 } 2624 } 2625 } 2626 2627#if 0 2628 for(;s->mb_y < s->mb_height; s->mb_y++){ 2629 for(;s->mb_x < s->mb_width; s->mb_x++){ 2630 int ret= decode_mb(h); 2631 2632 ff_h264_hl_decode_mb(h); 2633 2634 if(ret<0){ 2635 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); 2636 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 2637 2638 return -1; 2639 } 2640 2641 if(++s->mb_x >= s->mb_width){ 2642 s->mb_x=0; 2643 if(++s->mb_y >= s->mb_height){ 2644 if(get_bits_count(s->gb) == s->gb.size_in_bits){ 2645 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 2646 2647 return 0; 2648 }else{ 2649 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 2650 2651 return -1; 2652 } 2653 } 2654 } 2655 2656 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){ 2657 if(get_bits_count(s->gb) == s->gb.size_in_bits){ 2658 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); 2659 2660 return 0; 2661 }else{ 2662 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); 2663 2664 return -1; 2665 } 2666 } 2667 } 2668 s->mb_x=0; 2669 ff_draw_horiz_band(s, 16*s->mb_y, 16); 2670 } 2671#endif 2672 return -1; //not reached 2673} 2674 2675/** 2676 * Call decode_slice() for each context. 
2677 * 2678 * @param h h264 master context 2679 * @param context_count number of contexts to execute 2680 */ 2681static void execute_decode_slices(H264Context *h, int context_count){ 2682 MpegEncContext * const s = &h->s; 2683 AVCodecContext * const avctx= s->avctx; 2684 H264Context *hx; 2685 int i; 2686 2687 if (s->avctx->hwaccel) 2688 return; 2689 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 2690 return; 2691 if(context_count == 1) { 2692 decode_slice(avctx, &h); 2693 } else { 2694 for(i = 1; i < context_count; i++) { 2695 hx = h->thread_context[i]; 2696 hx->s.error_recognition = avctx->error_recognition; 2697 hx->s.error_count = 0; 2698 } 2699 2700 avctx->execute(avctx, (void *)decode_slice, 2701 h->thread_context, NULL, context_count, sizeof(void*)); 2702 2703 /* pull back stuff from slices to master context */ 2704 hx = h->thread_context[context_count - 1]; 2705 s->mb_x = hx->s.mb_x; 2706 s->mb_y = hx->s.mb_y; 2707 s->dropable = hx->s.dropable; 2708 s->picture_structure = hx->s.picture_structure; 2709 for(i = 1; i < context_count; i++) 2710 h->s.error_count += h->thread_context[i]->s.error_count; 2711 } 2712} 2713 2714 2715static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ 2716 MpegEncContext * const s = &h->s; 2717 AVCodecContext * const avctx= s->avctx; 2718 int buf_index=0; 2719 H264Context *hx; ///< thread context 2720 int context_count = 0; 2721 int next_avc= h->is_avc ? 
0 : buf_size; 2722 2723 h->max_contexts = avctx->thread_count; 2724#if 0 2725 int i; 2726 for(i=0; i<50; i++){ 2727 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); 2728 } 2729#endif 2730 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){ 2731 h->current_slice = 0; 2732 if (!s->first_field) 2733 s->current_picture_ptr= NULL; 2734 ff_h264_reset_sei(h); 2735 } 2736 2737 for(;;){ 2738 int consumed; 2739 int dst_length; 2740 int bit_length; 2741 const uint8_t *ptr; 2742 int i, nalsize = 0; 2743 int err; 2744 2745 if(buf_index >= next_avc) { 2746 if(buf_index >= buf_size) break; 2747 nalsize = 0; 2748 for(i = 0; i < h->nal_length_size; i++) 2749 nalsize = (nalsize << 8) | buf[buf_index++]; 2750 if(nalsize <= 1 || nalsize > buf_size - buf_index){ 2751 if(nalsize == 1){ 2752 buf_index++; 2753 continue; 2754 }else{ 2755 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); 2756 break; 2757 } 2758 } 2759 next_avc= buf_index + nalsize; 2760 } else { 2761 // start code prefix search 2762 for(; buf_index + 3 < next_avc; buf_index++){ 2763 // This should always succeed in the first iteration. 2764 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1) 2765 break; 2766 } 2767 2768 if(buf_index+3 >= buf_size) break; 2769 2770 buf_index+=3; 2771 if(buf_index >= next_avc) continue; 2772 } 2773 2774 hx = h->thread_context[context_count]; 2775 2776 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index); 2777 if (ptr==NULL || dst_length < 0){ 2778 return -1; 2779 } 2780 i= buf_index + consumed; 2781 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc && 2782 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0) 2783 s->workaround_bugs |= FF_BUG_TRUNCATED; 2784 2785 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){ 2786 while(ptr[dst_length - 1] == 0 && dst_length > 0) 2787 dst_length--; 2788 } 2789 bit_length= !dst_length ? 
0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1)); 2790 2791 if(s->avctx->debug&FF_DEBUG_STARTCODE){ 2792 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length); 2793 } 2794 2795 if (h->is_avc && (nalsize != consumed) && nalsize){ 2796 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); 2797 } 2798 2799 buf_index += consumed; 2800 2801 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id 2802 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) 2803 continue; 2804 2805 again: 2806 err = 0; 2807 switch(hx->nal_unit_type){ 2808 case NAL_IDR_SLICE: 2809 if (h->nal_unit_type != NAL_IDR_SLICE) { 2810 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices"); 2811 return -1; 2812 } 2813 idr(h); //FIXME ensure we don't loose some frames if there is reordering 2814 case NAL_SLICE: 2815 init_get_bits(&hx->s.gb, ptr, bit_length); 2816 hx->intra_gb_ptr= 2817 hx->inter_gb_ptr= &hx->s.gb; 2818 hx->s.data_partitioning = 0; 2819 2820 if((err = decode_slice_header(hx, h))) 2821 break; 2822 2823 if (h->current_slice == 1) { 2824 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) 2825 return -1; 2826 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) 2827 ff_vdpau_h264_picture_start(s); 2828 } 2829 2830 s->current_picture_ptr->key_frame |= 2831 (hx->nal_unit_type == NAL_IDR_SLICE) || 2832 (h->sei_recovery_frame_cnt >= 0); 2833 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5 2834 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) 2835 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) 2836 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) 2837 && avctx->skip_frame < AVDISCARD_ALL){ 2838 if(avctx->hwaccel) { 2839 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - 
consumed], consumed) < 0) 2840 return -1; 2841 }else 2842 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){ 2843 static const uint8_t start_code[] = {0x00, 0x00, 0x01}; 2844 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code)); 2845 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed ); 2846 }else 2847 context_count++; 2848 } 2849 break; 2850 case NAL_DPA: 2851 init_get_bits(&hx->s.gb, ptr, bit_length); 2852 hx->intra_gb_ptr= 2853 hx->inter_gb_ptr= NULL; 2854 2855 if ((err = decode_slice_header(hx, h)) < 0) 2856 break; 2857 2858 hx->s.data_partitioning = 1; 2859 2860 break; 2861 case NAL_DPB: 2862 init_get_bits(&hx->intra_gb, ptr, bit_length); 2863 hx->intra_gb_ptr= &hx->intra_gb; 2864 break; 2865 case NAL_DPC: 2866 init_get_bits(&hx->inter_gb, ptr, bit_length); 2867 hx->inter_gb_ptr= &hx->inter_gb; 2868 2869 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning 2870 && s->context_initialized 2871 && s->hurry_up < 5 2872 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) 2873 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE) 2874 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE) 2875 && avctx->skip_frame < AVDISCARD_ALL) 2876 context_count++; 2877 break; 2878 case NAL_SEI: 2879 init_get_bits(&s->gb, ptr, bit_length); 2880 ff_h264_decode_sei(h); 2881 break; 2882 case NAL_SPS: 2883 init_get_bits(&s->gb, ptr, bit_length); 2884 ff_h264_decode_seq_parameter_set(h); 2885 2886 if(s->flags& CODEC_FLAG_LOW_DELAY) 2887 s->low_delay=1; 2888 2889 if(avctx->has_b_frames < 2) 2890 avctx->has_b_frames= !s->low_delay; 2891 break; 2892 case NAL_PPS: 2893 init_get_bits(&s->gb, ptr, bit_length); 2894 2895 ff_h264_decode_picture_parameter_set(h, bit_length); 2896 2897 break; 2898 case NAL_AUD: 2899 case NAL_END_SEQUENCE: 2900 case NAL_END_STREAM: 2901 case NAL_FILLER_DATA: 2902 case NAL_SPS_EXT: 2903 case NAL_AUXILIARY_SLICE: 2904 break; 2905 
default: 2906 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length); 2907 } 2908 2909 if(context_count == h->max_contexts) { 2910 execute_decode_slices(h, context_count); 2911 context_count = 0; 2912 } 2913 2914 if (err < 0) 2915 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); 2916 else if(err == 1) { 2917 /* Slice could not be decoded in parallel mode, copy down 2918 * NAL unit stuff to context 0 and restart. Note that 2919 * rbsp_buffer is not transferred, but since we no longer 2920 * run in parallel mode this should not be an issue. */ 2921 h->nal_unit_type = hx->nal_unit_type; 2922 h->nal_ref_idc = hx->nal_ref_idc; 2923 hx = h; 2924 goto again; 2925 } 2926 } 2927 if(context_count) 2928 execute_decode_slices(h, context_count); 2929 return buf_index; 2930} 2931 2932/** 2933 * returns the number of bytes consumed for building the current frame 2934 */ 2935static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){ 2936 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...) 
2937 if(pos+10>buf_size) pos=buf_size; // oops ;) 2938 2939 return pos; 2940} 2941 2942static int decode_frame(AVCodecContext *avctx, 2943 void *data, int *data_size, 2944 AVPacket *avpkt) 2945{ 2946 const uint8_t *buf = avpkt->data; 2947 int buf_size = avpkt->size; 2948 H264Context *h = avctx->priv_data; 2949 MpegEncContext *s = &h->s; 2950 AVFrame *pict = data; 2951 int buf_index; 2952 2953 s->flags= avctx->flags; 2954 s->flags2= avctx->flags2; 2955 2956 /* end of stream, output what is still in the buffers */ 2957 if (buf_size == 0) { 2958 Picture *out; 2959 int i, out_idx; 2960 2961//FIXME factorize this with the output code below 2962 out = h->delayed_pic[0]; 2963 out_idx = 0; 2964 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) 2965 if(h->delayed_pic[i]->poc < out->poc){ 2966 out = h->delayed_pic[i]; 2967 out_idx = i; 2968 } 2969 2970 for(i=out_idx; h->delayed_pic[i]; i++) 2971 h->delayed_pic[i] = h->delayed_pic[i+1]; 2972 2973 if(out){ 2974 *data_size = sizeof(AVFrame); 2975 *pict= *(AVFrame*)out; 2976 } 2977 2978 return 0; 2979 } 2980 2981 buf_index=decode_nal_units(h, buf, buf_size); 2982 if(buf_index < 0) 2983 return -1; 2984 2985 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){ 2986 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0; 2987 av_log(avctx, AV_LOG_ERROR, "no frame!\n"); 2988 return -1; 2989 } 2990 2991 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ 2992 Picture *out = s->current_picture_ptr; 2993 Picture *cur = s->current_picture_ptr; 2994 int i, pics, out_of_order, out_idx; 2995 2996 field_end(h); 2997 2998 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { 2999 /* Wait for second field. */ 3000 *data_size = 0; 3001 3002 } else { 3003 cur->interlaced_frame = 0; 3004 cur->repeat_pict = 0; 3005 3006 /* Signal interlacing information externally. 
*/ 3007 /* Prioritize picture timing SEI information over used decoding process if it exists. */ 3008 3009 if(h->sps.pic_struct_present_flag){ 3010 switch (h->sei_pic_struct) 3011 { 3012 case SEI_PIC_STRUCT_FRAME: 3013 break; 3014 case SEI_PIC_STRUCT_TOP_FIELD: 3015 case SEI_PIC_STRUCT_BOTTOM_FIELD: 3016 cur->interlaced_frame = 1; 3017 break; 3018 case SEI_PIC_STRUCT_TOP_BOTTOM: 3019 case SEI_PIC_STRUCT_BOTTOM_TOP: 3020 if (FIELD_OR_MBAFF_PICTURE) 3021 cur->interlaced_frame = 1; 3022 else 3023 // try to flag soft telecine progressive 3024 cur->interlaced_frame = h->prev_interlaced_frame; 3025 break; 3026 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: 3027 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: 3028 // Signal the possibility of telecined film externally (pic_struct 5,6) 3029 // From these hints, let the applications decide if they apply deinterlacing. 3030 cur->repeat_pict = 1; 3031 break; 3032 case SEI_PIC_STRUCT_FRAME_DOUBLING: 3033 // Force progressive here, as doubling interlaced frame is a bad idea. 3034 cur->repeat_pict = 2; 3035 break; 3036 case SEI_PIC_STRUCT_FRAME_TRIPLING: 3037 cur->repeat_pict = 4; 3038 break; 3039 } 3040 3041 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP) 3042 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0; 3043 }else{ 3044 /* Derive interlacing flag from used decoding process. */ 3045 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE; 3046 } 3047 h->prev_interlaced_frame = cur->interlaced_frame; 3048 3049 if (cur->field_poc[0] != cur->field_poc[1]){ 3050 /* Derive top_field_first from field pocs. */ 3051 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1]; 3052 }else{ 3053 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){ 3054 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. 
*/ 3055 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM 3056 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) 3057 cur->top_field_first = 1; 3058 else 3059 cur->top_field_first = 0; 3060 }else{ 3061 /* Most likely progressive */ 3062 cur->top_field_first = 0; 3063 } 3064 } 3065 3066 //FIXME do something with unavailable reference frames 3067 3068 /* Sort B-frames into display order */ 3069 3070 if(h->sps.bitstream_restriction_flag 3071 && s->avctx->has_b_frames < h->sps.num_reorder_frames){ 3072 s->avctx->has_b_frames = h->sps.num_reorder_frames; 3073 s->low_delay = 0; 3074 } 3075 3076 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT 3077 && !h->sps.bitstream_restriction_flag){ 3078 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT; 3079 s->low_delay= 0; 3080 } 3081 3082 pics = 0; 3083 while(h->delayed_pic[pics]) pics++; 3084 3085 assert(pics <= MAX_DELAYED_PIC_COUNT); 3086 3087 h->delayed_pic[pics++] = cur; 3088 if(cur->reference == 0) 3089 cur->reference = DELAYED_PIC_REF; 3090 3091 out = h->delayed_pic[0]; 3092 out_idx = 0; 3093 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++) 3094 if(h->delayed_pic[i]->poc < out->poc){ 3095 out = h->delayed_pic[i]; 3096 out_idx = i; 3097 } 3098 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) 3099 h->outputed_poc= INT_MIN; 3100 out_of_order = out->poc < h->outputed_poc; 3101 3102 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) 3103 { } 3104 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) 3105 || (s->low_delay && 3106 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2) 3107 || cur->pict_type == FF_B_TYPE))) 3108 { 3109 s->low_delay = 0; 3110 s->avctx->has_b_frames++; 3111 } 3112 3113 if(out_of_order || pics > s->avctx->has_b_frames){ 3114 out->reference &= ~DELAYED_PIC_REF; 3115 for(i=out_idx; 
#if 0
/* Disabled: fills h->mb_avail[] from the slice table for the neighbours of
 * the current macroblock (top-left, top, top-right, left). */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif

#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self-test: writes COUNT unsigned and then COUNT signed exp-Golomb codes
 * into a buffer, reads them back and reports every value that does not
 * round-trip. The DCT, quantizer and NAL tests are compiled out.
 *
 * @return 0 (mismatches are only printed)
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* report the value that was actually written, i - COUNT/2 */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        h->h264dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
3271 uint8_t bitstream[COUNT]; 3272 uint8_t nal[COUNT*2]; 3273 H264Context h; 3274 memset(&h, 0, sizeof(H264Context)); 3275 3276 for(i=0; i<COUNT; i++){ 3277 int zeros= i; 3278 int nal_length; 3279 int consumed; 3280 int out_length; 3281 uint8_t *out; 3282 int j; 3283 3284 for(j=0; j<COUNT; j++){ 3285 bitstream[j]= (random() % 255) + 1; 3286 } 3287 3288 for(j=0; j<zeros; j++){ 3289 int pos= random() % COUNT; 3290 while(bitstream[pos] == 0){ 3291 pos++; 3292 pos %= COUNT; 3293 } 3294 bitstream[pos]=0; 3295 } 3296 3297 START_TIMER 3298 3299 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2); 3300 if(nal_length<0){ 3301 printf("encoding failed\n"); 3302 return -1; 3303 } 3304 3305 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length); 3306 3307 STOP_TIMER("NAL") 3308 3309 if(out_length != COUNT){ 3310 printf("incorrect length %d %d\n", out_length, COUNT); 3311 return -1; 3312 } 3313 3314 if(consumed != nal_length){ 3315 printf("incorrect consumed length %d %d\n", nal_length, consumed); 3316 return -1; 3317 } 3318 3319 if(memcmp(bitstream, out, COUNT)){ 3320 printf("mismatch\n"); 3321 return -1; 3322 } 3323 } 3324#endif 3325 3326 printf("Testing RBSP\n"); 3327 3328 3329 return 0; 3330} 3331#endif /* TEST */ 3332 3333 3334av_cold void ff_h264_free_context(H264Context *h) 3335{ 3336 int i; 3337 3338 free_tables(h); //FIXME cleanup init stuff perhaps 3339 3340 for(i = 0; i < MAX_SPS_COUNT; i++) 3341 av_freep(h->sps_buffers + i); 3342 3343 for(i = 0; i < MAX_PPS_COUNT; i++) 3344 av_freep(h->pps_buffers + i); 3345} 3346 3347av_cold int ff_h264_decode_end(AVCodecContext *avctx) 3348{ 3349 H264Context *h = avctx->priv_data; 3350 MpegEncContext *s = &h->s; 3351 3352 ff_h264_free_context(h); 3353 3354 MPV_common_end(s); 3355 3356// memset(h, 0, sizeof(H264Context)); 3357 3358 return 0; 3359} 3360 3361 3362AVCodec h264_decoder = { 3363 "h264", 3364 AVMEDIA_TYPE_VIDEO, 3365 CODEC_ID_H264, 3366 sizeof(H264Context), 3367 ff_h264_decode_init, 3368 NULL, 3369 
#if CONFIG_H264_VDPAU_DECODER
/* VDPAU variant of the H.264 decoder registration: it uses the same
 * callbacks as h264_decoder, additionally sets CODEC_CAP_HWACCEL_VDPAU, and
 * lists PIX_FMT_VDPAU_H264 as its only pixel format.
 * NOTE(review): the lone NULL is presumably the unused encode callback —
 * confirm against the AVCodec declaration. */
AVCodec h264_vdpau_decoder = {
    "h264_vdpau",
    AVMEDIA_TYPE_VIDEO,
    CODEC_ID_H264,
    sizeof(H264Context),
    ff_h264_decode_init,
    NULL,
    ff_h264_decode_end,
    decode_frame,
    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
};
#endif