1/** 2 * VP8 compatible video decoder 3 * 4 * Copyright (C) 2010 David Conrad 5 * Copyright (C) 2010 Ronald S. Bultje 6 * Copyright (C) 2010 Jason Garrett-Glaser 7 * 8 * This file is part of Libav. 9 * 10 * Libav is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * Libav is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with Libav; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25#include "libavutil/imgutils.h" 26#include "avcodec.h" 27#include "internal.h" 28#include "vp8.h" 29#include "vp8data.h" 30#include "rectangle.h" 31#include "thread.h" 32 33#if ARCH_ARM 34# include "arm/vp8.h" 35#endif 36 37static void free_buffers(VP8Context *s) 38{ 39 av_freep(&s->macroblocks_base); 40 av_freep(&s->filter_strength); 41 av_freep(&s->intra4x4_pred_mode_top); 42 av_freep(&s->top_nnz); 43 av_freep(&s->edge_emu_buffer); 44 av_freep(&s->top_border); 45 46 s->macroblocks = NULL; 47} 48 49static int vp8_alloc_frame(VP8Context *s, AVFrame *f) 50{ 51 int ret; 52 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0) 53 return ret; 54 if (s->num_maps_to_be_freed && !s->maps_are_invalid) { 55 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed]; 56 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) { 57 ff_thread_release_buffer(s->avctx, f); 58 return AVERROR(ENOMEM); 59 } 60 return 0; 61} 62 63static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free) 64{ 65 if (f->ref_index[0]) { 66 if (prefer_delayed_free) { 67 /* Upon a size change, we want to free the maps but other threads may still 68 * be using them, so queue them. Upon a seek, all threads are inactive so 69 * we want to cache one to prevent re-allocation in the next decoding 70 * iteration, but the rest we can free directly. */ 71 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps); 72 if (s->num_maps_to_be_freed < max_queued_maps) { 73 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0]; 74 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ { 75 av_free(f->ref_index[0]); 76 } /* else: MEMLEAK (should never happen, but better that than crash) */ 77 f->ref_index[0] = NULL; 78 } else /* vp8_decode_free() */ { 79 av_free(f->ref_index[0]); 80 } 81 } 82 ff_thread_release_buffer(s->avctx, f); 83} 84 85static void vp8_decode_flush_impl(AVCodecContext *avctx, 86 int prefer_delayed_free, int can_direct_free, int free_mem) 87{ 88 VP8Context *s = avctx->priv_data; 89 int i; 90 91 if (!avctx->internal->is_copy) { 92 for (i = 0; i < 5; i++) 93 if (s->frames[i].data[0]) 94 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free); 95 } 96 memset(s->framep, 0, sizeof(s->framep)); 97 98 if (free_mem) { 99 free_buffers(s); 100 s->maps_are_invalid = 1; 101 } 102} 103 104static void vp8_decode_flush(AVCodecContext *avctx) 105{ 106 vp8_decode_flush_impl(avctx, 1, 1, 0); 107} 108 109static int update_dimensions(VP8Context *s, int width, int height) 110{ 111 if (width != s->avctx->width || 112 height != s->avctx->height) { 113 if (av_image_check_size(width, height, 0, s->avctx)) 114 return AVERROR_INVALIDDATA; 115 116 vp8_decode_flush_impl(s->avctx, 1, 0, 1); 117 118 avcodec_set_dimensions(s->avctx, width, height); 119 } 120 121 s->mb_width = (s->avctx->coded_width +15) / 16; 122 s->mb_height = (s->avctx->coded_height+15) / 16; 123 124 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks)); 125 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength)); 126 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); 127 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); 128 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); 129 130 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || 131 !s->top_nnz || !s->top_border) 132 return AVERROR(ENOMEM); 133 134 s->macroblocks = s->macroblocks_base + 1; 135 136 return 0; 137} 138 139static void parse_segment_info(VP8Context *s) 140{ 141 VP56RangeCoder *c = &s->c; 142 int i; 143 144 s->segmentation.update_map = vp8_rac_get(c); 145 146 if (vp8_rac_get(c)) { // update segment feature data 147 s->segmentation.absolute_vals = vp8_rac_get(c); 148 149 for (i = 0; i < 4; i++) 150 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); 151 152 for (i = 0; i < 4; i++) 153 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); 154 } 155 if (s->segmentation.update_map) 156 for (i = 0; i < 3; i++) 157 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; 158} 159 160static void update_lf_deltas(VP8Context *s) 161{ 162 VP56RangeCoder *c = &s->c; 163 int i; 164 165 for (i = 0; i < 4; i++) 166 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); 167 168 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) 169 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); 170} 171 172static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) 173{ 174 const uint8_t *sizes = buf; 175 int i; 176 177 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); 178 179 buf += 3*(s->num_coeff_partitions-1); 180 buf_size -= 3*(s->num_coeff_partitions-1); 181 if (buf_size < 0) 182 return -1; 183 184 for (i = 0; i < s->num_coeff_partitions-1; i++) { 185 int size = AV_RL24(sizes + 3*i); 186 if (buf_size - size < 0) 187 return -1; 188 189 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size); 190 buf += size; 191 buf_size -= size; 192 } 193 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); 194 195 return 0; 196} 197 198static void get_quants(VP8Context *s) 199{ 200 VP56RangeCoder *c = &s->c; 201 int i, base_qi; 202 203 int yac_qi = vp8_rac_get_uint(c, 7); 204 int ydc_delta = vp8_rac_get_sint(c, 4); 205 int y2dc_delta = vp8_rac_get_sint(c, 4); 206 int y2ac_delta = vp8_rac_get_sint(c, 4); 207 int uvdc_delta = vp8_rac_get_sint(c, 4); 208 int uvac_delta = vp8_rac_get_sint(c, 4); 209 210 for (i = 0; i < 4; i++) { 211 if (s->segmentation.enabled) { 212 base_qi = s->segmentation.base_quant[i]; 213 if (!s->segmentation.absolute_vals) 214 base_qi += yac_qi; 215 } else 216 base_qi = yac_qi; 217 218 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)]; 219 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)]; 220 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)]; 221 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100; 222 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)]; 223 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)]; 224 225 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); 226 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); 227 } 228} 229 230/** 231 * Determine which buffers golden and altref should be updated with after this frame. 232 * The spec isn't clear here, so I'm going by my understanding of what libvpx does 233 * 234 * Intra frames update all 3 references 235 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set 236 * If the update (golden|altref) flag is set, it's updated with the current frame 237 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. 238 * If the flag is not set, the number read means: 239 * 0: no update 240 * 1: VP56_FRAME_PREVIOUS 241 * 2: update golden with altref, or update altref with golden 242 */ 243static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) 244{ 245 VP56RangeCoder *c = &s->c; 246 247 if (update) 248 return VP56_FRAME_CURRENT; 249 250 switch (vp8_rac_get_uint(c, 2)) { 251 case 1: 252 return VP56_FRAME_PREVIOUS; 253 case 2: 254 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; 255 } 256 return VP56_FRAME_NONE; 257} 258 259static void update_refs(VP8Context *s) 260{ 261 VP56RangeCoder *c = &s->c; 262 263 int update_golden = vp8_rac_get(c); 264 int update_altref = vp8_rac_get(c); 265 266 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); 267 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); 268} 269 270static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) 271{ 272 VP56RangeCoder *c = &s->c; 273 int header_size, hscale, vscale, i, j, k, l, m, ret; 274 int width = s->avctx->width; 275 int height = s->avctx->height; 276 277 s->keyframe = !(buf[0] & 1); 278 s->profile = (buf[0]>>1) & 7; 279 s->invisible = !(buf[0] & 0x10); 280 header_size = AV_RL24(buf) >> 5; 281 buf += 3; 282 buf_size -= 3; 283 284 if (s->profile > 3) 285 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); 286 287 if (!s->profile) 288 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); 289 else // profile 1-3 use bilinear, 4+ aren't defined so whatever 290 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab)); 291 292 if (header_size > buf_size - 7*s->keyframe) { 293 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); 294 return AVERROR_INVALIDDATA; 295 } 296 297 if (s->keyframe) { 298 if (AV_RL24(buf) != 0x2a019d) { 299 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf)); 300 return AVERROR_INVALIDDATA; 301 } 302 width = AV_RL16(buf+3) & 0x3fff; 303 height = AV_RL16(buf+5) & 0x3fff; 304 hscale = buf[4] >> 6; 305 vscale = buf[6] >> 6; 306 buf += 7; 307 buf_size -= 7; 308 309 if (hscale || vscale) 310 av_log_missing_feature(s->avctx, "Upscaling", 1); 311 312 s->update_golden = s->update_altref = VP56_FRAME_CURRENT; 313 for (i = 0; i < 4; i++) 314 for (j = 0; j < 16; j++) 315 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], 316 sizeof(s->prob->token[i][j])); 317 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); 318 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); 319 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); 320 memset(&s->segmentation, 0, sizeof(s->segmentation)); 321 memset(&s->lf_delta, 0, sizeof(s->lf_delta)); 322 } 323 324 if (!s->macroblocks_base || /* first frame */ 325 width != s->avctx->width || height != s->avctx->height) { 326 if ((ret = update_dimensions(s, width, height)) < 0) 327 return ret; 328 } 329 330 ff_vp56_init_range_decoder(c, buf, header_size); 331 buf += header_size; 332 buf_size -= header_size; 333 334 if (s->keyframe) { 335 if (vp8_rac_get(c)) 336 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); 337 vp8_rac_get(c); // whether we can skip clamping in dsp functions 338 } 339 340 if ((s->segmentation.enabled = vp8_rac_get(c))) 341 parse_segment_info(s); 342 else 343 s->segmentation.update_map = 0; // FIXME: move this to some init function? 344 345 s->filter.simple = vp8_rac_get(c); 346 s->filter.level = vp8_rac_get_uint(c, 6); 347 s->filter.sharpness = vp8_rac_get_uint(c, 3); 348 349 if ((s->lf_delta.enabled = vp8_rac_get(c))) 350 if (vp8_rac_get(c)) 351 update_lf_deltas(s); 352 353 if (setup_partitions(s, buf, buf_size)) { 354 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); 355 return AVERROR_INVALIDDATA; 356 } 357 358 get_quants(s); 359 360 if (!s->keyframe) { 361 update_refs(s); 362 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); 363 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); 364 } 365 366 // if we aren't saving this frame's probabilities for future frames, 367 // make a copy of the current probabilities 368 if (!(s->update_probabilities = vp8_rac_get(c))) 369 s->prob[1] = s->prob[0]; 370 371 s->update_last = s->keyframe || vp8_rac_get(c); 372 373 for (i = 0; i < 4; i++) 374 for (j = 0; j < 8; j++) 375 for (k = 0; k < 3; k++) 376 for (l = 0; l < NUM_DCT_TOKENS-1; l++) 377 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) { 378 int prob = vp8_rac_get_uint(c, 8); 379 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) 380 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob; 381 } 382 383 if ((s->mbskip_enabled = vp8_rac_get(c))) 384 s->prob->mbskip = vp8_rac_get_uint(c, 8); 385 386 if (!s->keyframe) { 387 s->prob->intra = vp8_rac_get_uint(c, 8); 388 s->prob->last = vp8_rac_get_uint(c, 8); 389 s->prob->golden = vp8_rac_get_uint(c, 8); 390 391 if (vp8_rac_get(c)) 392 for (i = 0; i < 4; i++) 393 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); 394 if (vp8_rac_get(c)) 395 for (i = 0; i < 3; i++) 396 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); 397 398 // 17.2 MV probability update 399 for (i = 0; i < 2; i++) 400 for (j = 0; j < 19; j++) 401 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j])) 402 s->prob->mvc[i][j] = vp8_rac_get_nn(c); 403 } 404 405 return 0; 406} 407 408static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src) 409{ 410 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x); 411 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y); 412} 413 414/** 415 * Motion vector coding, 17.1. 416 */ 417static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) 418{ 419 int bit, x = 0; 420 421 if (vp56_rac_get_prob_branchy(c, p[0])) { 422 int i; 423 424 for (i = 0; i < 3; i++) 425 x += vp56_rac_get_prob(c, p[9 + i]) << i; 426 for (i = 9; i > 3; i--) 427 x += vp56_rac_get_prob(c, p[9 + i]) << i; 428 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) 429 x += 8; 430 } else { 431 // small_mvtree 432 const uint8_t *ps = p+2; 433 bit = vp56_rac_get_prob(c, *ps); 434 ps += 1 + 3*bit; 435 x += 4*bit; 436 bit = vp56_rac_get_prob(c, *ps); 437 ps += 1 + bit; 438 x += 2*bit; 439 x += vp56_rac_get_prob(c, *ps); 440 } 441 442 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; 443} 444 445static av_always_inline 446const uint8_t *get_submv_prob(uint32_t left, uint32_t top) 447{ 448 if (left == top) 449 return vp8_submv_prob[4-!!left]; 450 if (!top) 451 return vp8_submv_prob[2]; 452 return vp8_submv_prob[1-!!left]; 453} 454 455/** 456 * Split motion vector prediction, 16.4. 457 * @returns the number of motion vectors parsed (2, 4 or 16) 458 */ 459static av_always_inline 460int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) 461{ 462 int part_idx; 463 int n, num; 464 VP8Macroblock *top_mb = &mb[2]; 465 VP8Macroblock *left_mb = &mb[-1]; 466 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], 467 *mbsplits_top = vp8_mbsplits[top_mb->partitioning], 468 *mbsplits_cur, *firstidx; 469 VP56mv *top_mv = top_mb->bmv; 470 VP56mv *left_mv = left_mb->bmv; 471 VP56mv *cur_mv = mb->bmv; 472 473 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { 474 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) { 475 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]); 476 } else { 477 part_idx = VP8_SPLITMVMODE_8x8; 478 } 479 } else { 480 part_idx = VP8_SPLITMVMODE_4x4; 481 } 482 483 num = vp8_mbsplit_count[part_idx]; 484 mbsplits_cur = vp8_mbsplits[part_idx], 485 firstidx = vp8_mbfirstidx[part_idx]; 486 mb->partitioning = part_idx; 487 488 for (n = 0; n < num; n++) { 489 int k = firstidx[n]; 490 uint32_t left, above; 491 const uint8_t *submv_prob; 492 493 if (!(k & 3)) 494 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); 495 else 496 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); 497 if (k <= 3) 498 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); 499 else 500 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); 501 502 submv_prob = get_submv_prob(left, above); 503 504 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) { 505 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) { 506 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { 507 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]); 508 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]); 509 } else { 510 AV_ZERO32(&mb->bmv[n]); 511 } 512 } else { 513 AV_WN32A(&mb->bmv[n], above); 514 } 515 } else { 516 AV_WN32A(&mb->bmv[n], left); 517 } 518 } 519 520 return num; 521} 522 523static av_always_inline 524void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y) 525{ 526 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, 527 mb - 1 /* left */, 528 mb + 1 /* top-left */ }; 529 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; 530 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; 531 int idx = CNT_ZERO; 532 int cur_sign_bias = s->sign_bias[mb->ref_frame]; 533 int8_t *sign_bias = s->sign_bias; 534 VP56mv near_mv[4]; 535 uint8_t cnt[4] = { 0 }; 536 VP56RangeCoder *c = &s->c; 537 538 AV_ZERO32(&near_mv[0]); 539 AV_ZERO32(&near_mv[1]); 540 AV_ZERO32(&near_mv[2]); 541 542 /* Process MB on top, left and top-left */ 543 #define MV_EDGE_CHECK(n)\ 544 {\ 545 VP8Macroblock *edge = mb_edge[n];\ 546 int edge_ref = edge->ref_frame;\ 547 if (edge_ref != VP56_FRAME_CURRENT) {\ 548 uint32_t mv = AV_RN32A(&edge->mv);\ 549 if (mv) {\ 550 if (cur_sign_bias != sign_bias[edge_ref]) {\ 551 /* SWAR negate of the values in mv. */\ 552 mv = ~mv;\ 553 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ 554 }\ 555 if (!n || mv != AV_RN32A(&near_mv[idx]))\ 556 AV_WN32A(&near_mv[++idx], mv);\ 557 cnt[idx] += 1 + (n != 2);\ 558 } else\ 559 cnt[CNT_ZERO] += 1 + (n != 2);\ 560 }\ 561 } 562 563 MV_EDGE_CHECK(0) 564 MV_EDGE_CHECK(1) 565 MV_EDGE_CHECK(2) 566 567 mb->partitioning = VP8_SPLITMVMODE_NONE; 568 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) { 569 mb->mode = VP8_MVMODE_MV; 570 571 /* If we have three distinct MVs, merge first and last if they're the same */ 572 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT])) 573 cnt[CNT_NEAREST] += 1; 574 575 /* Swap near and nearest if necessary */ 576 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { 577 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); 578 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); 579 } 580 581 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { 582 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { 583 584 /* Choose the best mv out of 0,0 and the nearest mv */ 585 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); 586 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + 587 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + 588 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); 589 590 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { 591 mb->mode = VP8_MVMODE_SPLIT; 592 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; 593 } else { 594 mb->mv.y += read_mv_component(c, s->prob->mvc[0]); 595 mb->mv.x += read_mv_component(c, s->prob->mvc[1]); 596 mb->bmv[0] = mb->mv; 597 } 598 } else { 599 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]); 600 mb->bmv[0] = mb->mv; 601 } 602 } else { 603 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]); 604 mb->bmv[0] = mb->mv; 605 } 606 } else { 607 mb->mode = VP8_MVMODE_ZERO; 608 AV_ZERO32(&mb->mv); 609 mb->bmv[0] = mb->mv; 610 } 611} 612 613static av_always_inline 614void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, 615 int mb_x, int keyframe) 616{ 617 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; 618 if (keyframe) { 619 int x, y; 620 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x; 621 uint8_t* const left = s->intra4x4_pred_mode_left; 622 for (y = 0; y < 4; y++) { 623 for (x = 0; x < 4; x++) { 624 const uint8_t *ctx; 625 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; 626 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); 627 left[y] = top[x] = *intra4x4; 628 intra4x4++; 629 } 630 } 631 } else { 632 int i; 633 for (i = 0; i < 16; i++) 634 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); 635 } 636} 637 638static av_always_inline 639void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref) 640{ 641 VP56RangeCoder *c = &s->c; 642 643 if (s->segmentation.update_map) 644 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); 645 else 646 *segment = ref ? *ref : *segment; 647 s->segment = *segment; 648 649 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0; 650 651 if (s->keyframe) { 652 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); 653 654 if (mb->mode == MODE_I4x4) { 655 decode_intra4x4_modes(s, c, mb_x, 1); 656 } else { 657 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u; 658 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); 659 AV_WN32A(s->intra4x4_pred_mode_left, modes); 660 } 661 662 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); 663 mb->ref_frame = VP56_FRAME_CURRENT; 664 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { 665 // inter MB, 16.2 666 if (vp56_rac_get_prob_branchy(c, s->prob->last)) 667 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? 668 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; 669 else 670 mb->ref_frame = VP56_FRAME_PREVIOUS; 671 s->ref_count[mb->ref_frame-1]++; 672 673 // motion vectors, 16.3 674 decode_mvs(s, mb, mb_x, mb_y); 675 } else { 676 // intra MB, 16.1 677 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); 678 679 if (mb->mode == MODE_I4x4) 680 decode_intra4x4_modes(s, c, mb_x, 0); 681 682 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); 683 mb->ref_frame = VP56_FRAME_CURRENT; 684 mb->partitioning = VP8_SPLITMVMODE_NONE; 685 AV_ZERO32(&mb->bmv[0]); 686 } 687} 688 689#ifndef decode_block_coeffs_internal 690/** 691 * @param c arithmetic bitstream reader context 692 * @param block destination for block coefficients 693 * @param probs probabilities to use when reading trees from the bitstream 694 * @param i initial coeff index, 0 unless a separate DC block is coded 695 * @param qmul array holding the dc/ac dequant factor at position 0/1 696 * @return 0 if no coeffs were decoded 697 * otherwise, the index of the last coeff decoded plus one 698 */ 699static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16], 700 uint8_t probs[16][3][NUM_DCT_TOKENS-1], 701 int i, uint8_t *token_prob, int16_t qmul[2]) 702{ 703 goto skip_eob; 704 do { 705 int coeff; 706 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB 707 return i; 708 709skip_eob: 710 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 711 if (++i == 16) 712 return i; // invalid input; blocks should end with EOB 713 token_prob = probs[i][0]; 714 goto skip_eob; 715 } 716 717 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 718 coeff = 1; 719 token_prob = probs[i+1][1]; 720 } else { 721 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 722 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]); 723 if (coeff) 724 coeff += vp56_rac_get_prob(c, token_prob[5]); 725 coeff += 2; 726 } else { 727 // DCT_CAT* 728 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { 729 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 730 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); 731 } else { // DCT_CAT2 732 coeff = 7; 733 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; 734 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]); 735 } 736 } else { // DCT_CAT3 and up 737 int a = vp56_rac_get_prob(c, token_prob[8]); 738 int b = vp56_rac_get_prob(c, token_prob[9+a]); 739 int cat = (a<<1) + b; 740 coeff = 3 + (8<<cat); 741 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]); 742 } 743 } 744 token_prob = probs[i+1][2]; 745 } 746 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; 747 } while (++i < 16); 748 749 return i; 750} 751#endif 752 753/** 754 * @param c arithmetic bitstream reader context 755 * @param block destination for block coefficients 756 * @param probs probabilities to use when reading trees from the bitstream 757 * @param i initial coeff index, 0 unless a separate DC block is coded 758 * @param zero_nhood the initial prediction context for number of surrounding 759 * all-zero blocks (only left/top, so 0-2) 760 * @param qmul array holding the dc/ac dequant factor at position 0/1 761 * @return 0 if no coeffs were decoded 762 * otherwise, the index of the last coeff decoded plus one 763 */ 764static av_always_inline 765int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], 766 uint8_t probs[16][3][NUM_DCT_TOKENS-1], 767 int i, int zero_nhood, int16_t qmul[2]) 768{ 769 uint8_t *token_prob = probs[i][zero_nhood]; 770 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB 771 return 0; 772 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul); 773} 774 775static av_always_inline 776void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, 777 uint8_t t_nnz[9], uint8_t l_nnz[9]) 778{ 779 int i, x, y, luma_start = 0, luma_ctx = 3; 780 int nnz_pred, nnz, nnz_total = 0; 781 int segment = s->segment; 782 int block_dc = 0; 783 784 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { 785 nnz_pred = t_nnz[8] + l_nnz[8]; 786 787 // decode DC values and do hadamard 788 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred, 789 s->qmat[segment].luma_dc_qmul); 790 l_nnz[8] = t_nnz[8] = !!nnz; 791 if (nnz) { 792 nnz_total += nnz; 793 block_dc = 1; 794 if (nnz == 1) 795 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc); 796 else 797 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc); 798 } 799 luma_start = 1; 800 luma_ctx = 0; 801 } 802 803 // luma blocks 804 for (y = 0; y < 4; y++) 805 for (x = 0; x < 4; x++) { 806 nnz_pred = l_nnz[y] + t_nnz[x]; 807 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, 808 nnz_pred, s->qmat[segment].luma_qmul); 809 // nnz+block_dc may be one more than the actual last index, but we don't care 810 s->non_zero_count_cache[y][x] = nnz + block_dc; 811 t_nnz[x] = l_nnz[y] = !!nnz; 812 nnz_total += nnz; 813 } 814 815 // chroma blocks 816 // TODO: what to do about dimensions? 2nd dim for luma is x, 817 // but for chroma it's (y<<1)|x 818 for (i = 4; i < 6; i++) 819 for (y = 0; y < 2; y++) 820 for (x = 0; x < 2; x++) { 821 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; 822 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0, 823 nnz_pred, s->qmat[segment].chroma_qmul); 824 s->non_zero_count_cache[i][(y<<1)+x] = nnz; 825 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; 826 nnz_total += nnz; 827 } 828 829 // if there were no coded coeffs despite the macroblock not being marked skip, 830 // we MUST not do the inner loop filter and should not do IDCT 831 // Since skip isn't used for bitstream prediction, just manually set it. 832 if (!nnz_total) 833 mb->skip = 1; 834} 835 836static av_always_inline 837void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, 838 int linesize, int uvlinesize, int simple) 839{ 840 AV_COPY128(top_border, src_y + 15*linesize); 841 if (!simple) { 842 AV_COPY64(top_border+16, src_cb + 7*uvlinesize); 843 AV_COPY64(top_border+24, src_cr + 7*uvlinesize); 844 } 845} 846 847static av_always_inline 848void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, 849 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, 850 int simple, int xchg) 851{ 852 uint8_t *top_border_m1 = top_border-32; // for TL prediction 853 src_y -= linesize; 854 src_cb -= uvlinesize; 855 src_cr -= uvlinesize; 856 857#define XCHG(a,b,xchg) do { \ 858 if (xchg) AV_SWAP64(b,a); \ 859 else AV_COPY64(b,a); \ 860 } while (0) 861 862 XCHG(top_border_m1+8, src_y-8, xchg); 863 XCHG(top_border, src_y, xchg); 864 XCHG(top_border+8, src_y+8, 1); 865 if (mb_x < mb_width-1) 866 XCHG(top_border+32, src_y+16, 1); 867 868 // only copy chroma for normal loop filter 869 // or to initialize the top row to 127 870 if (!simple || !mb_y) { 871 XCHG(top_border_m1+16, src_cb-8, xchg); 872 XCHG(top_border_m1+24, src_cr-8, xchg); 873 XCHG(top_border+16, src_cb, 1); 874 XCHG(top_border+24, src_cr, 1); 875 } 876} 877 878static av_always_inline 879int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) 880{ 881 if (!mb_x) { 882 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; 883 } else { 884 return mb_y ? mode : LEFT_DC_PRED8x8; 885 } 886} 887 888static av_always_inline 889int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y) 890{ 891 if (!mb_x) { 892 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8; 893 } else { 894 return mb_y ? mode : HOR_PRED8x8; 895 } 896} 897 898static av_always_inline 899int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y) 900{ 901 if (mode == DC_PRED8x8) { 902 return check_dc_pred8x8_mode(mode, mb_x, mb_y); 903 } else { 904 return mode; 905 } 906} 907 908static av_always_inline 909int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y) 910{ 911 switch (mode) { 912 case DC_PRED8x8: 913 return check_dc_pred8x8_mode(mode, mb_x, mb_y); 914 case VERT_PRED8x8: 915 return !mb_y ? DC_127_PRED8x8 : mode; 916 case HOR_PRED8x8: 917 return !mb_x ? DC_129_PRED8x8 : mode; 918 case PLANE_PRED8x8 /*TM*/: 919 return check_tm_pred8x8_mode(mode, mb_x, mb_y); 920 } 921 return mode; 922} 923 924static av_always_inline 925int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y) 926{ 927 if (!mb_x) { 928 return mb_y ? VERT_VP8_PRED : DC_129_PRED; 929 } else { 930 return mb_y ? mode : HOR_VP8_PRED; 931 } 932} 933 934static av_always_inline 935int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf) 936{ 937 switch (mode) { 938 case VERT_PRED: 939 if (!mb_x && mb_y) { 940 *copy_buf = 1; 941 return mode; 942 } 943 /* fall-through */ 944 case DIAG_DOWN_LEFT_PRED: 945 case VERT_LEFT_PRED: 946 return !mb_y ? DC_127_PRED : mode; 947 case HOR_PRED: 948 if (!mb_y) { 949 *copy_buf = 1; 950 return mode; 951 } 952 /* fall-through */ 953 case HOR_UP_PRED: 954 return !mb_x ? DC_129_PRED : mode; 955 case TM_VP8_PRED: 956 return check_tm_pred4x4_mode(mode, mb_x, mb_y); 957 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC 958 case DIAG_DOWN_RIGHT_PRED: 959 case VERT_RIGHT_PRED: 960 case HOR_DOWN_PRED: 961 if (!mb_y || !mb_x) 962 *copy_buf = 1; 963 return mode; 964 } 965 return mode; 966} 967 968static av_always_inline 969void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 970 int mb_x, int mb_y) 971{ 972 AVCodecContext *avctx = s->avctx; 973 int x, y, mode, nnz; 974 uint32_t tr; 975 976 // for the first row, we need to run xchg_mb_border to init the top edge to 127 977 // otherwise, skip it if we aren't going to deblock 978 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y)) 979 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], 980 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, 981 s->filter.simple, 1); 982 983 if (mb->mode < MODE_I4x4) { 984 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested 985 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y); 986 } else { 987 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y); 988 } 989 s->hpc.pred16x16[mode](dst[0], s->linesize); 990 } else { 991 uint8_t *ptr = dst[0]; 992 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; 993 uint8_t tr_top[4] = { 127, 127, 127, 127 }; 994 995 // all blocks on the right edge of the macroblock use bottom edge 996 // the top macroblock for their topright edge 997 uint8_t *tr_right = ptr - s->linesize + 16; 998 999 // if we're on the right edge of the frame, said edge is extended 1000 // from the top macroblock 1001 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) && 1002 mb_x == s->mb_width-1) { 1003 tr = tr_right[-1]*0x01010101u; 1004 tr_right = (uint8_t *)&tr; 1005 } 1006 1007 if (mb->skip) 1008 AV_ZERO128(s->non_zero_count_cache); 1009 1010 for (y = 0; y < 4; y++) { 1011 uint8_t *topright = ptr + 4 - s->linesize; 1012 for (x = 0; x < 4; x++) { 1013 int copy = 0, linesize = s->linesize; 1014 uint8_t *dst = ptr+4*x; 1015 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8]; 1016 1017 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) { 1018 topright = tr_top; 1019 } else if (x == 3) 1020 topright = tr_right; 1021 1022 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works 1023 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, ©); 1024 if (copy) { 1025 dst = copy_dst + 12; 1026 linesize = 8; 1027 if (!(mb_y + y)) { 1028 copy_dst[3] = 127U; 1029 AV_WN32A(copy_dst+4, 127U * 0x01010101U); 1030 } else { 1031 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize); 1032 if (!(mb_x + x)) { 1033 copy_dst[3] = 129U; 1034 } else { 1035 copy_dst[3] = ptr[4*x-s->linesize-1]; 1036 } 1037 } 1038 if (!(mb_x + x)) { 1039 copy_dst[11] = 1040 copy_dst[19] = 1041 copy_dst[27] = 1042 copy_dst[35] = 129U; 1043 } else { 1044 copy_dst[11] = ptr[4*x -1]; 1045 copy_dst[19] = ptr[4*x+s->linesize -1]; 1046 copy_dst[27] = ptr[4*x+s->linesize*2-1]; 1047 copy_dst[35] = ptr[4*x+s->linesize*3-1]; 1048 } 1049 } 1050 } else { 1051 mode = intra4x4[x]; 1052 } 1053 s->hpc.pred4x4[mode](dst, topright, linesize); 1054 if (copy) { 1055 AV_COPY32(ptr+4*x , copy_dst+12); 1056 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20); 1057 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28); 1058 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36); 1059 } 1060 1061 nnz = s->non_zero_count_cache[y][x]; 1062 if (nnz) { 1063 if (nnz == 1) 1064 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); 1065 else 1066 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize); 1067 } 1068 topright += 4; 1069 } 1070 1071 ptr += 4*s->linesize; 1072 intra4x4 += 4; 1073 } 1074 } 1075 1076 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { 1077 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y); 1078 } else { 1079 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y); 1080 } 1081 s->hpc.pred8x8[mode](dst[1], s->uvlinesize); 1082 s->hpc.pred8x8[mode](dst[2], s->uvlinesize); 1083 1084 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y)) 1085 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], 1086 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, 1087 s->filter.simple, 0); 1088} 1089 1090static const uint8_t subpel_idx[3][8] = { 1091 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels, 1092 // also function pointer index 1093 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required 1094 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels 1095}; 1096 1097/** 1098 * luma MC function 1099 * 1100 * @param s VP8 decoding context 1101 * @param dst target buffer for block data at block position 1102 * @param ref reference picture buffer at origin (0, 0) 1103 * @param mv motion vector (relative to block position) to get pixel data from 1104 * @param x_off horizontal position of block from origin (0, 0) 1105 * @param y_off vertical position of block from origin (0, 0) 1106 * @param block_w width of block (16, 8 or 4) 1107 * @param block_h height of block (always same as block_w) 1108 * @param width width of src/dst plane data 1109 * @param height height of src/dst plane data 1110 * @param linesize size of a single line of plane data, including padding 1111 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) 1112 */ 1113static av_always_inline 1114void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv, 1115 int x_off, int y_off, int block_w, int block_h, 1116 int width, int height, int linesize, 1117 vp8_mc_func mc_func[3][3]) 1118{ 1119 uint8_t *src = ref->data[0]; 1120 1121 if (AV_RN32A(mv)) { 1122 1123 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx]; 1124 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my]; 1125 1126 x_off += mv->x >> 2; 1127 y_off += mv->y >> 2; 1128 1129 // edge emulation 1130 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0); 1131 src += y_off * linesize + x_off; 1132 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || 1133 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { 1134 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize, 1135 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], 1136 x_off - mx_idx, y_off - my_idx, width, height); 1137 src = s->edge_emu_buffer + mx_idx + linesize * my_idx; 1138 } 1139 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); 1140 } else { 1141 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0); 1142 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); 1143 } 1144} 1145 1146/** 1147 * chroma MC function 1148 * 1149 * @param s VP8 decoding context 1150 * @param dst1 target buffer for block data at block position (U plane) 1151 * @param dst2 target buffer for block data at block position (V plane) 1152 * @param ref reference picture buffer at origin (0, 0) 1153 * @param mv motion vector (relative to block position) to get pixel data from 1154 * @param x_off horizontal position of block from origin (0, 0) 1155 * @param y_off vertical position of block from origin (0, 0) 1156 * @param block_w width of block (16, 8 or 4) 1157 * @param block_h height of block (always same as block_w) 1158 * @param width width of src/dst plane data 1159 * @param height height of src/dst plane data 1160 * @param linesize size of a single line of plane data, including padding 1161 * @param mc_func motion compensation function pointers (bilinear or sixtap MC) 1162 */ 1163static av_always_inline 1164void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref, 1165 const VP56mv *mv, int x_off, int y_off, 1166 int block_w, int block_h, int width, int height, int linesize, 1167 vp8_mc_func mc_func[3][3]) 1168{ 1169 uint8_t *src1 = ref->data[1], *src2 = ref->data[2]; 1170 1171 if (AV_RN32A(mv)) { 1172 int mx = mv->x&7, mx_idx = subpel_idx[0][mx]; 1173 int my = mv->y&7, my_idx = subpel_idx[0][my]; 1174 1175 x_off += mv->x >> 3; 1176 y_off += mv->y >> 3; 1177 1178 // edge emulation 1179 src1 += y_off * linesize + x_off; 1180 src2 += y_off * linesize + x_off; 1181 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0); 1182 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || 1183 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { 1184 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize, 1185 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], 1186 x_off - mx_idx, y_off - my_idx, width, height); 1187 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx; 1188 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); 1189 1190 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize, 1191 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], 1192 x_off - mx_idx, y_off - my_idx, width, height); 1193 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx; 1194 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); 1195 } else { 1196 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); 1197 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); 1198 } 1199 } else { 1200 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0); 1201 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); 1202 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); 1203 } 1204} 1205 1206static av_always_inline 1207void vp8_mc_part(VP8Context *s, uint8_t *dst[3], 1208 AVFrame *ref_frame, int x_off, int y_off, 1209 int bx_off, int by_off, 1210 int block_w, int block_h, 1211 int width, int height, VP56mv *mv) 1212{ 1213 VP56mv uvmv = *mv; 1214 1215 /* Y */ 1216 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off, 1217 ref_frame, mv, x_off + bx_off, y_off + by_off, 1218 block_w, block_h, width, height, s->linesize, 1219 s->put_pixels_tab[block_w == 8]); 1220 1221 /* U/V */ 1222 if (s->profile == 3) { 1223 uvmv.x &= ~7; 1224 uvmv.y &= ~7; 1225 } 1226 x_off >>= 1; y_off >>= 1; 1227 bx_off >>= 1; by_off >>= 1; 1228 width >>= 1; height >>= 1; 1229 block_w >>= 1; block_h >>= 1; 1230 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off, 1231 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, 1232 &uvmv, x_off + bx_off, y_off + by_off, 1233 block_w, block_h, width, height, s->uvlinesize, 1234 s->put_pixels_tab[1 + (block_w == 4)]); 1235} 1236 1237/* Fetch pixels for estimated mv 4 macroblocks ahead. 1238 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ 1239static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) 1240{ 1241 /* Don't prefetch refs that haven't been used very often this frame. */ 1242 if (s->ref_count[ref-1] > (mb_xy >> 5)) { 1243 int x_off = mb_x << 4, y_off = mb_y << 4; 1244 int mx = (mb->mv.x>>2) + x_off + 8; 1245 int my = (mb->mv.y>>2) + y_off; 1246 uint8_t **src= s->framep[ref]->data; 1247 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; 1248 /* For threading, a ff_thread_await_progress here might be useful, but 1249 * it actually slows down the decoder. Since a bad prefetch doesn't 1250 * generate bad decoder output, we don't run it here. */ 1251 s->dsp.prefetch(src[0]+off, s->linesize, 4); 1252 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64; 1253 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); 1254 } 1255} 1256 1257/** 1258 * Apply motion vectors to prediction buffer, chapter 18. 1259 */ 1260static av_always_inline 1261void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 1262 int mb_x, int mb_y) 1263{ 1264 int x_off = mb_x << 4, y_off = mb_y << 4; 1265 int width = 16*s->mb_width, height = 16*s->mb_height; 1266 AVFrame *ref = s->framep[mb->ref_frame]; 1267 VP56mv *bmv = mb->bmv; 1268 1269 switch (mb->partitioning) { 1270 case VP8_SPLITMVMODE_NONE: 1271 vp8_mc_part(s, dst, ref, x_off, y_off, 1272 0, 0, 16, 16, width, height, &mb->mv); 1273 break; 1274 case VP8_SPLITMVMODE_4x4: { 1275 int x, y; 1276 VP56mv uvmv; 1277 1278 /* Y */ 1279 for (y = 0; y < 4; y++) { 1280 for (x = 0; x < 4; x++) { 1281 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4, 1282 ref, &bmv[4*y + x], 1283 4*x + x_off, 4*y + y_off, 4, 4, 1284 width, height, s->linesize, 1285 s->put_pixels_tab[2]); 1286 } 1287 } 1288 1289 /* U/V */ 1290 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; 1291 for (y = 0; y < 2; y++) { 1292 for (x = 0; x < 2; x++) { 1293 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x + 1294 mb->bmv[ 2*y * 4 + 2*x+1].x + 1295 mb->bmv[(2*y+1) * 4 + 2*x ].x + 1296 mb->bmv[(2*y+1) * 4 + 2*x+1].x; 1297 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + 1298 mb->bmv[ 2*y * 4 + 2*x+1].y + 1299 mb->bmv[(2*y+1) * 4 + 2*x ].y + 1300 mb->bmv[(2*y+1) * 4 + 2*x+1].y; 1301 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2; 1302 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2; 1303 if (s->profile == 3) { 1304 uvmv.x &= ~7; 1305 uvmv.y &= ~7; 1306 } 1307 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4, 1308 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv, 1309 4*x + x_off, 4*y + y_off, 4, 4, 1310 width, height, s->uvlinesize, 1311 s->put_pixels_tab[2]); 1312 } 1313 } 1314 break; 1315 } 1316 case VP8_SPLITMVMODE_16x8: 1317 vp8_mc_part(s, dst, ref, x_off, y_off, 1318 0, 0, 16, 8, width, height, &bmv[0]); 1319 vp8_mc_part(s, dst, ref, x_off, y_off, 1320 0, 8, 16, 8, width, height, &bmv[1]); 1321 break; 1322 case VP8_SPLITMVMODE_8x16: 1323 vp8_mc_part(s, dst, ref, x_off, y_off, 1324 0, 0, 8, 16, width, height, &bmv[0]); 1325 vp8_mc_part(s, dst, ref, x_off, y_off, 1326 8, 0, 8, 16, width, height, &bmv[1]); 1327 break; 1328 case VP8_SPLITMVMODE_8x8: 1329 vp8_mc_part(s, dst, ref, x_off, y_off, 1330 0, 0, 8, 8, width, height, &bmv[0]); 1331 vp8_mc_part(s, dst, ref, x_off, y_off, 1332 8, 0, 8, 8, width, height, &bmv[1]); 1333 vp8_mc_part(s, dst, ref, x_off, y_off, 1334 0, 8, 8, 8, width, height, &bmv[2]); 1335 vp8_mc_part(s, dst, ref, x_off, y_off, 1336 8, 8, 8, 8, width, height, &bmv[3]); 1337 break; 1338 } 1339} 1340 1341static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) 1342{ 1343 int x, y, ch; 1344 1345 if (mb->mode != MODE_I4x4) { 1346 uint8_t *y_dst = dst[0]; 1347 for (y = 0; y < 4; y++) { 1348 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]); 1349 if (nnz4) { 1350 if (nnz4&~0x01010101) { 1351 for (x = 0; x < 4; x++) { 1352 if ((uint8_t)nnz4 == 1) 1353 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); 1354 else if((uint8_t)nnz4 > 1) 1355 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); 1356 nnz4 >>= 8; 1357 if (!nnz4) 1358 break; 1359 } 1360 } else { 1361 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize); 1362 } 1363 } 1364 y_dst += 4*s->linesize; 1365 } 1366 } 1367 1368 for (ch = 0; ch < 2; ch++) { 1369 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]); 1370 if (nnz4) { 1371 uint8_t *ch_dst = dst[1+ch]; 1372 if (nnz4&~0x01010101) { 1373 for (y = 0; y < 2; y++) { 1374 for (x = 0; x < 2; x++) { 1375 if ((uint8_t)nnz4 == 1) 1376 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); 1377 else if((uint8_t)nnz4 > 1) 1378 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize); 1379 nnz4 >>= 8; 1380 if (!nnz4) 1381 goto chroma_idct_end; 1382 } 1383 ch_dst += 4*s->uvlinesize; 1384 } 1385 } else { 1386 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize); 1387 } 1388 } 1389chroma_idct_end: ; 1390 } 1391} 1392 1393static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) 1394{ 1395 int interior_limit, filter_level; 1396 1397 if (s->segmentation.enabled) { 1398 filter_level = s->segmentation.filter_level[s->segment]; 1399 if (!s->segmentation.absolute_vals) 1400 filter_level += s->filter.level; 1401 } else 1402 filter_level = s->filter.level; 1403 1404 if (s->lf_delta.enabled) { 1405 filter_level += s->lf_delta.ref[mb->ref_frame]; 1406 filter_level += s->lf_delta.mode[mb->mode]; 1407 } 1408 1409 filter_level = av_clip_uintp2(filter_level, 6); 1410 1411 interior_limit = filter_level; 1412 if (s->filter.sharpness) { 1413 interior_limit >>= (s->filter.sharpness + 3) >> 2; 1414 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); 1415 } 1416 interior_limit = FFMAX(interior_limit, 1); 1417 1418 f->filter_level = filter_level; 1419 f->inner_limit = interior_limit; 1420 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; 1421} 1422 1423static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) 1424{ 1425 int mbedge_lim, bedge_lim, hev_thresh; 1426 int filter_level = f->filter_level; 1427 int inner_limit = f->inner_limit; 1428 int inner_filter = f->inner_filter; 1429 int linesize = s->linesize; 1430 int uvlinesize = s->uvlinesize; 1431 static const uint8_t hev_thresh_lut[2][64] = { 1432 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1433 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1434 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1435 3, 3, 3, 3 }, 1436 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1437 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1438 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1439 2, 2, 2, 2 } 1440 }; 1441 1442 if (!filter_level) 1443 return; 1444 1445 bedge_lim = 2*filter_level + inner_limit; 1446 mbedge_lim = bedge_lim + 4; 1447 1448 hev_thresh = hev_thresh_lut[s->keyframe][filter_level]; 1449 1450 if (mb_x) { 1451 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, 1452 mbedge_lim, inner_limit, hev_thresh); 1453 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, 1454 mbedge_lim, inner_limit, hev_thresh); 1455 } 1456 1457 if (inner_filter) { 1458 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim, 1459 inner_limit, hev_thresh); 1460 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim, 1461 inner_limit, hev_thresh); 1462 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim, 1463 inner_limit, hev_thresh); 1464 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, 1465 uvlinesize, bedge_lim, 1466 inner_limit, hev_thresh); 1467 } 1468 1469 if (mb_y) { 1470 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, 1471 mbedge_lim, inner_limit, hev_thresh); 1472 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, 1473 mbedge_lim, inner_limit, hev_thresh); 1474 } 1475 1476 if (inner_filter) { 1477 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize, 1478 linesize, bedge_lim, 1479 inner_limit, hev_thresh); 1480 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize, 1481 linesize, bedge_lim, 1482 inner_limit, hev_thresh); 1483 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize, 1484 linesize, bedge_lim, 1485 inner_limit, hev_thresh); 1486 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, 1487 dst[2] + 4 * uvlinesize, 1488 uvlinesize, bedge_lim, 1489 inner_limit, hev_thresh); 1490 } 1491} 1492 1493static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) 1494{ 1495 int mbedge_lim, bedge_lim; 1496 int filter_level = f->filter_level; 1497 int inner_limit = f->inner_limit; 1498 int inner_filter = f->inner_filter; 1499 int linesize = s->linesize; 1500 1501 if (!filter_level) 1502 return; 1503 1504 bedge_lim = 2*filter_level + inner_limit; 1505 mbedge_lim = bedge_lim + 4; 1506 1507 if (mb_x) 1508 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); 1509 if (inner_filter) { 1510 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim); 1511 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim); 1512 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim); 1513 } 1514 1515 if (mb_y) 1516 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); 1517 if (inner_filter) { 1518 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim); 1519 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim); 1520 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim); 1521 } 1522} 1523 1524static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y) 1525{ 1526 VP8FilterStrength *f = s->filter_strength; 1527 uint8_t *dst[3] = { 1528 curframe->data[0] + 16*mb_y*s->linesize, 1529 curframe->data[1] + 8*mb_y*s->uvlinesize, 1530 curframe->data[2] + 8*mb_y*s->uvlinesize 1531 }; 1532 int mb_x; 1533 1534 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { 1535 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); 1536 filter_mb(s, dst, f++, mb_x, mb_y); 1537 dst[0] += 16; 1538 dst[1] += 8; 1539 dst[2] += 8; 1540 } 1541} 1542 1543static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y) 1544{ 1545 VP8FilterStrength *f = s->filter_strength; 1546 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize; 1547 int mb_x; 1548 1549 for (mb_x = 0; mb_x < s->mb_width; mb_x++) { 1550 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); 1551 filter_mb_simple(s, dst, f++, mb_x, mb_y); 1552 dst += 16; 1553 } 1554} 1555 1556static void release_queued_segmaps(VP8Context *s, int is_close) 1557{ 1558 int leave_behind = is_close ? 0 : !s->maps_are_invalid; 1559 while (s->num_maps_to_be_freed > leave_behind) 1560 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]); 1561 s->maps_are_invalid = 0; 1562} 1563 1564static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, 1565 AVPacket *avpkt) 1566{ 1567 VP8Context *s = avctx->priv_data; 1568 int ret, mb_x, mb_y, i, y, referenced; 1569 enum AVDiscard skip_thresh; 1570 AVFrame *av_uninit(curframe), *prev_frame; 1571 1572 release_queued_segmaps(s, 0); 1573 1574 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) 1575 return ret; 1576 1577 prev_frame = s->framep[VP56_FRAME_CURRENT]; 1578 1579 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT 1580 || s->update_altref == VP56_FRAME_CURRENT; 1581 1582 skip_thresh = !referenced ? AVDISCARD_NONREF : 1583 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; 1584 1585 if (avctx->skip_frame >= skip_thresh) { 1586 s->invisible = 1; 1587 goto skip_decode; 1588 } 1589 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; 1590 1591 // release no longer referenced frames 1592 for (i = 0; i < 5; i++) 1593 if (s->frames[i].data[0] && 1594 &s->frames[i] != prev_frame && 1595 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && 1596 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && 1597 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) 1598 vp8_release_frame(s, &s->frames[i], 1, 0); 1599 1600 // find a free buffer 1601 for (i = 0; i < 5; i++) 1602 if (&s->frames[i] != prev_frame && 1603 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && 1604 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && 1605 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { 1606 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i]; 1607 break; 1608 } 1609 if (i == 5) { 1610 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n"); 1611 abort(); 1612 } 1613 if (curframe->data[0]) 1614 vp8_release_frame(s, curframe, 1, 0); 1615 1616 curframe->key_frame = s->keyframe; 1617 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; 1618 curframe->reference = referenced ? 3 : 0; 1619 if ((ret = vp8_alloc_frame(s, curframe))) { 1620 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); 1621 return ret; 1622 } 1623 1624 // check if golden and altref are swapped 1625 if (s->update_altref != VP56_FRAME_NONE) { 1626 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; 1627 } else { 1628 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2]; 1629 } 1630 if (s->update_golden != VP56_FRAME_NONE) { 1631 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; 1632 } else { 1633 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN]; 1634 } 1635 if (s->update_last) { 1636 s->next_framep[VP56_FRAME_PREVIOUS] = curframe; 1637 } else { 1638 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS]; 1639 } 1640 s->next_framep[VP56_FRAME_CURRENT] = curframe; 1641 1642 ff_thread_finish_setup(avctx); 1643 1644 // Given that arithmetic probabilities are updated every frame, it's quite likely 1645 // that the values we have on a random interframe are complete junk if we didn't 1646 // start decode on a keyframe. So just don't display anything rather than junk. 1647 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || 1648 !s->framep[VP56_FRAME_GOLDEN] || 1649 !s->framep[VP56_FRAME_GOLDEN2])) { 1650 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); 1651 return AVERROR_INVALIDDATA; 1652 } 1653 1654 s->linesize = curframe->linesize[0]; 1655 s->uvlinesize = curframe->linesize[1]; 1656 1657 if (!s->edge_emu_buffer) 1658 s->edge_emu_buffer = av_malloc(21*s->linesize); 1659 1660 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); 1661 1662 /* Zero macroblock structures for top/top-left prediction from outside the frame. */ 1663 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks)); 1664 1665 // top edge of 127 for intra prediction 1666 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { 1667 s->top_border[0][15] = s->top_border[0][23] = 127; 1668 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1); 1669 } 1670 memset(s->ref_count, 0, sizeof(s->ref_count)); 1671 if (s->keyframe) 1672 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4); 1673 1674#define MARGIN (16 << 2) 1675 s->mv_min.y = -MARGIN; 1676 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; 1677 1678 for (mb_y = 0; mb_y < s->mb_height; mb_y++) { 1679 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; 1680 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; 1681 int mb_xy = mb_y*s->mb_width; 1682 uint8_t *dst[3] = { 1683 curframe->data[0] + 16*mb_y*s->linesize, 1684 curframe->data[1] + 8*mb_y*s->uvlinesize, 1685 curframe->data[2] + 8*mb_y*s->uvlinesize 1686 }; 1687 1688 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock 1689 memset(s->left_nnz, 0, sizeof(s->left_nnz)); 1690 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); 1691 1692 // left edge of 129 for intra prediction 1693 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { 1694 for (i = 0; i < 3; i++) 1695 for (y = 0; y < 16>>!!i; y++) 1696 dst[i][y*curframe->linesize[i]-1] = 129; 1697 if (mb_y == 1) // top left edge is also 129 1698 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129; 1699 } 1700 1701 s->mv_min.x = -MARGIN; 1702 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; 1703 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map) 1704 ff_thread_await_progress(prev_frame, mb_y, 0); 1705 1706 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { 1707 /* Prefetch the current frame, 4 MBs ahead */ 1708 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); 1709 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); 1710 1711 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy, 1712 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL); 1713 1714 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); 1715 1716 if (!mb->skip) 1717 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); 1718 1719 if (mb->mode <= MODE_I4x4) 1720 intra_predict(s, dst, mb, mb_x, mb_y); 1721 else 1722 inter_predict(s, dst, mb, mb_x, mb_y); 1723 1724 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); 1725 1726 if (!mb->skip) { 1727 idct_mb(s, dst, mb); 1728 } else { 1729 AV_ZERO64(s->left_nnz); 1730 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned 1731 1732 // Reset DC block predictors if they would exist if the mb had coefficients 1733 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { 1734 s->left_nnz[8] = 0; 1735 s->top_nnz[mb_x][8] = 0; 1736 } 1737 } 1738 1739 if (s->deblock_filter) 1740 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]); 1741 1742 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); 1743 1744 dst[0] += 16; 1745 dst[1] += 8; 1746 dst[2] += 8; 1747 s->mv_min.x -= 64; 1748 s->mv_max.x -= 64; 1749 } 1750 if (s->deblock_filter) { 1751 if (s->filter.simple) 1752 filter_mb_row_simple(s, curframe, mb_y); 1753 else 1754 filter_mb_row(s, curframe, mb_y); 1755 } 1756 s->mv_min.y -= 64; 1757 s->mv_max.y -= 64; 1758 1759 ff_thread_report_progress(curframe, mb_y, 0); 1760 } 1761 1762 ff_thread_report_progress(curframe, INT_MAX, 0); 1763skip_decode: 1764 // if future frames don't use the updated probabilities, 1765 // reset them to the values we saved 1766 if (!s->update_probabilities) 1767 s->prob[0] = s->prob[1]; 1768 1769 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); 1770 1771 if (!s->invisible) { 1772 *(AVFrame*)data = *curframe; 1773 *data_size = sizeof(AVFrame); 1774 } 1775 1776 return avpkt->size; 1777} 1778 1779static av_cold int vp8_decode_init(AVCodecContext *avctx) 1780{ 1781 VP8Context *s = avctx->priv_data; 1782 1783 s->avctx = avctx; 1784 avctx->pix_fmt = PIX_FMT_YUV420P; 1785 1786 dsputil_init(&s->dsp, avctx); 1787 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1); 1788 ff_vp8dsp_init(&s->vp8dsp); 1789 1790 return 0; 1791} 1792 1793static av_cold int vp8_decode_free(AVCodecContext *avctx) 1794{ 1795 vp8_decode_flush_impl(avctx, 0, 1, 1); 1796 release_queued_segmaps(avctx->priv_data, 1); 1797 return 0; 1798} 1799 1800static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx) 1801{ 1802 VP8Context *s = avctx->priv_data; 1803 1804 s->avctx = avctx; 1805 1806 return 0; 1807} 1808 1809#define REBASE(pic) \ 1810 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL 1811 1812static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) 1813{ 1814 VP8Context *s = dst->priv_data, *s_src = src->priv_data; 1815 1816 if (s->macroblocks_base && 1817 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) { 1818 free_buffers(s); 1819 s->maps_are_invalid = 1; 1820 } 1821 1822 s->prob[0] = s_src->prob[!s_src->update_probabilities]; 1823 s->segmentation = s_src->segmentation; 1824 s->lf_delta = s_src->lf_delta; 1825 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias)); 1826 1827 memcpy(&s->frames, &s_src->frames, sizeof(s->frames)); 1828 s->framep[0] = REBASE(s_src->next_framep[0]); 1829 s->framep[1] = REBASE(s_src->next_framep[1]); 1830 s->framep[2] = REBASE(s_src->next_framep[2]); 1831 s->framep[3] = REBASE(s_src->next_framep[3]); 1832 1833 return 0; 1834} 1835 1836AVCodec ff_vp8_decoder = { 1837 .name = "vp8", 1838 .type = AVMEDIA_TYPE_VIDEO, 1839 .id = CODEC_ID_VP8, 1840 .priv_data_size = sizeof(VP8Context), 1841 .init = vp8_decode_init, 1842 .close = vp8_decode_free, 1843 .decode = vp8_decode_frame, 1844 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS, 1845 .flush = vp8_decode_flush, 1846 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), 1847 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy), 1848 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context), 1849}; 1850