/*
 * Copyright (C) 2003-2004 the ffmpeg project
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * On2 VP3 Video Decoder
 *
 * VP3 Video Decoder by Mike Melanson (mike at multimedia.cx)
 * For more information about the VP3 coding process, visit:
 *   http://wiki.multimedia.cx/index.php?title=On2_VP3
 *
 * Theora decoder by Alex Beregszaszi
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "dsputil.h"
#include "get_bits.h"

#include "vp3data.h"
#include "xiph.h"
#include "thread.h"

/* presumably the edge length of one fragment in pixels -- TODO confirm
 * against its users, none of which are in this part of the file */
#define FRAGMENT_PIXELS 8

/* Per-fragment decoder state; one entry per fragment in
 * Vp3DecodeContext.all_fragments. */
//FIXME split things out into their own arrays
typedef struct Vp3Fragment {
    int16_t dc;            /* DC coefficient, stored by unpack_vlcs() at level 0 */
    uint8_t coding_method; /* one of the MODE_* values */
    uint8_t qpi;           /* first index into qmat[]; raised by unpack_block_qpis() */
} Vp3Fragment;

/* values stored per superblock in Vp3DecodeContext.superblock_coding */
#define SB_NOT_CODED        0
#define SB_PARTIALLY_CODED  1
#define SB_FULLY_CODED      2

// This is the maximum length of a single long bit run that can be encoded
// for superblock coding or block qps. Theora special-cases this to read a
// bit instead of flipping the current bit to allow for runs longer than 4129.
#define MAXIMUM_LONG_BIT_RUN 4129

/* coding modes; stored in Vp3Fragment.coding_method and in
 * Vp3DecodeContext.macroblock_coding */
#define MODE_INTER_NO_MV      0
#define MODE_INTRA            1
#define MODE_INTER_PLUS_MV    2
#define MODE_INTER_LAST_MV    3
#define MODE_INTER_PRIOR_LAST 4
#define MODE_USING_GOLDEN     5
#define MODE_GOLDEN_MV        6
#define MODE_INTER_FOURMV     7
#define CODING_MODE_COUNT     8

/* special internal mode */
#define MODE_COPY             8

/* There are 6 preset schemes, plus a free-form scheme */
/* unpack_modes() selects row (scheme - 1) and indexes it with the decoded
 * mode VLC value to obtain a MODE_* constant. */
static const int ModeAlphabet[6][CODING_MODE_COUNT] =
{
    /* scheme 1: Last motion vector dominates */
    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
         MODE_INTER_PLUS_MV,    MODE_INTER_NO_MV,
         MODE_INTRA,            MODE_USING_GOLDEN,
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 2 */
    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
         MODE_INTER_NO_MV,      MODE_INTER_PLUS_MV,
         MODE_INTRA,            MODE_USING_GOLDEN,
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 3 */
    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,
         MODE_INTER_PRIOR_LAST, MODE_INTER_NO_MV,
         MODE_INTRA,            MODE_USING_GOLDEN,
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 4 */
    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,
         MODE_INTER_NO_MV,      MODE_INTER_PRIOR_LAST,
         MODE_INTRA,            MODE_USING_GOLDEN,
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 5: No motion vector dominates */
    {    MODE_INTER_NO_MV,      MODE_INTER_LAST_MV,
         MODE_INTER_PRIOR_LAST, MODE_INTER_PLUS_MV,
         MODE_INTRA,            MODE_USING_GOLDEN,
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

    /* scheme 6 */
    {    MODE_INTER_NO_MV,      MODE_USING_GOLDEN,
         MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
         MODE_INTER_PLUS_MV,    MODE_INTRA,
         MODE_GOLDEN_MV,        MODE_INTER_FOURMV },

};

/* {x, y} offset, in fragments, of each of the 16 fragments of a superblock,
 * in the order init_block_mapping() enumerates them. */
static const uint8_t hilbert_offset[16][2] = {
    {0,0}, {1,0}, {1,1}, {0,1},
    {0,2}, {0,3}, {1,3}, {1,2},
    {2,2}, {2,3}, {3,3}, {3,2},
    {3,1}, {2,1}, {2,0}, {3,0}
};

#define MIN_DEQUANT_VAL 2

/* Private decoder state, reached through AVCodecContext.priv_data. */
typedef struct Vp3DecodeContext {
    AVCodecContext *avctx;
    int theora, theora_tables;
    int version;
    int width, height;
    int chroma_x_shift, chroma_y_shift;
    AVFrame golden_frame;
    AVFrame last_frame;
    AVFrame current_frame;
    int keyframe;
    DSPContext dsp;
    int flipped_image;
    int last_slice_end;
    int skip_loop_filter;

    int qps[3];
    int nqps;
    int last_qps[3];

    int superblock_count;
    int y_superblock_width;
    int y_superblock_height;
    int y_superblock_count;
    int c_superblock_width;
    int c_superblock_height;
    int c_superblock_count;
    int u_superblock_start;
    int v_superblock_start;
    /* one SB_* value per superblock, filled by unpack_superblocks() */
    unsigned char *superblock_coding;

    int macroblock_count;
    int macroblock_width;
    int macroblock_height;

    int fragment_count;
    int fragment_width[2];   /* [0] luma plane, [1] chroma planes */
    int fragment_height[2];  /* [0] luma plane, [1] chroma planes */

    Vp3Fragment *all_fragments;
    int fragment_start[3];   /* per plane: index of its first fragment in all_fragments */
    int data_offset[3];

    /* per-fragment {x, y} motion vectors; [0] luma, [1] chroma
     * (written by unpack_vectors()) */
    int8_t (*motion_val[2])[2];

    ScanTable scantable;

    /* tables */
    uint16_t coded_dc_scale_factor[64];
    uint32_t coded_ac_scale_factor[64];
    uint8_t base_matrix[384][64];
    uint8_t qr_count[2][3];
    uint8_t qr_size [2][3][64];
    uint16_t qr_base[2][3][64];

    /**
     * This is a list of all tokens in bitstream order. Reordering takes place
     * by pulling from each level during IDCT. As a consequence, IDCT must be
     * in Hilbert order, making the minimum slice height 64 for 4:2:0 and 32
     * otherwise. The 32 different tokens with up to 12 bits of extradata are
     * collapsed into 3 types, packed as follows:
     *   (from the low to high bits)
     *
     * 2 bits: type (0,1,2)
     *   0: EOB run, 14 bits for run length (12 needed)
     *   1: zero run, 7 bits for run length
     *                7 bits for the next coefficient (3 needed)
     *   2: coefficient, 14 bits (11 needed)
     *
     * Coefficients are signed, so are packed in the highest bits for automatic
     * sign extension.
     */
    int16_t *dct_tokens[3][64];
    int16_t *dct_tokens_base;
#define TOKEN_EOB(eob_run)              ((eob_run) << 2)
#define TOKEN_ZERO_RUN(coeff, zero_run) (((coeff) << 9) + ((zero_run) << 2) + 1)
#define TOKEN_COEFF(coeff)              (((coeff) << 2) + 2)

    /**
     * number of blocks that contain DCT coefficients at the given level or higher
     */
    int num_coded_frags[3][64];
    int total_num_coded_frags;

    /* this is a list of indexes into the all_fragments array indicating
     * which of the fragments are coded */
    /* [1] and [2] point into the allocation that starts at [0]
     * (see unpack_superblocks()); only [0] is freed */
    int *coded_fragment_list[3];

    VLC dc_vlc[16];
    VLC ac_vlc_1[16];
    VLC ac_vlc_2[16];
    VLC ac_vlc_3[16];
    VLC ac_vlc_4[16];

    VLC superblock_run_length_vlc;
    VLC fragment_run_length_vlc;
    VLC mode_code_vlc;
    VLC motion_vector_vlc;

    /* these arrays need to be on 16-byte boundaries since SSE2 operations
     * index into them */
    DECLARE_ALIGNED(16, int16_t, qmat)[3][2][3][64];     ///< qmat[qpi][is_inter][plane]

    /* This table contains superblock_count * 16 entries. Each set of 16
     * numbers corresponds to the fragment indexes 0..15 of the superblock.
     * An entry will be -1 to indicate that no entry corresponds to that
     * index. */
    int *superblock_fragments;

    /* This is an array that indicates how a particular macroblock
     * is coded. */
    unsigned char *macroblock_coding;

    uint8_t *edge_emu_buffer;

    /* Huffman decode */
    int hti;
    unsigned int hbits;
    int entries;
    int huff_code_size;
    uint32_t huffman_table[80][32][2];

    uint8_t filter_limit_values[64];
    /* init_loop_filter() addresses this through a pointer to element 127,
     * using offsets -127..130 */
    DECLARE_ALIGNED(8, int, bounding_values_array)[256+2];
} Vp3DecodeContext;

/************************************************************************
 * VP3 specific functions
 ************************************************************************/

/*
 * Release the golden, last and current frame buffers.
 *
 * Two of the AVFrame structs may refer to the same buffer (same data[0]).
 * In that case the duplicate struct is zeroed before the release so that
 * every buffer is handed to ff_thread_release_buffer() only once.
 */
static void vp3_decode_flush(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;

    if (s->golden_frame.data[0]) {
        if (s->golden_frame.data[0] == s->last_frame.data[0])
            memset(&s->last_frame, 0, sizeof(AVFrame));
        if (s->current_frame.data[0] == s->golden_frame.data[0])
            memset(&s->current_frame, 0, sizeof(AVFrame));
        ff_thread_release_buffer(avctx, &s->golden_frame);
    }
    if (s->last_frame.data[0]) {
        if (s->current_frame.data[0] == s->last_frame.data[0])
            memset(&s->current_frame, 0, sizeof(AVFrame));
        ff_thread_release_buffer(avctx, &s->last_frame);
    }
    if (s->current_frame.data[0])
        ff_thread_release_buffer(avctx, &s->current_frame);
}

/*
 * Free everything owned by the decoder context.
 *
 * The per-context arrays are always freed. When avctx->internal->is_copy is
 * set the function returns right after that, leaving the VLC tables and the
 * frames alone.
 * NOTE(review): presumably a copy shares those with the context it was
 * copied from -- confirm against the threading code.
 *
 * @return always 0
 */
static av_cold int vp3_decode_end(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;
    int i;

    av_free(s->superblock_coding);
    av_free(s->all_fragments);
    av_free(s->coded_fragment_list[0]);
    av_free(s->dct_tokens_base);
    av_free(s->superblock_fragments);
    av_free(s->macroblock_coding);
    av_free(s->motion_val[0]);
    av_free(s->motion_val[1]);
    av_free(s->edge_emu_buffer);

    if (avctx->internal->is_copy)
        return 0;

    for (i = 0; i < 16; i++) {
        ff_free_vlc(&s->dc_vlc[i]);
        ff_free_vlc(&s->ac_vlc_1[i]);
        ff_free_vlc(&s->ac_vlc_2[i]);
        ff_free_vlc(&s->ac_vlc_3[i]);
        ff_free_vlc(&s->ac_vlc_4[i]);
    }

    ff_free_vlc(&s->superblock_run_length_vlc);
    ff_free_vlc(&s->fragment_run_length_vlc);
    ff_free_vlc(&s->mode_code_vlc);
    ff_free_vlc(&s->motion_vector_vlc);

    /* release all frames */
    vp3_decode_flush(avctx);

    return 0;
}

/*
 * This function sets up the superblock -> fragment mapping.
 *
 * For every superblock of every plane, 16 entries are appended to
 * s->superblock_fragments in hilbert_offset[] order. Each entry is the
 * index of the fragment in s->all_fragments, or -1 when that position lies
 * outside the plane's fragment grid.
 *
 * @return always 0; the function has no failure path.
 */
static int init_block_mapping(Vp3DecodeContext *s)
{
    int sb_x, sb_y, plane;
    int x, y, i, j = 0;

    for (plane = 0; plane < 3; plane++) {
        int sb_width    = plane ? s->c_superblock_width  : s->y_superblock_width;
        int sb_height   = plane ? s->c_superblock_height : s->y_superblock_height;
        int frag_width  = s->fragment_width[!!plane];
        int frag_height = s->fragment_height[!!plane];

        for (sb_y = 0; sb_y < sb_height; sb_y++)
            for (sb_x = 0; sb_x < sb_width; sb_x++)
                for (i = 0; i < 16; i++) {
                    x = 4*sb_x + hilbert_offset[i][0];
                    y = 4*sb_y + hilbert_offset[i][1];

                    if (x < frag_width && y < frag_height)
                        s->superblock_fragments[j++] = s->fragment_start[plane] + y*frag_width + x;
                    else
                        s->superblock_fragments[j++] = -1;
                }
    }

    return 0;  /* successful path out */
}

/*
 * This function sets up the dequantization tables used for a particular
 * frame.
350 */ 351static void init_dequantizer(Vp3DecodeContext *s, int qpi) 352{ 353 int ac_scale_factor = s->coded_ac_scale_factor[s->qps[qpi]]; 354 int dc_scale_factor = s->coded_dc_scale_factor[s->qps[qpi]]; 355 int i, plane, inter, qri, bmi, bmj, qistart; 356 357 for(inter=0; inter<2; inter++){ 358 for(plane=0; plane<3; plane++){ 359 int sum=0; 360 for(qri=0; qri<s->qr_count[inter][plane]; qri++){ 361 sum+= s->qr_size[inter][plane][qri]; 362 if(s->qps[qpi] <= sum) 363 break; 364 } 365 qistart= sum - s->qr_size[inter][plane][qri]; 366 bmi= s->qr_base[inter][plane][qri ]; 367 bmj= s->qr_base[inter][plane][qri+1]; 368 for(i=0; i<64; i++){ 369 int coeff= ( 2*(sum -s->qps[qpi])*s->base_matrix[bmi][i] 370 - 2*(qistart-s->qps[qpi])*s->base_matrix[bmj][i] 371 + s->qr_size[inter][plane][qri]) 372 / (2*s->qr_size[inter][plane][qri]); 373 374 int qmin= 8<<(inter + !i); 375 int qscale= i ? ac_scale_factor : dc_scale_factor; 376 377 s->qmat[qpi][inter][plane][s->dsp.idct_permutation[i]]= av_clip((qscale * coeff)/100 * 4, qmin, 4096); 378 } 379 // all DC coefficients use the same quant so as not to interfere with DC prediction 380 s->qmat[qpi][inter][plane][0] = s->qmat[0][inter][plane][0]; 381 } 382 } 383} 384 385/* 386 * This function initializes the loop filter boundary limits if the frame's 387 * quality index is different from the previous frame's. 388 * 389 * The filter_limit_values may not be larger than 127. 
 *
 * The table is addressed through a pointer to element 127 of
 * s->bounding_values_array. Offsets -127..128 are cleared first, then a
 * ramp up to filter_limit followed by a ramp back down to 0 is written
 * symmetrically around offset 0. Offsets 129 and 130 receive filter_limit
 * replicated into all four bytes.
 */
static void init_loop_filter(Vp3DecodeContext *s)
{
    int *bounding_values= s->bounding_values_array+127;
    int filter_limit;
    int x;
    int value;

    filter_limit = s->filter_limit_values[s->qps[0]];

    /* set up the bounding values */
    memset(s->bounding_values_array, 0, 256 * sizeof(int));
    for (x = 0; x < filter_limit; x++) {
        bounding_values[-x] = -x;
        bounding_values[x] = x;
    }
    for (x = value = filter_limit; x < 128 && value; x++, value--) {
        bounding_values[ x] =  value;
        bounding_values[-x] = -value;
    }
    /* the descending ramp was cut off at 127; store its next value */
    if (value)
        bounding_values[128] = value;
    bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202;
}

/*
 * This function unpacks all of the superblock/macroblock/fragment coding
 * information from the bitstream.
 *
 * On return s->superblock_coding holds one SB_* value per superblock, every
 * in-bounds fragment has coding_method set to MODE_INTER_NO_MV (coded) or
 * MODE_COPY (not coded), and s->coded_fragment_list[] / s->num_coded_frags[]
 * / s->total_num_coded_frags describe the coded fragments of each plane.
 *
 * @return 0 on success, -1 if a superblock run length exceeds the number of
 *         superblocks
 */
static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
{
    int superblock_starts[3] = { 0, s->u_superblock_start, s->v_superblock_start };
    int bit = 0;
    int current_superblock = 0;
    int current_run = 0;
    int num_partial_superblocks = 0;

    int i, j;
    int current_fragment;
    int plane;

    if (s->keyframe) {
        memset(s->superblock_coding, SB_FULLY_CODED, s->superblock_count);

    } else {

        /* unpack the list of partially-coded superblocks */
        /* pre-inverted: the loop flips the bit before using it */
        bit = get_bits1(gb) ^ 1;
        current_run = 0;

        while (current_superblock < s->superblock_count && get_bits_left(gb) > 0) {
            if (s->theora && current_run == MAXIMUM_LONG_BIT_RUN)
                bit = get_bits1(gb);
            else
                bit ^= 1;

            current_run = get_vlc2(gb,
                s->superblock_run_length_vlc.table, 6, 2) + 1;
            /* 34 is the escape value: 12 more bits extend the run */
            if (current_run == 34)
                current_run += get_bits(gb, 12);

            if (current_superblock + current_run > s->superblock_count) {
                av_log(s->avctx, AV_LOG_ERROR, "Invalid partially coded superblock run length\n");
                return -1;
            }

            /* bit is 0 (SB_NOT_CODED) or 1 (SB_PARTIALLY_CODED) here */
            memset(s->superblock_coding + current_superblock, bit, current_run);

            current_superblock += current_run;
            if (bit)
                num_partial_superblocks += current_run;
        }

        /* unpack the list of fully coded superblocks if any of the blocks were
         * not marked as partially coded in the previous step */
        if (num_partial_superblocks < s->superblock_count) {
            int superblocks_decoded = 0;

            current_superblock = 0;
            bit = get_bits1(gb) ^ 1;
            current_run = 0;

            while (superblocks_decoded < s->superblock_count - num_partial_superblocks
                   && get_bits_left(gb) > 0) {

                if (s->theora && current_run == MAXIMUM_LONG_BIT_RUN)
                    bit = get_bits1(gb);
                else
                    bit ^= 1;

                current_run = get_vlc2(gb,
                    s->superblock_run_length_vlc.table, 6, 2) + 1;
                if (current_run == 34)
                    current_run += get_bits(gb, 12);

                /* j counts only superblocks consumed by this run;
                 * current_superblock also steps over the skipped ones */
                for (j = 0; j < current_run; current_superblock++) {
                    if (current_superblock >= s->superblock_count) {
                        av_log(s->avctx, AV_LOG_ERROR, "Invalid fully coded superblock run length\n");
                        return -1;
                    }

                    /* skip any superblocks already marked as partially coded */
                    if (s->superblock_coding[current_superblock] == SB_NOT_CODED) {
                        /* 2*bit is SB_NOT_CODED (0) or SB_FULLY_CODED (2) */
                        s->superblock_coding[current_superblock] = 2*bit;
                        j++;
                    }
                }
                superblocks_decoded += current_run;
            }
        }

        /* if there were partial blocks, initialize bitstream for
         * unpacking fragment codings */
        if (num_partial_superblocks) {

            current_run = 0;
            bit = get_bits1(gb);
            /* toggle the bit because as soon as the first run length is
             * fetched the bit will be toggled again */
            bit ^= 1;
        }
    }

    /* figure out which fragments are coded; iterate through each
     * superblock (all planes) */
    s->total_num_coded_frags = 0;
    memset(s->macroblock_coding, MODE_COPY, s->macroblock_count);

    for (plane = 0; plane < 3; plane++) {
        int sb_start = superblock_starts[plane];
        int sb_end = sb_start + (plane ? s->c_superblock_count : s->y_superblock_count);
        int num_coded_frags = 0;

        for (i = sb_start; i < sb_end && get_bits_left(gb) > 0; i++) {

            /* iterate through all 16 fragments in a superblock */
            for (j = 0; j < 16; j++) {

                /* if the fragment is in bounds, check its coding status */
                current_fragment = s->superblock_fragments[i * 16 + j];
                if (current_fragment != -1) {
                    int coded = s->superblock_coding[i];

                    if (s->superblock_coding[i] == SB_PARTIALLY_CODED) {

                        /* fragment may or may not be coded; this is the case
                         * that cares about the fragment coding runs */
                        if (current_run-- == 0) {
                            bit ^= 1;
                            current_run = get_vlc2(gb,
                                s->fragment_run_length_vlc.table, 5, 2);
                        }
                        coded = bit;
                    }

                    if (coded) {
                        /* default mode; actual mode will be decoded in
                         * the next phase */
                        s->all_fragments[current_fragment].coding_method =
                            MODE_INTER_NO_MV;
                        s->coded_fragment_list[plane][num_coded_frags++] =
                            current_fragment;
                    } else {
                        /* not coded; copy this fragment from the prior frame */
                        s->all_fragments[current_fragment].coding_method =
                            MODE_COPY;
                    }
                }
            }
        }
        s->total_num_coded_frags += num_coded_frags;
        /* every coefficient level starts with the full count;
         * unpack_vlcs() lowers the higher levels as blocks end */
        for (i = 0; i < 64; i++)
            s->num_coded_frags[plane][i] = num_coded_frags;
        /* the next plane's list starts right after this plane's entries */
        if (plane < 2)
            s->coded_fragment_list[plane+1] = s->coded_fragment_list[plane] + num_coded_frags;
    }
    return 0;
}

/*
 * This function unpacks all the coding mode data for individual macroblocks
 * from the bitstream.
 *
 * On keyframes every fragment is set to MODE_INTRA and nothing is read.
 * Otherwise a 3-bit scheme is read: 0 means a custom alphabet follows,
 * 7 means each mode is a raw 3-bit value, and 1..6 select a row of
 * ModeAlphabet. The decoded mode is stored in s->macroblock_coding and
 * copied to every fragment of the macroblock that is not MODE_COPY.
 *
 * Side effect: BLOCK_X and BLOCK_Y are #defined in the body and remain
 * defined for the rest of the file.
 *
 * @return 0 on success, -1 if the bitstream runs out
 */
static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
{
    int i, j, k, sb_x, sb_y;
    int scheme;
    int current_macroblock;
    int current_fragment;
    int coding_mode;
    int custom_mode_alphabet[CODING_MODE_COUNT];
    const int *alphabet;
    Vp3Fragment *frag;

    if (s->keyframe) {
        for (i = 0; i < s->fragment_count; i++)
            s->all_fragments[i].coding_method = MODE_INTRA;

    } else {

        /* fetch the mode coding scheme for this frame */
        scheme = get_bits(gb, 3);

        /* is it a custom coding scheme? */
        if (scheme == 0) {
            /* pre-fill so that slots the stream never assigns are defined */
            for (i = 0; i < 8; i++)
                custom_mode_alphabet[i] = MODE_INTER_NO_MV;
            /* the stream gives, for each mode i, its slot in the alphabet */
            for (i = 0; i < 8; i++)
                custom_mode_alphabet[get_bits(gb, 3)] = i;
            alphabet = custom_mode_alphabet;
        } else
            /* for scheme 7 this names the row past the end of the table;
             * it is never read because scheme 7 uses raw 3-bit modes below */
            alphabet = ModeAlphabet[scheme-1];

        /* iterate through all of the macroblocks that contain 1 or more
         * coded fragments */
        for (sb_y = 0; sb_y < s->y_superblock_height; sb_y++) {
            for (sb_x = 0; sb_x < s->y_superblock_width; sb_x++) {
                if (get_bits_left(gb) <= 0)
                    return -1;

            /* the 4 macroblocks of a superblock are visited at offsets
             * (0,0), (0,1), (1,1), (1,0) */
            for (j = 0; j < 4; j++) {
                int mb_x = 2*sb_x +   (j>>1);
                int mb_y = 2*sb_y + (((j>>1)+j)&1);
                current_macroblock = mb_y * s->macroblock_width + mb_x;

                if (mb_x >= s->macroblock_width || mb_y >= s->macroblock_height)
                    continue;

/* luma fragment coordinates of block k (0..3) within the current macroblock */
#define BLOCK_X (2*mb_x + (k&1))
#define BLOCK_Y (2*mb_y + (k>>1))
                /* coding modes are only stored if the macroblock has at least one
                 * luma block coded, otherwise it must be INTER_NO_MV */
                for (k = 0; k < 4; k++) {
                    current_fragment = BLOCK_Y*s->fragment_width[0] + BLOCK_X;
                    if (s->all_fragments[current_fragment].coding_method != MODE_COPY)
                        break;
                }
                if (k == 4) {
                    s->macroblock_coding[current_macroblock] = MODE_INTER_NO_MV;
                    continue;
                }

                /* mode 7 means get 3 bits for each coding mode */
                if (scheme == 7)
                    coding_mode = get_bits(gb, 3);
                else
                    coding_mode = alphabet
                        [get_vlc2(gb, s->mode_code_vlc.table, 3, 3)];

                s->macroblock_coding[current_macroblock] = coding_mode;
                for (k = 0; k < 4; k++) {
                    frag = s->all_fragments + BLOCK_Y*s->fragment_width[0] + BLOCK_X;
                    if (frag->coding_method != MODE_COPY)
                        frag->coding_method = coding_mode;
                }

/* frag holds a plane-relative position; adding fragment_start[1] / [2]
 * reaches the same position in the two chroma planes */
#define SET_CHROMA_MODES \
    if (frag[s->fragment_start[1]].coding_method != MODE_COPY) \
        frag[s->fragment_start[1]].coding_method = coding_mode;\
    if (frag[s->fragment_start[2]].coding_method != MODE_COPY) \
        frag[s->fragment_start[2]].coding_method = coding_mode;

                if (s->chroma_y_shift) {
                    /* one chroma fragment per macroblock */
                    frag = s->all_fragments + mb_y*s->fragment_width[1] + mb_x;
                    SET_CHROMA_MODES
                } else if (s->chroma_x_shift) {
                    /* two vertically adjacent chroma fragments per macroblock */
                    frag = s->all_fragments + 2*mb_y*s->fragment_width[1] + mb_x;
                    for (k = 0; k < 2; k++) {
                        SET_CHROMA_MODES
                        frag += s->fragment_width[1];
                    }
                } else {
                    /* four chroma fragments, laid out like the luma ones */
                    for (k = 0; k < 4; k++) {
                        frag = s->all_fragments + BLOCK_Y*s->fragment_width[1] + BLOCK_X;
                        SET_CHROMA_MODES
                    }
                }
            }
            }
        }
    }

    return 0;
}

/*
 * This function unpacks all the motion vectors for the individual
 * macroblocks from the bitstream.
 *
 * Nothing is read on keyframes. Otherwise one bit selects between
 * VLC-coded and fixed 6-bit vector components, and every macroblock whose
 * mode is not MODE_COPY gets its vectors written to s->motion_val[0]
 * (luma) and s->motion_val[1] (chroma).
 *
 * Relies on the BLOCK_X / BLOCK_Y macros #defined inside unpack_modes().
 *
 * @return 0 on success, -1 if the bitstream runs out
 */
static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
{
    int j, k, sb_x, sb_y;
    int coding_mode;
    int motion_x[4];
    int motion_y[4];
    int last_motion_x = 0;
    int last_motion_y = 0;
    int prior_last_motion_x = 0;
    int prior_last_motion_y = 0;
    int current_macroblock;
    int current_fragment;
    int frag;

    if (s->keyframe)
        return 0;

    /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */
    coding_mode = get_bits1(gb);

    /* iterate through all of the macroblocks that contain 1 or more
     * coded fragments */
    for (sb_y = 0; sb_y < s->y_superblock_height; sb_y++) {
        for (sb_x = 0; sb_x < s->y_superblock_width; sb_x++) {
            if (get_bits_left(gb) <= 0)
                return -1;

        /* same macroblock visiting order as in unpack_modes() */
        for (j = 0; j < 4; j++) {
            int mb_x = 2*sb_x +   (j>>1);
            int mb_y = 2*sb_y + (((j>>1)+j)&1);
            current_macroblock = mb_y * s->macroblock_width + mb_x;

            if (mb_x >= s->macroblock_width || mb_y >= s->macroblock_height ||
                (s->macroblock_coding[current_macroblock] == MODE_COPY))
                continue;

            switch (s->macroblock_coding[current_macroblock]) {

            case MODE_INTER_PLUS_MV:
            case MODE_GOLDEN_MV:
                /* all 6 fragments use the same motion vector */
                if (coding_mode == 0) {
                    motion_x[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                    motion_y[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                } else {
                    motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)];
                    motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)];
                }

                /* vector maintenance, only on MODE_INTER_PLUS_MV */
                if (s->macroblock_coding[current_macroblock] ==
                    MODE_INTER_PLUS_MV) {
                    prior_last_motion_x = last_motion_x;
                    prior_last_motion_y = last_motion_y;
                    last_motion_x = motion_x[0];
                    last_motion_y = motion_y[0];
                }
                break;

            case MODE_INTER_FOURMV:
                /* vector maintenance */
                prior_last_motion_x = last_motion_x;
                prior_last_motion_y = last_motion_y;

                /* fetch 4 vectors from the bitstream, one for each
                 * Y fragment, then average for the C fragment vectors */
                for (k = 0; k < 4; k++) {
                    current_fragment = BLOCK_Y*s->fragment_width[0] + BLOCK_X;
                    if (s->all_fragments[current_fragment].coding_method != MODE_COPY) {
                        if (coding_mode == 0) {
                            motion_x[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                            motion_y[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                        } else {
                            motion_x[k] = fixed_motion_vector_table[get_bits(gb, 6)];
                            motion_y[k] = fixed_motion_vector_table[get_bits(gb, 6)];
                        }
                        /* the last vector actually read becomes "last" */
                        last_motion_x = motion_x[k];
                        last_motion_y = motion_y[k];
                    } else {
                        /* uncoded luma block: no vector in the stream */
                        motion_x[k] = 0;
                        motion_y[k] = 0;
                    }
                }
                break;

            case MODE_INTER_LAST_MV:
                /* all 6 fragments use the last motion vector */
                motion_x[0] = last_motion_x;
                motion_y[0] = last_motion_y;

                /* no vector maintenance (last vector remains the
                 * last vector) */
                break;

            case MODE_INTER_PRIOR_LAST:
                /* all 6 fragments use the motion vector prior to the
                 * last motion vector */
                motion_x[0] = prior_last_motion_x;
                motion_y[0] = prior_last_motion_y;

                /* vector maintenance */
                prior_last_motion_x = last_motion_x;
                prior_last_motion_y = last_motion_y;
                last_motion_x = motion_x[0];
                last_motion_y = motion_y[0];
                break;

            default:
                /* covers intra, inter without MV, golden without MV */
                motion_x[0] = 0;
                motion_y[0] = 0;

                /* no vector maintenance */
                break;
            }

            /* assign the motion vectors to the correct fragments */
            for (k = 0; k < 4; k++) {
                current_fragment =
                    BLOCK_Y*s->fragment_width[0] + BLOCK_X;
                if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
                    s->motion_val[0][current_fragment][0] = motion_x[k];
                    s->motion_val[0][current_fragment][1] = motion_y[k];
                } else {
                    s->motion_val[0][current_fragment][0] = motion_x[0];
                    s->motion_val[0][current_fragment][1] = motion_y[0];
                }
            }

            if (s->chroma_y_shift) {
                /* one chroma fragment per macroblock: average the four luma
                 * vectors, then halve both components */
                if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
                    motion_x[0] = RSHIFT(motion_x[0] + motion_x[1] + motion_x[2] + motion_x[3], 2);
                    motion_y[0] = RSHIFT(motion_y[0] + motion_y[1] + motion_y[2] + motion_y[3], 2);
                }
                /* halve; the low bit stays set if the value was odd */
                motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
                motion_y[0] = (motion_y[0]>>1) | (motion_y[0]&1);
                frag = mb_y*s->fragment_width[1] + mb_x;
                s->motion_val[1][frag][0] = motion_x[0];
                s->motion_val[1][frag][1] = motion_y[0];
            } else if (s->chroma_x_shift) {
                /* two chroma fragments per macroblock, one per luma row */
                if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
                    motion_x[0] = RSHIFT(motion_x[0] + motion_x[1], 1);
                    motion_y[0] = RSHIFT(motion_y[0] + motion_y[1], 1);
                    motion_x[1] = RSHIFT(motion_x[2] + motion_x[3], 1);
                    motion_y[1] = RSHIFT(motion_y[2] + motion_y[3], 1);
                } else {
                    motion_x[1] = motion_x[0];
                    motion_y[1] = motion_y[0];
                }
                /* only the x components are halved in this branch */
                motion_x[0] = (motion_x[0]>>1) | (motion_x[0]&1);
                motion_x[1] = (motion_x[1]>>1) | (motion_x[1]&1);

                frag = 2*mb_y*s->fragment_width[1] + mb_x;
                for (k = 0; k < 2; k++) {
                    s->motion_val[1][frag][0] = motion_x[k];
                    s->motion_val[1][frag][1] = motion_y[k];
                    frag += s->fragment_width[1];
                }
            } else {
                /* four chroma fragments: same vectors as the luma blocks */
                for (k = 0; k < 4; k++) {
                    frag = BLOCK_Y*s->fragment_width[1] + BLOCK_X;
                    if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) {
                        s->motion_val[1][frag][0] = motion_x[k];
                        s->motion_val[1][frag][1] = motion_y[k];
                    } else {
                        s->motion_val[1][frag][0] = motion_x[0];
                        s->motion_val[1][frag][1] = motion_y[0];
                    }
                }
            }
        }
        }
    }

    return 0;
}

/*
 * Unpack the per-fragment quantizer index (qpi) of every coded fragment.
 *
 * One pass is made per qpi level below s->nqps - 1. In each pass, runs of
 * bits are read; every coded fragment whose qpi still equals the current
 * level consumes one bit of the run and has that bit added to its qpi.
 * Fragments that received a 0 bit are finished and are not counted in the
 * next pass.
 *
 * @return 0 on success, -1 if a run extends past the coded fragment list
 */
static int unpack_block_qpis(Vp3DecodeContext *s, GetBitContext *gb)
{
    int qpi, i, j, bit, run_length, blocks_decoded, num_blocks_at_qpi;
    int num_blocks = s->total_num_coded_frags;

    for (qpi = 0; qpi < s->nqps-1 && num_blocks > 0; qpi++) {
        i = blocks_decoded = num_blocks_at_qpi = 0;

        /* pre-inverted: the loop flips the bit before using it */
        bit = get_bits1(gb) ^ 1;
        run_length = 0;

        do {
            if (run_length == MAXIMUM_LONG_BIT_RUN)
                bit = get_bits1(gb);
            else
                bit ^= 1;

            run_length = get_vlc2(gb, s->superblock_run_length_vlc.table, 6, 2) + 1;
            /* 34 is the escape value: 12 more bits extend the run */
            if (run_length == 34)
                run_length += get_bits(gb, 12);
            blocks_decoded += run_length;

            if (!bit)
                num_blocks_at_qpi += run_length;

            /* j counts fragments consumed by this run; i also steps over
             * fragments that settled at a lower qpi */
            for (j = 0; j < run_length; i++) {
                if (i >= s->total_num_coded_frags)
                    return -1;

                if (s->all_fragments[s->coded_fragment_list[0][i]].qpi == qpi) {
                    s->all_fragments[s->coded_fragment_list[0][i]].qpi += bit;
                    j++;
                }
            }
        } while (blocks_decoded < num_blocks && get_bits_left(gb) > 0);

        num_blocks -= num_blocks_at_qpi;
    }

    return 0;
}

/*
 * This function is called by unpack_dct_coeffs() to extract the VLCs from
 * the bitstream. The VLCs encode tokens which are used to unpack DCT
 * data. This function unpacks all the VLCs for either the Y plane or both
 * C planes, and is called for DC coefficients or different AC coefficient
 * levels (since different coefficient types require different VLC tables.
 *
 * This function returns a residual eob run. E.g, if a particular token gave
 * instructions to EOB the next 5 fragments and there were only 2 fragments
 * left in the current fragment range, 3 would be returned so that it could
 * be passed into the next call to this same function.
909 */ 910static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb, 911 VLC *table, int coeff_index, 912 int plane, 913 int eob_run) 914{ 915 int i, j = 0; 916 int token; 917 int zero_run = 0; 918 DCTELEM coeff = 0; 919 int bits_to_get; 920 int blocks_ended; 921 int coeff_i = 0; 922 int num_coeffs = s->num_coded_frags[plane][coeff_index]; 923 int16_t *dct_tokens = s->dct_tokens[plane][coeff_index]; 924 925 /* local references to structure members to avoid repeated deferences */ 926 int *coded_fragment_list = s->coded_fragment_list[plane]; 927 Vp3Fragment *all_fragments = s->all_fragments; 928 VLC_TYPE (*vlc_table)[2] = table->table; 929 930 if (num_coeffs < 0) 931 av_log(s->avctx, AV_LOG_ERROR, "Invalid number of coefficents at level %d\n", coeff_index); 932 933 if (eob_run > num_coeffs) { 934 coeff_i = blocks_ended = num_coeffs; 935 eob_run -= num_coeffs; 936 } else { 937 coeff_i = blocks_ended = eob_run; 938 eob_run = 0; 939 } 940 941 // insert fake EOB token to cover the split between planes or zzi 942 if (blocks_ended) 943 dct_tokens[j++] = blocks_ended << 2; 944 945 while (coeff_i < num_coeffs && get_bits_left(gb) > 0) { 946 /* decode a VLC into a token */ 947 token = get_vlc2(gb, vlc_table, 11, 3); 948 /* use the token to get a zero run, a coefficient, and an eob run */ 949 if ((unsigned) token <= 6U) { 950 eob_run = eob_run_base[token]; 951 if (eob_run_get_bits[token]) 952 eob_run += get_bits(gb, eob_run_get_bits[token]); 953 954 // record only the number of blocks ended in this plane, 955 // any spill will be recorded in the next plane. 
956 if (eob_run > num_coeffs - coeff_i) { 957 dct_tokens[j++] = TOKEN_EOB(num_coeffs - coeff_i); 958 blocks_ended += num_coeffs - coeff_i; 959 eob_run -= num_coeffs - coeff_i; 960 coeff_i = num_coeffs; 961 } else { 962 dct_tokens[j++] = TOKEN_EOB(eob_run); 963 blocks_ended += eob_run; 964 coeff_i += eob_run; 965 eob_run = 0; 966 } 967 } else if (token >= 0) { 968 bits_to_get = coeff_get_bits[token]; 969 if (bits_to_get) 970 bits_to_get = get_bits(gb, bits_to_get); 971 coeff = coeff_tables[token][bits_to_get]; 972 973 zero_run = zero_run_base[token]; 974 if (zero_run_get_bits[token]) 975 zero_run += get_bits(gb, zero_run_get_bits[token]); 976 977 if (zero_run) { 978 dct_tokens[j++] = TOKEN_ZERO_RUN(coeff, zero_run); 979 } else { 980 // Save DC into the fragment structure. DC prediction is 981 // done in raster order, so the actual DC can't be in with 982 // other tokens. We still need the token in dct_tokens[] 983 // however, or else the structure collapses on itself. 984 if (!coeff_index) 985 all_fragments[coded_fragment_list[coeff_i]].dc = coeff; 986 987 dct_tokens[j++] = TOKEN_COEFF(coeff); 988 } 989 990 if (coeff_index + zero_run > 64) { 991 av_log(s->avctx, AV_LOG_DEBUG, "Invalid zero run of %d with" 992 " %d coeffs left\n", zero_run, 64-coeff_index); 993 zero_run = 64 - coeff_index; 994 } 995 996 // zero runs code multiple coefficients, 997 // so don't try to decode coeffs for those higher levels 998 for (i = coeff_index+1; i <= coeff_index+zero_run; i++) 999 s->num_coded_frags[plane][i]--; 1000 coeff_i++; 1001 } else { 1002 av_log(s->avctx, AV_LOG_ERROR, 1003 "Invalid token %d\n", token); 1004 return -1; 1005 } 1006 } 1007 1008 if (blocks_ended > s->num_coded_frags[plane][coeff_index]) 1009 av_log(s->avctx, AV_LOG_ERROR, "More blocks ended than coded!\n"); 1010 1011 // decrement the number of blocks that have higher coeffecients for each 1012 // EOB run at this level 1013 if (blocks_ended) 1014 for (i = coeff_index+1; i < 64; i++) 1015 
s->num_coded_frags[plane][i] -= blocks_ended; 1016 1017 // setup the next buffer 1018 if (plane < 2) 1019 s->dct_tokens[plane+1][coeff_index] = dct_tokens + j; 1020 else if (coeff_index < 63) 1021 s->dct_tokens[0][coeff_index+1] = dct_tokens + j; 1022 1023 return eob_run; 1024} 1025 1026static void reverse_dc_prediction(Vp3DecodeContext *s, 1027 int first_fragment, 1028 int fragment_width, 1029 int fragment_height); 1030/* 1031 * This function unpacks all of the DCT coefficient data from the 1032 * bitstream. 1033 */ 1034static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb) 1035{ 1036 int i; 1037 int dc_y_table; 1038 int dc_c_table; 1039 int ac_y_table; 1040 int ac_c_table; 1041 int residual_eob_run = 0; 1042 VLC *y_tables[64]; 1043 VLC *c_tables[64]; 1044 1045 s->dct_tokens[0][0] = s->dct_tokens_base; 1046 1047 /* fetch the DC table indexes */ 1048 dc_y_table = get_bits(gb, 4); 1049 dc_c_table = get_bits(gb, 4); 1050 1051 /* unpack the Y plane DC coefficients */ 1052 residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_y_table], 0, 1053 0, residual_eob_run); 1054 if (residual_eob_run < 0) 1055 return residual_eob_run; 1056 1057 /* reverse prediction of the Y-plane DC coefficients */ 1058 reverse_dc_prediction(s, 0, s->fragment_width[0], s->fragment_height[0]); 1059 1060 /* unpack the C plane DC coefficients */ 1061 residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0, 1062 1, residual_eob_run); 1063 if (residual_eob_run < 0) 1064 return residual_eob_run; 1065 residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0, 1066 2, residual_eob_run); 1067 if (residual_eob_run < 0) 1068 return residual_eob_run; 1069 1070 /* reverse prediction of the C-plane DC coefficients */ 1071 if (!(s->avctx->flags & CODEC_FLAG_GRAY)) 1072 { 1073 reverse_dc_prediction(s, s->fragment_start[1], 1074 s->fragment_width[1], s->fragment_height[1]); 1075 reverse_dc_prediction(s, s->fragment_start[2], 1076 s->fragment_width[1], s->fragment_height[1]); 1077 } 
1078 1079 /* fetch the AC table indexes */ 1080 ac_y_table = get_bits(gb, 4); 1081 ac_c_table = get_bits(gb, 4); 1082 1083 /* build tables of AC VLC tables */ 1084 for (i = 1; i <= 5; i++) { 1085 y_tables[i] = &s->ac_vlc_1[ac_y_table]; 1086 c_tables[i] = &s->ac_vlc_1[ac_c_table]; 1087 } 1088 for (i = 6; i <= 14; i++) { 1089 y_tables[i] = &s->ac_vlc_2[ac_y_table]; 1090 c_tables[i] = &s->ac_vlc_2[ac_c_table]; 1091 } 1092 for (i = 15; i <= 27; i++) { 1093 y_tables[i] = &s->ac_vlc_3[ac_y_table]; 1094 c_tables[i] = &s->ac_vlc_3[ac_c_table]; 1095 } 1096 for (i = 28; i <= 63; i++) { 1097 y_tables[i] = &s->ac_vlc_4[ac_y_table]; 1098 c_tables[i] = &s->ac_vlc_4[ac_c_table]; 1099 } 1100 1101 /* decode all AC coefficents */ 1102 for (i = 1; i <= 63; i++) { 1103 residual_eob_run = unpack_vlcs(s, gb, y_tables[i], i, 1104 0, residual_eob_run); 1105 if (residual_eob_run < 0) 1106 return residual_eob_run; 1107 1108 residual_eob_run = unpack_vlcs(s, gb, c_tables[i], i, 1109 1, residual_eob_run); 1110 if (residual_eob_run < 0) 1111 return residual_eob_run; 1112 residual_eob_run = unpack_vlcs(s, gb, c_tables[i], i, 1113 2, residual_eob_run); 1114 if (residual_eob_run < 0) 1115 return residual_eob_run; 1116 } 1117 1118 return 0; 1119} 1120 1121/* 1122 * This function reverses the DC prediction for each coded fragment in 1123 * the frame. Much of this function is adapted directly from the original 1124 * VP3 source code. 
*/
/* true when fragment x was coded against the same reference type as the
 * fragment currently being reconstructed */
#define COMPATIBLE_FRAME(x) \
  (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type)
/* DC value stored in fragment u */
#define DC_COEFF(u) s->all_fragments[u].dc

/*
 * Fragments are visited in raster order, fragment_height rows of
 * fragment_width fragments, starting at index first_fragment in
 * s->all_fragments. Fragments marked MODE_COPY are left untouched.
 * For every other fragment a predictor is computed from the left, up-left,
 * up and up-right neighbours that exist and are "compatible", and is added
 * to the fragment's stored DC value.
 */
static void reverse_dc_prediction(Vp3DecodeContext *s,
                                  int first_fragment,
                                  int fragment_width,
                                  int fragment_height)
{

/* bit flags telling which neighbours take part in the prediction */
#define PUL 8
#define PU 4
#define PUR 2
#define PL 1

    int x, y;
    int i = first_fragment;

    int predicted_dc;

    /* DC values for the left, up-left, up, and up-right fragments */
    int vl, vul, vu, vur;

    /* indexes for the left, up-left, up, and up-right fragments */
    int l, ul, u, ur;

    /*
     * Indexed by the OR of the PUL/PU/PUR/PL flags. The 4 fields mean:
     *   0: up-left multiplier
     *   1: up multiplier
     *   2: up-right multiplier
     *   3: left multiplier
     * The weighted sum is divided by 128 below.
     */
    static const int predictor_transform[16][4] = {
        {  0,  0,  0,  0},
        {  0,  0,  0,128},        // PL
        {  0,  0,128,  0},        // PUR
        {  0,  0, 53, 75},        // PUR|PL
        {  0,128,  0,  0},        // PU
        {  0, 64,  0, 64},        // PU|PL
        {  0,128,  0,  0},        // PU|PUR
        {  0,  0, 53, 75},        // PU|PUR|PL
        {128,  0,  0,  0},        // PUL
        {  0,  0,  0,128},        // PUL|PL
        { 64,  0, 64,  0},        // PUL|PUR
        {  0,  0, 53, 75},        // PUL|PUR|PL
        {  0,128,  0,  0},        // PUL|PU
       {-104,116,  0,116},        // PUL|PU|PL
        { 24, 80, 24,  0},        // PUL|PU|PUR
       {-104,116,  0,116}         // PUL|PU|PUR|PL
    };

    /* This table shows which types of blocks can use other blocks for
     * prediction. For example, INTRA is the only mode in this table to
     * have a frame number of 0. That means INTRA blocks can only predict
     * from other INTRA blocks. There are 2 golden frame coding types;
     * blocks encoded in these modes can only predict from other blocks
     * that were encoded with one of these 2 modes. */
    static const unsigned char compatible_frame[9] = {
        1,    /* MODE_INTER_NO_MV */
        0,    /* MODE_INTRA */
        1,    /* MODE_INTER_PLUS_MV */
        1,    /* MODE_INTER_LAST_MV */
        1,    /* MODE_INTER_PRIOR_LAST */
        2,    /* MODE_USING_GOLDEN */
        2,    /* MODE_GOLDEN_MV */
        1,    /* MODE_INTER_FOURMV */
        3     /* MODE_COPY */
    };
    int current_frame_type;

    /* there is a last DC predictor for each of the 3 frame types */
    short last_dc[3];

    int transform = 0;

    vul = vu = vur = vl = 0;
    last_dc[0] = last_dc[1] = last_dc[2] = 0;

    /* for each fragment row... */
    for (y = 0; y < fragment_height; y++) {

        /* for each fragment in a row... */
        for (x = 0; x < fragment_width; x++, i++) {

            /* reverse prediction if this block was coded */
            if (s->all_fragments[i].coding_method != MODE_COPY) {

                current_frame_type =
                    compatible_frame[s->all_fragments[i].coding_method];

                /* collect the neighbours that exist inside the plane and
                 * flag those that are compatible with this fragment */
                transform= 0;
                if(x){
                    l= i-1;
                    vl = DC_COEFF(l);
                    if(COMPATIBLE_FRAME(l))
                        transform |= PL;
                }
                if(y){
                    u= i-fragment_width;
                    vu = DC_COEFF(u);
                    if(COMPATIBLE_FRAME(u))
                        transform |= PU;
                    if(x){
                        ul= i-fragment_width-1;
                        vul = DC_COEFF(ul);
                        if(COMPATIBLE_FRAME(ul))
                            transform |= PUL;
                    }
                    if(x + 1 < fragment_width){
                        ur= i-fragment_width+1;
                        vur = DC_COEFF(ur);
                        if(COMPATIBLE_FRAME(ur))
                            transform |= PUR;
                    }
                }

                if (transform == 0) {

                    /* if there were no fragments to predict from, use last
                     * DC saved */
                    predicted_dc = last_dc[current_frame_type];
                } else {

                    /* apply the appropriate predictor transform */
                    predicted_dc =
                        (predictor_transform[transform][0] * vul) +
                        (predictor_transform[transform][1] * vu) +
                        (predictor_transform[transform][2] * vur) +
                        (predictor_transform[transform][3] * vl);

                    predicted_dc /= 128;

                    /* check for outranging on the [ul u l] and
                     * [ul u ur l] predictors: fall back to the first of
                     * up, left, up-left that is more than 128 away */
                    if ((transform == 15) || (transform == 13)) {
                        if (FFABS(predicted_dc - vu) > 128)
                            predicted_dc = vu;
                        else if (FFABS(predicted_dc - vl) > 128)
                            predicted_dc = vl;
                        else if (FFABS(predicted_dc - vul) > 128)
                            predicted_dc = vul;
                    }
                }

                /* at long last, apply the predictor */
                DC_COEFF(i) += predicted_dc;
                /* save the DC */
                last_dc[current_frame_type] = DC_COEFF(i);
            }
        }
    }
}

/*
 * Run the loop filter over fragment rows ystart (inclusive) to yend
 * (exclusive) of one plane of the current frame. Only edges of fragments
 * that are not MODE_COPY are filtered.
 */
static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int yend)
{
    int x, y;
    int *bounding_values= s->bounding_values_array+127;

    /* both chroma planes use index 1 of the fragment dimension arrays */
    int width           = s->fragment_width[!!plane];
    int height          = s->fragment_height[!!plane];
    int fragment        = s->fragment_start        [plane] + ystart * width;
    int stride          = s->current_frame.linesize[plane];
    uint8_t *plane_data = s->current_frame.data    [plane];
    /* non-flipped images are addressed with a negated stride */
    if (!s->flipped_image) stride = -stride;
    plane_data += s->data_offset[plane] + 8*ystart*stride;

    for (y = ystart; y < yend; y++) {

        for (x = 0; x < width; x++) {
            /* This code basically just deblocks on the edges of coded blocks.
             * However, it has to be much more complicated because of the
             * braindamaged deblock ordering used in VP3/Theora. Order matters
             * because some pixels get filtered twice. */
            if( s->all_fragments[fragment].coding_method != MODE_COPY )
            {
                /* do not perform left edge filter for left columns frags */
                if (x > 0) {
                    s->dsp.vp3_h_loop_filter(
                        plane_data + 8*x,
                        stride, bounding_values);
                }

                /* do not perform top edge filter for top row fragments */
                if (y > 0) {
                    s->dsp.vp3_v_loop_filter(
                        plane_data + 8*x,
                        stride, bounding_values);
                }

                /* do not perform right edge filter for right column
                 * fragments or if right fragment neighbor is also coded
                 * in this frame (it will be filtered in next iteration) */
                if ((x < width - 1) &&
                    (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
                    s->dsp.vp3_h_loop_filter(
                        plane_data + 8*x + 8,
                        stride, bounding_values);
                }

                /* do not perform bottom edge filter for bottom row
                 * fragments or if bottom fragment neighbor is also coded
                 * in this frame (it will be filtered in the next row) */
                if ((y < height - 1) &&
                    (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
                    s->dsp.vp3_v_loop_filter(
                        plane_data + 8*x + 8*stride,
                        stride, bounding_values);
                }
            }

            fragment++;
        }
        plane_data += 8*stride;
    }
}

/**
 * Pull DCT tokens from the 64 levels to decode and dequant the coefficients
 * for the next block in coding order
 *
 * The low two bits of a token select its type: 0 is an EOB run, 1 a zero
 * run followed by a coefficient, 2 a single coefficient. The per-level
 * read pointers in s->dct_tokens[plane][] are advanced as tokens are used.
 *
 * @return the level at which decoding stopped; a value above 63 is
 *         returned when a zero run overflows the block
 */
static inline int vp3_dequant(Vp3DecodeContext *s, Vp3Fragment *frag,
                              int plane, int inter, DCTELEM block[64])
{
    int16_t *dequantizer = s->qmat[frag->qpi][inter][plane];
    uint8_t *perm = s->scantable.permutated;
    int i = 0;

    do {
        int token = *s->dct_tokens[plane][i];
        switch (token & 3) {
        case 0: // EOB
            /* one block of the run is consumed: either step past the
             * token or write back the run shortened by one */
            if (--token < 4) // 0-3 are token types, so the EOB run must now be 0
                s->dct_tokens[plane][i]++;
            else
                *s->dct_tokens[plane][i] = token & ~3;
            goto end;
        case 1: // zero run
            s->dct_tokens[plane][i]++;
            /* bits 2..8 hold the run length, the bits above the coefficient */
            i += (token >> 2) & 0x7f;
            if (i > 63) {
                av_log(s->avctx, AV_LOG_ERROR, "Coefficient index overflow\n");
                return i;
            }
            block[perm[i]] = (token >> 9) * dequantizer[perm[i]];
            i++;
            break;
        case 2: // coeff
            block[perm[i]] = (token >> 2) * dequantizer[perm[i]];
            s->dct_tokens[plane][i++]++;
            break;
        default: // shouldn't happen
            return i;
        }
    } while (i < 64);
    // return value is expected to be a valid level
    i--;
end:
    // the actual DC+prediction is in the fragment structure
    block[0] = frag->dc * s->qmat[0][inter][plane][0];
    return i;
}

/**
 * called when all pixels up to row y are complete
 *
 * Reports decoding progress when frame threading is active, then hands
 * the rows finished since the previous call to the user's
 * draw_horiz_band callback, if one is set.
 */
static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y)
{
    int h, cy, i;
    int offset[AV_NUM_DATA_POINTERS];

    if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
        int y_flipped = s->flipped_image ? s->avctx->height-y : y;

        // At the end of the frame, report INT_MAX instead of the height of the frame.
        // This makes the other threads' ff_thread_await_progress() calls cheaper, because
        // they don't have to clip their values.
        ff_thread_report_progress(&s->current_frame, y_flipped==s->avctx->height ? INT_MAX : y_flipped-1, 0);
    }

    if(s->avctx->draw_horiz_band==NULL)
        return;

    /* h is the band height; y becomes the first row of the band */
    h= y - s->last_slice_end;
    s->last_slice_end= y;
    y -= h;

    if (!s->flipped_image) {
        y = s->avctx->height - y - h;
    }

    cy = y >> s->chroma_y_shift;
    offset[0] = s->current_frame.linesize[0]*y;
    offset[1] = s->current_frame.linesize[1]*cy;
    offset[2] = s->current_frame.linesize[2]*cy;
    for (i = 3; i < AV_NUM_DATA_POINTERS; i++)
        offset[i] = 0;

    emms_c();
    s->avctx->draw_horiz_band(s->avctx, &s->current_frame, offset, y, 3, h);
}

/**
 * Wait for the reference frame of the current fragment.
* The progress value is in luma pixel rows.
 *
 * The reference is the golden frame for the two golden coding modes and
 * the last frame otherwise.
 */
static void await_reference_row(Vp3DecodeContext *s, Vp3Fragment *fragment, int motion_y, int y)
{
    AVFrame *ref_frame;
    int ref_row;
    /* one extra row is needed when the vertical vector has a half-pel part */
    int border = motion_y&1;

    if (fragment->coding_method == MODE_USING_GOLDEN ||
        fragment->coding_method == MODE_GOLDEN_MV)
        ref_frame = &s->golden_frame;
    else
        ref_frame = &s->last_frame;

    ref_row = y + (motion_y>>1);
    ref_row = FFMAX(FFABS(ref_row), ref_row + 8 + border);

    ff_thread_await_progress(ref_frame, ref_row, 0);
}

/*
 * Perform the final rendering for a particular slice of data.
 * The slice number ranges from 0..(c_superblock_height - 1).
 *
 * For each plane, every fragment of the slice is visited in the order
 * given by hilbert_offset. Coded fragments are motion compensated (unless
 * intra), dequantized and inverse transformed; MODE_COPY fragments are
 * copied from the last frame. The loop filter is applied per superblock
 * row unless s->skip_loop_filter is set.
 */
static void render_slice(Vp3DecodeContext *s, int slice)
{
    int x, y, i, j, fragment;
    LOCAL_ALIGNED_16(DCTELEM, block, [64]);
    int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
    int motion_halfpel_index;
    uint8_t *motion_source;
    int plane, first_pixel;

    if (slice >= s->c_superblock_height)
        return;

    for (plane = 0; plane < 3; plane++) {
        uint8_t *output_plane = s->current_frame.data    [plane] + s->data_offset[plane];
        uint8_t *  last_plane = s->   last_frame.data    [plane] + s->data_offset[plane];
        uint8_t *golden_plane = s-> golden_frame.data    [plane] + s->data_offset[plane];
        int stride            = s->current_frame.linesize[plane];
        int plane_width       = s->width  >> (plane && s->chroma_x_shift);
        int plane_height      = s->height >> (plane && s->chroma_y_shift);
        int8_t (*motion_val)[2] = s->motion_val[!!plane];

        /* with vertically subsampled chroma, a luma slice covers two
         * superblock rows */
        int sb_x, sb_y        = slice << (!plane && s->chroma_y_shift);
        int slice_height      = sb_y + 1 + (!plane && s->chroma_y_shift);
        int slice_width       = plane ? s->c_superblock_width : s->y_superblock_width;

        int fragment_width    = s->fragment_width[!!plane];
        int fragment_height   = s->fragment_height[!!plane];
        int fragment_start    = s->fragment_start[plane];
        /* waiting on reference rows is only done for the luma plane */
        int do_await          = !plane && HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_FRAME);

        if (!s->flipped_image) stride = -stride;
        if (CONFIG_GRAY && plane && (s->avctx->flags & CODEC_FLAG_GRAY))
            continue;

        /* for each superblock row in the slice (both of them)... */
        for (; sb_y < slice_height; sb_y++) {

            /* for each superblock in a row... */
            for (sb_x = 0; sb_x < slice_width; sb_x++) {

                /* for each block in a superblock... */
                for (j = 0; j < 16; j++) {
                    x = 4*sb_x + hilbert_offset[j][0];
                    y = 4*sb_y + hilbert_offset[j][1];
                    fragment = y*fragment_width + x;

                    i = fragment_start + fragment;

                    // bounds check
                    if (x >= fragment_width || y >= fragment_height)
                        continue;

                    first_pixel = 8*y*stride + 8*x;

                    if (do_await && s->all_fragments[i].coding_method != MODE_INTRA)
                        await_reference_row(s, &s->all_fragments[i], motion_val[fragment][1], (16*y) >> s->chroma_y_shift);

                    /* transform if this block was coded */
                    if (s->all_fragments[i].coding_method != MODE_COPY) {
                        if ((s->all_fragments[i].coding_method == MODE_USING_GOLDEN) ||
                            (s->all_fragments[i].coding_method == MODE_GOLDEN_MV))
                            motion_source= golden_plane;
                        else
                            motion_source= last_plane;

                        motion_source += first_pixel;
                        motion_halfpel_index = 0;

                        /* sort out the motion vector if this fragment is coded
                         * using a motion vector method */
                        if ((s->all_fragments[i].coding_method > MODE_INTRA) &&
                            (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) {
                            int src_x, src_y;
                            motion_x = motion_val[fragment][0];
                            motion_y = motion_val[fragment][1];

                            src_x= (motion_x>>1) + 8*x;
                            src_y= (motion_y>>1) + 8*y;

                            /* bit 0: horizontal half-pel, bit 1: vertical half-pel */
                            motion_halfpel_index = motion_x & 0x01;
                            motion_source += (motion_x >> 1);

                            motion_halfpel_index |= (motion_y & 0x01) << 1;
                            motion_source += ((motion_y >> 1) * stride);

                            /* the 9x9 source area leaves the plane: read it
                             * through the edge emulation buffer instead */
                            if(src_x<0 || src_y<0 || src_x + 9 >= plane_width || src_y + 9 >= plane_height){
                                uint8_t *temp= s->edge_emu_buffer;
                                if(stride<0) temp -= 8*stride;

                                s->dsp.emulated_edge_mc(temp, motion_source, stride, 9, 9, src_x, src_y, plane_width, plane_height);
                                motion_source= temp;
                            }
                        }


                        /* first, take care of copying a block from either the
                         * previous or the golden frame */
                        if (s->all_fragments[i].coding_method != MODE_INTRA) {
                            /* Note, it is possible to implement all MC cases with
                               put_no_rnd_pixels_l2 which would look more like the
                               VP3 source but this would be slower as
                               put_no_rnd_pixels_tab is better optimized */
                            if(motion_halfpel_index != 3){
                                s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
                                    output_plane + first_pixel,
                                    motion_source, stride, 8);
                            }else{
                                int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1
                                s->dsp.put_no_rnd_pixels_l2[1](
                                    output_plane + first_pixel,
                                    motion_source - d,
                                    motion_source + stride + 1 + d,
                                    stride, 8);
                            }
                        }

                        s->dsp.clear_block(block);

                        /* invert DCT and place (or add) in final output */

                        if (s->all_fragments[i].coding_method == MODE_INTRA) {
                            int index;
                            index = vp3_dequant(s, s->all_fragments + i, plane, 0, block);
                            /* vp3_dequant() reported a coefficient overflow */
                            if (index > 63)
                                continue;
                            if(s->avctx->idct_algo!=FF_IDCT_VP3)
                                block[0] += 128<<3;
                            s->dsp.idct_put(
                                output_plane + first_pixel,
                                stride,
                                block);
                        } else {
                            int index = vp3_dequant(s, s->all_fragments + i, plane, 1, block);
                            if (index > 63)
                                continue;
                            /* index 0 means only the DC is present */
                            if (index > 0) {
                                s->dsp.idct_add(
                                    output_plane + first_pixel,
                                    stride,
                                    block);
                            } else {
                                s->dsp.vp3_idct_dc_add(output_plane + first_pixel, stride, block);
                            }
                        }
                    } else {

                        /* copy directly from the previous frame */
                        s->dsp.put_pixels_tab[1][0](
                            output_plane + first_pixel,
                            last_plane + first_pixel,
                            stride, 8);

                    }
                }
            }

            // Filter up to the last row in the superblock row
            if (!s->skip_loop_filter)
                apply_loop_filter(s, plane, 4*sb_y - !!sb_y, FFMIN(4*sb_y+3, fragment_height-1));
        }
    }

     /* this looks like a good place for slice dispatch... */
     /* algorithm:
      *   if (slice == s->macroblock_height - 1)
      *     dispatch (both last slice & 2nd-to-last slice);
      *   else if (slice > 0)
      *     dispatch (slice - 1);
      */

    vp3_draw_horiz_band(s, FFMIN((32 << s->chroma_y_shift) * (slice + 1) -16, s->height-16));
}

/// Allocate tables for per-frame data in Vp3DecodeContext
/// Returns 0 on success; on any allocation failure vp3_decode_end() is
/// called and -1 is returned.
static av_cold int allocate_tables(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;
    int y_fragment_count, c_fragment_count;

    y_fragment_count = s->fragment_width[0] * s->fragment_height[0];
    c_fragment_count = s->fragment_width[1] * s->fragment_height[1];

    s->superblock_coding = av_malloc(s->superblock_count);
    s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
    s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int));
    s->dct_tokens_base = av_malloc(64*s->fragment_count * sizeof(*s->dct_tokens_base));
    s->motion_val[0] = av_malloc(y_fragment_count * sizeof(*s->motion_val[0]));
    s->motion_val[1] = av_malloc(c_fragment_count * sizeof(*s->motion_val[1]));

    /* work out the block mapping tables */
    s->superblock_fragments = av_malloc(s->superblock_count * 16 * sizeof(int));
    s->macroblock_coding = av_malloc(s->macroblock_count + 1);

    if (!s->superblock_coding || !s->all_fragments || !s->dct_tokens_base ||
        !s->coded_fragment_list[0] || !s->superblock_fragments || !s->macroblock_coding ||
        !s->motion_val[0] || !s->motion_val[1]) {
        vp3_decode_end(avctx);
        return -1;
    }

    init_block_mapping(s);

    return 0;
}

/*
 * Decoder init: derive the superblock, macroblock and fragment geometry
 * from the frame size, set up the dequantization base tables and the VLC
 * tables (built-in VP3.1 tables, or the Huffman tables stored in the
 * context when s->theora_tables is set), then allocate the per-frame
 * tables.
 *
 * Returns the result of allocate_tables(), or -1 when a Theora Huffman
 * table cannot be turned into a VLC.
 */
static av_cold int vp3_decode_init(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;
    int i, inter, plane;
    int c_width;
    int c_height;
    int y_fragment_count, c_fragment_count;

    if (avctx->codec_tag == MKTAG('V','P','3','0'))
        s->version = 0;
    else
        s->version = 1;

    s->avctx = avctx;
    /* internal dimensions are rounded up to a multiple of 16 */
    s->width = FFALIGN(avctx->width, 16);
    s->height = FFALIGN(avctx->height, 16);
    if (avctx->pix_fmt == PIX_FMT_NONE)
        avctx->pix_fmt = PIX_FMT_YUV420P;
    avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
    if(avctx->idct_algo==FF_IDCT_AUTO)
        avctx->idct_algo=FF_IDCT_VP3;
    dsputil_init(&s->dsp, avctx);

    ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);

    /* initialize to an impossible value which will force a recalculation
     * in the first frame decode */
    for (i = 0; i < 3; i++)
        s->qps[i] = -1;

    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);

    /* a superblock covers 32x32 pixels */
    s->y_superblock_width = (s->width + 31) / 32;
    s->y_superblock_height = (s->height + 31) / 32;
    s->y_superblock_count = s->y_superblock_width * s->y_superblock_height;

    /* work out the dimensions for the C planes */
    c_width = s->width >> s->chroma_x_shift;
    c_height = s->height >> s->chroma_y_shift;
    s->c_superblock_width = (c_width + 31) / 32;
    s->c_superblock_height = (c_height + 31) / 32;
    s->c_superblock_count = s->c_superblock_width * s->c_superblock_height;

    s->superblock_count = s->y_superblock_count + (s->c_superblock_count * 2);
    s->u_superblock_start = s->y_superblock_count;
    s->v_superblock_start = s->u_superblock_start + s->c_superblock_count;

    /* a macroblock covers 16x16 pixels */
    s->macroblock_width = (s->width + 15) / 16;
    s->macroblock_height = (s->height + 15) / 16;
    s->macroblock_count = s->macroblock_width * s->macroblock_height;

    s->fragment_width[0] = s->width / FRAGMENT_PIXELS;
    s->fragment_height[0] = s->height / FRAGMENT_PIXELS;
    s->fragment_width[1]  = s->fragment_width[0]  >> s->chroma_x_shift;
    s->fragment_height[1] = s->fragment_height[0] >> s->chroma_y_shift;

    /* fragment count covers all 8x8 blocks for all 3 planes */
    y_fragment_count     = s->fragment_width[0] * s->fragment_height[0];
    c_fragment_count     = s->fragment_width[1] * s->fragment_height[1];
    s->fragment_count    = y_fragment_count + 2*c_fragment_count;
    s->fragment_start[1] = y_fragment_count;
    s->fragment_start[2] = y_fragment_count + c_fragment_count;

    if (!s->theora_tables)
    {
        /* no tables were supplied: use the built-in VP3.1 defaults */
        for (i = 0; i < 64; i++) {
            s->coded_dc_scale_factor[i] = vp31_dc_scale_factor[i];
            s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i];
            s->base_matrix[0][i] = vp31_intra_y_dequant[i];
            s->base_matrix[1][i] = vp31_intra_c_dequant[i];
            s->base_matrix[2][i] = vp31_inter_dequant[i];
            s->filter_limit_values[i] = vp31_filter_limit_values[i];
        }

        /* one quant range per (inter, plane): base matrix 0 for intra
         * luma, 1 for intra chroma, 2 for every inter plane */
        for(inter=0; inter<2; inter++){
            for(plane=0; plane<3; plane++){
                s->qr_count[inter][plane]= 1;
                s->qr_size [inter][plane][0]= 63;
                s->qr_base [inter][plane][0]=
                s->qr_base [inter][plane][1]= 2*inter + (!!plane)*!inter;
            }
        }

        /* init VLC tables */
        for (i = 0; i < 16; i++) {

            /* DC histograms */
            init_vlc(&s->dc_vlc[i], 11, 32,
                &dc_bias[i][0][1], 4, 2,
                &dc_bias[i][0][0], 4, 2, 0);

            /* group 1 AC histograms */
            init_vlc(&s->ac_vlc_1[i], 11, 32,
                &ac_bias_0[i][0][1], 4, 2,
                &ac_bias_0[i][0][0], 4, 2, 0);

            /* group 2 AC histograms */
            init_vlc(&s->ac_vlc_2[i], 11, 32,
                &ac_bias_1[i][0][1], 4, 2,
                &ac_bias_1[i][0][0], 4, 2, 0);

            /* group 3 AC histograms */
            init_vlc(&s->ac_vlc_3[i], 11, 32,
                &ac_bias_2[i][0][1], 4, 2,
                &ac_bias_2[i][0][0], 4, 2, 0);

            /* group 4 AC histograms */
            init_vlc(&s->ac_vlc_4[i], 11, 32,
                &ac_bias_3[i][0][1], 4, 2,
                &ac_bias_3[i][0][0], 4, 2, 0);
        }
    } else {

        /* build the VLCs from s->huffman_table: entries 0..15 are the DC
         * tables, each following group of 16 one AC group */
        for (i = 0; i < 16; i++) {
            /* DC histograms */
            if (init_vlc(&s->dc_vlc[i], 11, 32,
                &s->huffman_table[i][0][1], 8, 4,
                &s->huffman_table[i][0][0], 8, 4, 0) < 0)
                goto vlc_fail;

            /* group 1 AC histograms */
            if (init_vlc(&s->ac_vlc_1[i], 11, 32,
                &s->huffman_table[i+16][0][1], 8, 4,
                &s->huffman_table[i+16][0][0], 8, 4, 0) < 0)
                goto vlc_fail;

            /* group 2 AC histograms */
            if (init_vlc(&s->ac_vlc_2[i], 11, 32,
                &s->huffman_table[i+16*2][0][1], 8, 4,
                &s->huffman_table[i+16*2][0][0], 8, 4, 0) < 0)
                goto vlc_fail;

            /* group 3 AC histograms */
            if (init_vlc(&s->ac_vlc_3[i], 11, 32,
                &s->huffman_table[i+16*3][0][1], 8, 4,
                &s->huffman_table[i+16*3][0][0], 8, 4, 0) < 0)
                goto vlc_fail;

            /* group 4 AC histograms */
            if (init_vlc(&s->ac_vlc_4[i], 11, 32,
                &s->huffman_table[i+16*4][0][1], 8, 4,
                &s->huffman_table[i+16*4][0][0], 8, 4, 0) < 0)
                goto vlc_fail;
        }
    }

    init_vlc(&s->superblock_run_length_vlc, 6, 34,
        &superblock_run_length_vlc_table[0][1], 4, 2,
        &superblock_run_length_vlc_table[0][0], 4, 2, 0);

    init_vlc(&s->fragment_run_length_vlc, 5, 30,
        &fragment_run_length_vlc_table[0][1], 4, 2,
        &fragment_run_length_vlc_table[0][0], 4, 2, 0);

    init_vlc(&s->mode_code_vlc, 3, 8,
        &mode_code_vlc_table[0][1], 2, 1,
        &mode_code_vlc_table[0][0], 2, 1, 0);

    init_vlc(&s->motion_vector_vlc, 6, 63,
        &motion_vector_vlc_table[0][1], 2, 1,
        &motion_vector_vlc_table[0][0], 2, 1, 0);

    /* no frame buffers are held yet */
    for (i = 0; i < 3; i++) {
        s->current_frame.data[i] = NULL;
        s->last_frame.data[i] = NULL;
        s->golden_frame.data[i] = NULL;
    }

    return allocate_tables(avctx);

vlc_fail:
    av_log(avctx, AV_LOG_FATAL, "Invalid huffman table\n");
    return -1;
}

/// Release and shuffle frames after decode finishes
static void update_frames(AVCodecContext *avctx)
{
    Vp3DecodeContext *s = avctx->priv_data;

    /* release the last frame, if it is allocated and if it is not the
     * golden frame */
    if (s->last_frame.data[0] && s->last_frame.type != FF_BUFFER_TYPE_COPY)
        ff_thread_release_buffer(avctx, &s->last_frame);

    /* shuffle frames (last = current) */
    s->last_frame= s->current_frame;

    if (s->keyframe) {
        if (s->golden_frame.data[0])
            ff_thread_release_buffer(avctx, &s->golden_frame);
        s->golden_frame = s->current_frame;
        /* last_frame now aliases the golden frame: mark it as a copy so
         * that it is not released separately above */
        s->last_frame.type = FF_BUFFER_TYPE_COPY;
    }

    s->current_frame.data[0]= NULL; /* ensure that we catch any access to this released frame */
}

/*
 * Frame-threading hook: bring the context of dst up to date with the
 * context of src, then shuffle the frames of dst.
 *
 * Returns -1 when src has no current frame or the dimensions differ, the
 * error of allocate_tables() if that fails, and 0 otherwise.
 */
static int vp3_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    Vp3DecodeContext *s = dst->priv_data, *s1 = src->priv_data;
    int qps_changed = 0, i, err;

/* copy the bytes of the context from start_field up to, but not
 * including, end_field */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)

    if (!s1->current_frame.data[0]
        ||s->width != s1->width
        ||s->height!= s1->height) {
        if (s != s1)
            copy_fields(s, s1, golden_frame, current_frame);
        return -1;
    }

    if (s != s1) {
        // init tables if the first frame hasn't been decoded
        if (!s->current_frame.data[0]) {
            int y_fragment_count, c_fragment_count;
            s->avctx = dst;
            err = allocate_tables(dst);
            if (err)
                return err;
            y_fragment_count = s->fragment_width[0] * s->fragment_height[0];
            c_fragment_count = s->fragment_width[1] * s->fragment_height[1];
memcpy(s->motion_val[0], s1->motion_val[0], y_fragment_count * sizeof(*s->motion_val[0])); 1883 memcpy(s->motion_val[1], s1->motion_val[1], c_fragment_count * sizeof(*s->motion_val[1])); 1884 } 1885 1886 // copy previous frame data 1887 copy_fields(s, s1, golden_frame, dsp); 1888 1889 // copy qscale data if necessary 1890 for (i = 0; i < 3; i++) { 1891 if (s->qps[i] != s1->qps[1]) { 1892 qps_changed = 1; 1893 memcpy(&s->qmat[i], &s1->qmat[i], sizeof(s->qmat[i])); 1894 } 1895 } 1896 1897 if (s->qps[0] != s1->qps[0]) 1898 memcpy(&s->bounding_values_array, &s1->bounding_values_array, sizeof(s->bounding_values_array)); 1899 1900 if (qps_changed) 1901 copy_fields(s, s1, qps, superblock_count); 1902#undef copy_fields 1903 } 1904 1905 update_frames(dst); 1906 1907 return 0; 1908} 1909 1910static int vp3_decode_frame(AVCodecContext *avctx, 1911 void *data, int *data_size, 1912 AVPacket *avpkt) 1913{ 1914 const uint8_t *buf = avpkt->data; 1915 int buf_size = avpkt->size; 1916 Vp3DecodeContext *s = avctx->priv_data; 1917 GetBitContext gb; 1918 int i; 1919 1920 init_get_bits(&gb, buf, buf_size * 8); 1921 1922 if (s->theora && get_bits1(&gb)) 1923 { 1924 av_log(avctx, AV_LOG_ERROR, "Header packet passed to frame decoder, skipping\n"); 1925 return -1; 1926 } 1927 1928 s->keyframe = !get_bits1(&gb); 1929 if (!s->theora) 1930 skip_bits(&gb, 1); 1931 for (i = 0; i < 3; i++) 1932 s->last_qps[i] = s->qps[i]; 1933 1934 s->nqps=0; 1935 do{ 1936 s->qps[s->nqps++]= get_bits(&gb, 6); 1937 } while(s->theora >= 0x030200 && s->nqps<3 && get_bits1(&gb)); 1938 for (i = s->nqps; i < 3; i++) 1939 s->qps[i] = -1; 1940 1941 if (s->avctx->debug & FF_DEBUG_PICT_INFO) 1942 av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n", 1943 s->keyframe?"key":"", avctx->frame_number+1, s->qps[0]); 1944 1945 s->skip_loop_filter = !s->filter_limit_values[s->qps[0]] || 1946 avctx->skip_loop_filter >= (s->keyframe ? 
AVDISCARD_ALL : AVDISCARD_NONKEY); 1947 1948 if (s->qps[0] != s->last_qps[0]) 1949 init_loop_filter(s); 1950 1951 for (i = 0; i < s->nqps; i++) 1952 // reinit all dequantizers if the first one changed, because 1953 // the DC of the first quantizer must be used for all matrices 1954 if (s->qps[i] != s->last_qps[i] || s->qps[0] != s->last_qps[0]) 1955 init_dequantizer(s, i); 1956 1957 if (avctx->skip_frame >= AVDISCARD_NONKEY && !s->keyframe) 1958 return buf_size; 1959 1960 s->current_frame.reference = 3; 1961 s->current_frame.pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; 1962 if (ff_thread_get_buffer(avctx, &s->current_frame) < 0) { 1963 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); 1964 goto error; 1965 } 1966 1967 if (!s->edge_emu_buffer) 1968 s->edge_emu_buffer = av_malloc(9*FFABS(s->current_frame.linesize[0])); 1969 1970 if (s->keyframe) { 1971 if (!s->theora) 1972 { 1973 skip_bits(&gb, 4); /* width code */ 1974 skip_bits(&gb, 4); /* height code */ 1975 if (s->version) 1976 { 1977 s->version = get_bits(&gb, 5); 1978 if (avctx->frame_number == 0) 1979 av_log(s->avctx, AV_LOG_DEBUG, "VP version: %d\n", s->version); 1980 } 1981 } 1982 if (s->version || s->theora) 1983 { 1984 if (get_bits1(&gb)) 1985 av_log(s->avctx, AV_LOG_ERROR, "Warning, unsupported keyframe coding type?!\n"); 1986 skip_bits(&gb, 2); /* reserved? 
*/ 1987 } 1988 } else { 1989 if (!s->golden_frame.data[0]) { 1990 av_log(s->avctx, AV_LOG_WARNING, "vp3: first frame not a keyframe\n"); 1991 1992 s->golden_frame.reference = 3; 1993 s->golden_frame.pict_type = AV_PICTURE_TYPE_I; 1994 if (ff_thread_get_buffer(avctx, &s->golden_frame) < 0) { 1995 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); 1996 goto error; 1997 } 1998 s->last_frame = s->golden_frame; 1999 s->last_frame.type = FF_BUFFER_TYPE_COPY; 2000 ff_thread_report_progress(&s->last_frame, INT_MAX, 0); 2001 } 2002 } 2003 2004 memset(s->all_fragments, 0, s->fragment_count * sizeof(Vp3Fragment)); 2005 ff_thread_finish_setup(avctx); 2006 2007 if (unpack_superblocks(s, &gb)){ 2008 av_log(s->avctx, AV_LOG_ERROR, "error in unpack_superblocks\n"); 2009 goto error; 2010 } 2011 if (unpack_modes(s, &gb)){ 2012 av_log(s->avctx, AV_LOG_ERROR, "error in unpack_modes\n"); 2013 goto error; 2014 } 2015 if (unpack_vectors(s, &gb)){ 2016 av_log(s->avctx, AV_LOG_ERROR, "error in unpack_vectors\n"); 2017 goto error; 2018 } 2019 if (unpack_block_qpis(s, &gb)){ 2020 av_log(s->avctx, AV_LOG_ERROR, "error in unpack_block_qpis\n"); 2021 goto error; 2022 } 2023 if (unpack_dct_coeffs(s, &gb)){ 2024 av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n"); 2025 goto error; 2026 } 2027 2028 for (i = 0; i < 3; i++) { 2029 int height = s->height >> (i && s->chroma_y_shift); 2030 if (s->flipped_image) 2031 s->data_offset[i] = 0; 2032 else 2033 s->data_offset[i] = (height-1) * s->current_frame.linesize[i]; 2034 } 2035 2036 s->last_slice_end = 0; 2037 for (i = 0; i < s->c_superblock_height; i++) 2038 render_slice(s, i); 2039 2040 // filter the last row 2041 for (i = 0; i < 3; i++) { 2042 int row = (s->height >> (3+(i && s->chroma_y_shift))) - 1; 2043 apply_loop_filter(s, i, row, row+1); 2044 } 2045 vp3_draw_horiz_band(s, s->avctx->height); 2046 2047 *data_size=sizeof(AVFrame); 2048 *(AVFrame*)data= s->current_frame; 2049 2050 if (!HAVE_THREADS || 
!(s->avctx->active_thread_type&FF_THREAD_FRAME)) 2051 update_frames(avctx); 2052 2053 return buf_size; 2054 2055error: 2056 ff_thread_report_progress(&s->current_frame, INT_MAX, 0); 2057 2058 if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME)) 2059 avctx->release_buffer(avctx, &s->current_frame); 2060 2061 return -1; 2062} 2063 2064static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb) 2065{ 2066 Vp3DecodeContext *s = avctx->priv_data; 2067 2068 if (get_bits1(gb)) { 2069 int token; 2070 if (s->entries >= 32) { /* overflow */ 2071 av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n"); 2072 return -1; 2073 } 2074 token = get_bits(gb, 5); 2075 //av_log(avctx, AV_LOG_DEBUG, "hti %d hbits %x token %d entry : %d size %d\n", s->hti, s->hbits, token, s->entries, s->huff_code_size); 2076 s->huffman_table[s->hti][token][0] = s->hbits; 2077 s->huffman_table[s->hti][token][1] = s->huff_code_size; 2078 s->entries++; 2079 } 2080 else { 2081 if (s->huff_code_size >= 32) {/* overflow */ 2082 av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n"); 2083 return -1; 2084 } 2085 s->huff_code_size++; 2086 s->hbits <<= 1; 2087 if (read_huffman_tree(avctx, gb)) 2088 return -1; 2089 s->hbits |= 1; 2090 if (read_huffman_tree(avctx, gb)) 2091 return -1; 2092 s->hbits >>= 1; 2093 s->huff_code_size--; 2094 } 2095 return 0; 2096} 2097 2098static int vp3_init_thread_copy(AVCodecContext *avctx) 2099{ 2100 Vp3DecodeContext *s = avctx->priv_data; 2101 2102 s->superblock_coding = NULL; 2103 s->all_fragments = NULL; 2104 s->coded_fragment_list[0] = NULL; 2105 s->dct_tokens_base = NULL; 2106 s->superblock_fragments = NULL; 2107 s->macroblock_coding = NULL; 2108 s->motion_val[0] = NULL; 2109 s->motion_val[1] = NULL; 2110 s->edge_emu_buffer = NULL; 2111 2112 return 0; 2113} 2114 2115#if CONFIG_THEORA_DECODER 2116static const enum PixelFormat theora_pix_fmts[4] = { 2117 PIX_FMT_YUV420P, PIX_FMT_NONE, PIX_FMT_YUV422P, PIX_FMT_YUV444P 2118}; 2119 2120static int 
theora_decode_header(AVCodecContext *avctx, GetBitContext *gb) 2121{ 2122 Vp3DecodeContext *s = avctx->priv_data; 2123 int visible_width, visible_height, colorspace; 2124 int offset_x = 0, offset_y = 0; 2125 AVRational fps, aspect; 2126 2127 s->theora = get_bits_long(gb, 24); 2128 av_log(avctx, AV_LOG_DEBUG, "Theora bitstream version %X\n", s->theora); 2129 2130 /* 3.2.0 aka alpha3 has the same frame orientation as original vp3 */ 2131 /* but previous versions have the image flipped relative to vp3 */ 2132 if (s->theora < 0x030200) 2133 { 2134 s->flipped_image = 1; 2135 av_log(avctx, AV_LOG_DEBUG, "Old (<alpha3) Theora bitstream, flipped image\n"); 2136 } 2137 2138 visible_width = s->width = get_bits(gb, 16) << 4; 2139 visible_height = s->height = get_bits(gb, 16) << 4; 2140 2141 if(av_image_check_size(s->width, s->height, 0, avctx)){ 2142 av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height); 2143 s->width= s->height= 0; 2144 return -1; 2145 } 2146 2147 if (s->theora >= 0x030200) { 2148 visible_width = get_bits_long(gb, 24); 2149 visible_height = get_bits_long(gb, 24); 2150 2151 offset_x = get_bits(gb, 8); /* offset x */ 2152 offset_y = get_bits(gb, 8); /* offset y, from bottom */ 2153 } 2154 2155 fps.num = get_bits_long(gb, 32); 2156 fps.den = get_bits_long(gb, 32); 2157 if (fps.num && fps.den) { 2158 av_reduce(&avctx->time_base.num, &avctx->time_base.den, 2159 fps.den, fps.num, 1<<30); 2160 } 2161 2162 aspect.num = get_bits_long(gb, 24); 2163 aspect.den = get_bits_long(gb, 24); 2164 if (aspect.num && aspect.den) { 2165 av_reduce(&avctx->sample_aspect_ratio.num, 2166 &avctx->sample_aspect_ratio.den, 2167 aspect.num, aspect.den, 1<<30); 2168 } 2169 2170 if (s->theora < 0x030200) 2171 skip_bits(gb, 5); /* keyframe frequency force */ 2172 colorspace = get_bits(gb, 8); 2173 skip_bits(gb, 24); /* bitrate */ 2174 2175 skip_bits(gb, 6); /* quality hint */ 2176 2177 if (s->theora >= 0x030200) 2178 { 2179 skip_bits(gb, 5); /* keyframe frequency 
force */ 2180 avctx->pix_fmt = theora_pix_fmts[get_bits(gb, 2)]; 2181 skip_bits(gb, 3); /* reserved */ 2182 } 2183 2184// align_get_bits(gb); 2185 2186 if ( visible_width <= s->width && visible_width > s->width-16 2187 && visible_height <= s->height && visible_height > s->height-16 2188 && !offset_x && (offset_y == s->height - visible_height)) 2189 avcodec_set_dimensions(avctx, visible_width, visible_height); 2190 else 2191 avcodec_set_dimensions(avctx, s->width, s->height); 2192 2193 if (colorspace == 1) { 2194 avctx->color_primaries = AVCOL_PRI_BT470M; 2195 } else if (colorspace == 2) { 2196 avctx->color_primaries = AVCOL_PRI_BT470BG; 2197 } 2198 if (colorspace == 1 || colorspace == 2) { 2199 avctx->colorspace = AVCOL_SPC_BT470BG; 2200 avctx->color_trc = AVCOL_TRC_BT709; 2201 } 2202 2203 return 0; 2204} 2205 2206static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb) 2207{ 2208 Vp3DecodeContext *s = avctx->priv_data; 2209 int i, n, matrices, inter, plane; 2210 2211 if (s->theora >= 0x030200) { 2212 n = get_bits(gb, 3); 2213 /* loop filter limit values table */ 2214 if (n) 2215 for (i = 0; i < 64; i++) 2216 s->filter_limit_values[i] = get_bits(gb, n); 2217 } 2218 2219 if (s->theora >= 0x030200) 2220 n = get_bits(gb, 4) + 1; 2221 else 2222 n = 16; 2223 /* quality threshold table */ 2224 for (i = 0; i < 64; i++) 2225 s->coded_ac_scale_factor[i] = get_bits(gb, n); 2226 2227 if (s->theora >= 0x030200) 2228 n = get_bits(gb, 4) + 1; 2229 else 2230 n = 16; 2231 /* dc scale factor table */ 2232 for (i = 0; i < 64; i++) 2233 s->coded_dc_scale_factor[i] = get_bits(gb, n); 2234 2235 if (s->theora >= 0x030200) 2236 matrices = get_bits(gb, 9) + 1; 2237 else 2238 matrices = 3; 2239 2240 if(matrices > 384){ 2241 av_log(avctx, AV_LOG_ERROR, "invalid number of base matrixes\n"); 2242 return -1; 2243 } 2244 2245 for(n=0; n<matrices; n++){ 2246 for (i = 0; i < 64; i++) 2247 s->base_matrix[n][i]= get_bits(gb, 8); 2248 } 2249 2250 for (inter = 0; inter <= 1; inter++) 
{ 2251 for (plane = 0; plane <= 2; plane++) { 2252 int newqr= 1; 2253 if (inter || plane > 0) 2254 newqr = get_bits1(gb); 2255 if (!newqr) { 2256 int qtj, plj; 2257 if(inter && get_bits1(gb)){ 2258 qtj = 0; 2259 plj = plane; 2260 }else{ 2261 qtj= (3*inter + plane - 1) / 3; 2262 plj= (plane + 2) % 3; 2263 } 2264 s->qr_count[inter][plane]= s->qr_count[qtj][plj]; 2265 memcpy(s->qr_size[inter][plane], s->qr_size[qtj][plj], sizeof(s->qr_size[0][0])); 2266 memcpy(s->qr_base[inter][plane], s->qr_base[qtj][plj], sizeof(s->qr_base[0][0])); 2267 } else { 2268 int qri= 0; 2269 int qi = 0; 2270 2271 for(;;){ 2272 i= get_bits(gb, av_log2(matrices-1)+1); 2273 if(i>= matrices){ 2274 av_log(avctx, AV_LOG_ERROR, "invalid base matrix index\n"); 2275 return -1; 2276 } 2277 s->qr_base[inter][plane][qri]= i; 2278 if(qi >= 63) 2279 break; 2280 i = get_bits(gb, av_log2(63-qi)+1) + 1; 2281 s->qr_size[inter][plane][qri++]= i; 2282 qi += i; 2283 } 2284 2285 if (qi > 63) { 2286 av_log(avctx, AV_LOG_ERROR, "invalid qi %d > 63\n", qi); 2287 return -1; 2288 } 2289 s->qr_count[inter][plane]= qri; 2290 } 2291 } 2292 } 2293 2294 /* Huffman tables */ 2295 for (s->hti = 0; s->hti < 80; s->hti++) { 2296 s->entries = 0; 2297 s->huff_code_size = 1; 2298 if (!get_bits1(gb)) { 2299 s->hbits = 0; 2300 if(read_huffman_tree(avctx, gb)) 2301 return -1; 2302 s->hbits = 1; 2303 if(read_huffman_tree(avctx, gb)) 2304 return -1; 2305 } 2306 } 2307 2308 s->theora_tables = 1; 2309 2310 return 0; 2311} 2312 2313static av_cold int theora_decode_init(AVCodecContext *avctx) 2314{ 2315 Vp3DecodeContext *s = avctx->priv_data; 2316 GetBitContext gb; 2317 int ptype; 2318 uint8_t *header_start[3]; 2319 int header_len[3]; 2320 int i; 2321 2322 s->theora = 1; 2323 2324 if (!avctx->extradata_size) 2325 { 2326 av_log(avctx, AV_LOG_ERROR, "Missing extradata!\n"); 2327 return -1; 2328 } 2329 2330 if (avpriv_split_xiph_headers(avctx->extradata, avctx->extradata_size, 2331 42, header_start, header_len) < 0) { 2332 av_log(avctx, 
AV_LOG_ERROR, "Corrupt extradata\n"); 2333 return -1; 2334 } 2335 2336 for(i=0;i<3;i++) { 2337 init_get_bits(&gb, header_start[i], header_len[i] * 8); 2338 2339 ptype = get_bits(&gb, 8); 2340 2341 if (!(ptype & 0x80)) 2342 { 2343 av_log(avctx, AV_LOG_ERROR, "Invalid extradata!\n"); 2344// return -1; 2345 } 2346 2347 // FIXME: Check for this as well. 2348 skip_bits_long(&gb, 6*8); /* "theora" */ 2349 2350 switch(ptype) 2351 { 2352 case 0x80: 2353 theora_decode_header(avctx, &gb); 2354 break; 2355 case 0x81: 2356// FIXME: is this needed? it breaks sometimes 2357// theora_decode_comments(avctx, gb); 2358 break; 2359 case 0x82: 2360 if (theora_decode_tables(avctx, &gb)) 2361 return -1; 2362 break; 2363 default: 2364 av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype&~0x80); 2365 break; 2366 } 2367 if(ptype != 0x81 && 8*header_len[i] != get_bits_count(&gb)) 2368 av_log(avctx, AV_LOG_WARNING, "%d bits left in packet %X\n", 8*header_len[i] - get_bits_count(&gb), ptype); 2369 if (s->theora < 0x030200) 2370 break; 2371 } 2372 2373 return vp3_decode_init(avctx); 2374} 2375 2376AVCodec ff_theora_decoder = { 2377 .name = "theora", 2378 .type = AVMEDIA_TYPE_VIDEO, 2379 .id = CODEC_ID_THEORA, 2380 .priv_data_size = sizeof(Vp3DecodeContext), 2381 .init = theora_decode_init, 2382 .close = vp3_decode_end, 2383 .decode = vp3_decode_frame, 2384 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_FRAME_THREADS, 2385 .flush = vp3_decode_flush, 2386 .long_name = NULL_IF_CONFIG_SMALL("Theora"), 2387 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp3_init_thread_copy), 2388 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp3_update_thread_context) 2389}; 2390#endif 2391 2392AVCodec ff_vp3_decoder = { 2393 .name = "vp3", 2394 .type = AVMEDIA_TYPE_VIDEO, 2395 .id = CODEC_ID_VP3, 2396 .priv_data_size = sizeof(Vp3DecodeContext), 2397 .init = vp3_decode_init, 2398 .close = vp3_decode_end, 2399 .decode = vp3_decode_frame, 2400 .capabilities = CODEC_CAP_DR1 | 
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_FRAME_THREADS, 2401 .flush = vp3_decode_flush, 2402 .long_name = NULL_IF_CONFIG_SMALL("On2 VP3"), 2403 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp3_init_thread_copy), 2404 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp3_update_thread_context) 2405}; 2406