1// Copyright 2010 Google Inc. 2// 3// This code is licensed under the same terms as WebM: 4// Software License Agreement: http://www.webmproject.org/license/software/ 5// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 6// ----------------------------------------------------------------------------- 7// 8// Frame-reconstruction function. Memory allocation. 9// 10// Author: Skal (pascal.massimino@gmail.com) 11 12#include <stdlib.h> 13#include "vp8i.h" 14 15#if defined(__cplusplus) || defined(c_plusplus) 16extern "C" { 17#endif 18 19#define ALIGN_MASK (32 - 1) 20 21//----------------------------------------------------------------------------- 22// Memory setup 23 24// how many extra luma lines are needed for caching, given a filtering level 25static const uint8_t kFilterExtraRows[3] = { 0, 4, 8 }; 26 27int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) { 28 const int mb_w = dec->mb_w_; 29 const int intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t); 30 const int top_size = (16 + 8 + 8) * mb_w; 31 const int info_size = (mb_w + 1) * sizeof(VP8MB); 32 const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_); 33 const int coeffs_size = 384 * sizeof(*dec->coeffs_); 34 const int cache_height = (16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2; 35 const int cache_size = top_size * cache_height; 36 const int needed = intra_pred_mode_size 37 + top_size + info_size 38 + yuv_size + coeffs_size 39 + cache_size + ALIGN_MASK; 40 uint8_t* mem; 41 42 if (needed > dec->mem_size_) { 43 free(dec->mem_); 44 dec->mem_size_ = 0; 45 dec->mem_ = (uint8_t*)malloc(needed); 46 if (dec->mem_ == NULL) { 47 return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, 48 "no memory during frame initialization."); 49 } 50 dec->mem_size_ = needed; 51 } 52 53 mem = (uint8_t*)dec->mem_; 54 dec->intra_t_ = (uint8_t*)mem; 55 mem += intra_pred_mode_size; 56 57 dec->y_t_ = (uint8_t*)mem; 58 mem += 16 * mb_w; 59 dec->u_t_ = (uint8_t*)mem; 60 mem += 8 * mb_w; 61 dec->v_t_ = (uint8_t*)mem; 62 mem += 8 * mb_w; 63 64 dec->mb_info_ = ((VP8MB*)mem) + 1; 65 mem += info_size; 66 67 mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK); 68 assert((yuv_size & ALIGN_MASK) == 0); 69 dec->yuv_b_ = (uint8_t*)mem; 70 mem += yuv_size; 71 72 dec->coeffs_ = (int16_t*)mem; 73 mem += coeffs_size; 74 75 dec->cache_y_stride_ = 16 * mb_w; 76 dec->cache_uv_stride_ = 8 * mb_w; 77 { 78 const int extra_rows = kFilterExtraRows[dec->filter_type_]; 79 const int extra_y = extra_rows * dec->cache_y_stride_; 80 const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_; 81 dec->cache_y_ = ((uint8_t*)mem) + extra_y; 82 dec->cache_u_ = dec->cache_y_ + 16 * dec->cache_y_stride_ + extra_uv; 83 dec->cache_v_ = dec->cache_u_ + 8 * dec->cache_uv_stride_ + extra_uv; 84 } 85 mem += cache_size; 86 87 // note: left-info is initialized once for all. 88 memset(dec->mb_info_ - 1, 0, (mb_w + 1) * sizeof(*dec->mb_info_)); 89 90 // initialize top 91 memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size); 92 93 // prepare 'io' 94 io->width = dec->pic_hdr_.width_; 95 io->height = dec->pic_hdr_.height_; 96 io->mb_y = 0; 97 io->y = dec->cache_y_; 98 io->u = dec->cache_u_; 99 io->v = dec->cache_v_; 100 io->y_stride = dec->cache_y_stride_; 101 io->uv_stride = dec->cache_uv_stride_; 102 io->fancy_upscaling = 0; // default 103 104 // Init critical function pointers and look-up tables. 105 VP8DspInitTables(); 106 VP8DspInit(); 107 108 return 1; 109} 110 111//----------------------------------------------------------------------------- 112// Filtering 113 114static inline int hev_thresh_from_level(int level, int keyframe) { 115 if (keyframe) { 116 return (level >= 40) ? 2 : (level >= 15) ? 1 : 0; 117 } else { 118 return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0; 119 } 120} 121 122static void DoFilter(VP8Decoder* const dec, int mb_x, int mb_y) { 123 VP8MB* const mb = dec->mb_info_ + mb_x; 124 uint8_t* const y_dst = dec->cache_y_ + mb_x * 16; 125 const int y_bps = dec->cache_y_stride_; 126 const int level = mb->f_level_; 127 const int ilevel = mb->f_ilevel_; 128 const int limit = 2 * level + ilevel; 129 if (dec->filter_type_ == 1) { // simple 130 if (mb_x > 0) { 131 VP8SimpleHFilter16(y_dst, y_bps, limit + 4); 132 } 133 if (mb->f_inner_) { 134 VP8SimpleHFilter16i(y_dst, y_bps, limit); 135 } 136 if (mb_y > 0) { 137 VP8SimpleVFilter16(y_dst, y_bps, limit + 4); 138 } 139 if (mb->f_inner_) { 140 VP8SimpleVFilter16i(y_dst, y_bps, limit); 141 } 142 } else { // complex 143 uint8_t* const u_dst = dec->cache_u_ + mb_x * 8; 144 uint8_t* const v_dst = dec->cache_v_ + mb_x * 8; 145 const int uv_bps = dec->cache_uv_stride_; 146 const int hev_thresh = 147 hev_thresh_from_level(level, dec->frm_hdr_.key_frame_); 148 if (mb_x > 0) { 149 VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); 150 VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); 151 } 152 if (mb->f_inner_) { 153 VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); 154 VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); 155 } 156 if (mb_y > 0) { 157 VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh); 158 VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh); 159 } 160 if (mb->f_inner_) { 161 VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh); 162 VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh); 163 } 164 } 165} 166 167void VP8StoreBlock(VP8Decoder* const dec) { 168 if (dec->filter_type_ > 0) { 169 VP8MB* const info = dec->mb_info_ + dec->mb_x_; 170 int level = dec->filter_levels_[dec->segment_]; 171 if (dec->filter_hdr_.use_lf_delta_) { 172 // TODO(skal): only CURRENT is handled for now. 173 level += dec->filter_hdr_.ref_lf_delta_[0]; 174 if (dec->is_i4x4_) { 175 level += dec->filter_hdr_.mode_lf_delta_[0]; 176 } 177 } 178 level = (level < 0) ? 0 : (level > 63) ? 63 : level; 179 info->f_level_ = level; 180 181 if (dec->filter_hdr_.sharpness_ > 0) { 182 if (dec->filter_hdr_.sharpness_ > 4) { 183 level >>= 2; 184 } else { 185 level >>= 1; 186 } 187 if (level > 9 - dec->filter_hdr_.sharpness_) { 188 level = 9 - dec->filter_hdr_.sharpness_; 189 } 190 } 191 192 info->f_ilevel_ = (level < 1) ? 1 : level; 193 info->f_inner_ = (!info->skip_ || dec->is_i4x4_); 194 } 195 { 196 // Transfer samples to row cache 197 int y; 198 uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16; 199 uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8; 200 uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8; 201 for (y = 0; y < 16; ++y) { 202 memcpy(ydst + y * dec->cache_y_stride_, 203 dec->yuv_b_ + Y_OFF + y * BPS, 16); 204 } 205 for (y = 0; y < 8; ++y) { 206 memcpy(udst + y * dec->cache_uv_stride_, 207 dec->yuv_b_ + U_OFF + y * BPS, 8); 208 memcpy(vdst + y * dec->cache_uv_stride_, 209 dec->yuv_b_ + V_OFF + y * BPS, 8); 210 } 211 } 212} 213 214int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) { 215 const int extra_y_rows = kFilterExtraRows[dec->filter_type_]; 216 const int ysize = extra_y_rows * dec->cache_y_stride_; 217 const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_; 218 const int first_row = (dec->mb_y_ == 0); 219 const int last_row = (dec->mb_y_ >= dec->mb_h_ - 1); 220 uint8_t* const ydst = dec->cache_y_ - ysize; 221 uint8_t* const udst = dec->cache_u_ - uvsize; 222 uint8_t* const vdst = dec->cache_v_ - uvsize; 223 if (dec->filter_type_ > 0) { 224 int mb_x; 225 for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) { 226 DoFilter(dec, mb_x, dec->mb_y_); 227 } 228 } 229 if (io->put) { 230 int y_start = dec->mb_y_ * 16; 231 int y_end = y_start + 16; 232 if (!first_row) { 233 y_start -= extra_y_rows; 234 io->y = ydst; 235 io->u = udst; 236 io->v = vdst; 237 } else { 238 io->y = dec->cache_y_; 239 io->u = dec->cache_u_; 240 io->v = dec->cache_v_; 241 } 242 if (!last_row) { 243 y_end -= extra_y_rows; 244 } 245 if (y_end > io->height) { 246 y_end = io->height; 247 } 248 io->mb_y = y_start; 249 io->mb_h = y_end - y_start; 250 if (!io->put(io)) { 251 return 0; 252 } 253 } 254 // rotate top samples 255 if (!last_row) { 256 memcpy(ydst, ydst + 16 * dec->cache_y_stride_, ysize); 257 memcpy(udst, udst + 8 * dec->cache_uv_stride_, uvsize); 258 memcpy(vdst, vdst + 8 * dec->cache_uv_stride_, uvsize); 259 } 260 return 1; 261} 262 263//----------------------------------------------------------------------------- 264// Main reconstruction function. 265 266static const int kScan[16] = { 267 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, 268 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS, 269 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS, 270 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS 271}; 272 273static inline int CheckMode(VP8Decoder* const dec, int mode) { 274 if (mode == B_DC_PRED) { 275 if (dec->mb_x_ == 0) { 276 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT; 277 } else { 278 return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED; 279 } 280 } 281 return mode; 282} 283 284static inline void Copy32b(uint8_t* dst, uint8_t* src) { 285 *(uint32_t*)dst = *(uint32_t*)src; 286} 287 288void VP8ReconstructBlock(VP8Decoder* const dec) { 289 uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; 290 uint8_t* const u_dst = dec->yuv_b_ + U_OFF; 291 uint8_t* const v_dst = dec->yuv_b_ + V_OFF; 292 293 // Rotate in the left samples from previously decoded block. We move four 294 // pixels at a time for alignment reason, and because of in-loop filter. 295 if (dec->mb_x_ > 0) { 296 int j; 297 for (j = -1; j < 16; ++j) { 298 Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); 299 } 300 for (j = -1; j < 8; ++j) { 301 Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]); 302 Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); 303 } 304 } else { 305 int j; 306 for (j = 0; j < 16; ++j) { 307 y_dst[j * BPS - 1] = 129; 308 } 309 for (j = 0; j < 8; ++j) { 310 u_dst[j * BPS - 1] = 129; 311 v_dst[j * BPS - 1] = 129; 312 } 313 // Init top-left sample on left column too 314 if (dec->mb_y_ > 0) { 315 y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129; 316 } 317 } 318 { 319 // bring top samples into the cache 320 uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16; 321 uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8; 322 uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8; 323 const int16_t* coeffs = dec->coeffs_; 324 int n; 325 326 if (dec->mb_y_ > 0) { 327 memcpy(y_dst - BPS, top_y, 16); 328 memcpy(u_dst - BPS, top_u, 8); 329 memcpy(v_dst - BPS, top_v, 8); 330 } else if (dec->mb_x_ == 0) { 331 // we only need to do this init once at block (0,0). 332 // Afterward, it remains valid for the whole topmost row. 333 memset(y_dst - BPS - 1, 127, 16 + 4 + 1); 334 memset(u_dst - BPS - 1, 127, 8 + 1); 335 memset(v_dst - BPS - 1, 127, 8 + 1); 336 } 337 338 // predict and add residuals 339 340 if (dec->is_i4x4_) { // 4x4 341 uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16); 342 343 if (dec->mb_y_ > 0) { 344 if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border 345 top_right[0] = top_y[15] * 0x01010101u; 346 } else { 347 memcpy(top_right, top_y + 16, sizeof(*top_right)); 348 } 349 } 350 // replicate the top-right pixels below 351 top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0]; 352 353 // predict and add residues for all 4x4 blocks in turn. 354 for (n = 0; n < 16; n++) { 355 uint8_t* const dst = y_dst + kScan[n]; 356 VP8PredLuma4[dec->imodes_[n]](dst); 357 if (dec->non_zero_ & (1 << n)) { 358 VP8Transform(coeffs + n * 16, dst); 359 } else if (dec->non_zero_ & (1 << n)) { // only DC is present 360 VP8TransformDC(coeffs + n * 16, dst); 361 } 362 } 363 } else { // 16x16 364 const int pred_func = CheckMode(dec, dec->imodes_[0]); 365 VP8PredLuma16[pred_func](y_dst); 366 if (dec->non_zero_) { 367 for (n = 0; n < 16; n++) { 368 uint8_t* const dst = y_dst + kScan[n]; 369 if (dec->non_zero_ac_ & (1 << n)) { 370 VP8Transform(coeffs + n * 16, dst); 371 } else if (dec->non_zero_ & (1 << n)) { // only DC is present 372 VP8TransformDC(coeffs + n * 16, dst); 373 } 374 } 375 } 376 } 377 { 378 // Chroma 379 const int pred_func = CheckMode(dec, dec->uvmode_); 380 VP8PredChroma8[pred_func](u_dst); 381 VP8PredChroma8[pred_func](v_dst); 382 383 if (dec->non_zero_ & 0x0f0000) { // chroma-U 384 const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16; 385 if (dec->non_zero_ac_ & 0x0f0000) { 386 VP8TransformUV(u_coeffs, u_dst); 387 } else { 388 VP8TransformDCUV(u_coeffs, u_dst); 389 } 390 } 391 if (dec->non_zero_ & 0xf00000) { // chroma-V 392 const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16; 393 if (dec->non_zero_ac_ & 0xf00000) { 394 VP8TransformUV(v_coeffs, v_dst); 395 } else { 396 VP8TransformDCUV(v_coeffs, v_dst); 397 } 398 } 399 400 // stash away top samples for next block 401 if (dec->mb_y_ < dec->mb_h_ - 1) { 402 memcpy(top_y, y_dst + 15 * BPS, 16); 403 memcpy(top_u, u_dst + 7 * BPS, 8); 404 memcpy(top_v, v_dst + 7 * BPS, 8); 405 } 406 } 407 } 408} 409 410//----------------------------------------------------------------------------- 411 412#if defined(__cplusplus) || defined(c_plusplus) 413} // extern "C" 414#endif 415