1// Copyright 2010 Google Inc.
2//
3// This code is licensed under the same terms as WebM:
4//  Software License Agreement:  http://www.webmproject.org/license/software/
5//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
6// -----------------------------------------------------------------------------
7//
8// Frame-reconstruction function. Memory allocation.
9//
10// Author: Skal (pascal.massimino@gmail.com)
11
12#include <stdlib.h>
13#include "vp8i.h"
14
15#if defined(__cplusplus) || defined(c_plusplus)
16extern "C" {
17#endif
18
// Low-bit mask for 32-byte alignment: an address is aligned when
// (address & ALIGN_MASK) == 0.
#define ALIGN_MASK 0x1f
20
21//-----------------------------------------------------------------------------
22// Memory setup
23
// Number of extra luma rows that must be kept in the row cache, indexed by
// the decoder's filter_type_.
static const uint8_t kFilterExtraRows[3] = {
  0,    // filter_type_ == 0: no in-loop filtering
  4,    // filter_type_ == 1: simple filter
  8     // filter_type_ == 2: complex filter
};
26
27int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
28  const int mb_w = dec->mb_w_;
29  const int intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
30  const int top_size = (16 + 8 + 8) * mb_w;
31  const int info_size = (mb_w + 1) * sizeof(VP8MB);
32  const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
33  const int coeffs_size = 384 * sizeof(*dec->coeffs_);
34  const int cache_height = (16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
35  const int cache_size = top_size * cache_height;
36  const int needed = intra_pred_mode_size
37                   + top_size + info_size
38                   + yuv_size + coeffs_size
39                   + cache_size + ALIGN_MASK;
40  uint8_t* mem;
41
42  if (needed > dec->mem_size_) {
43    free(dec->mem_);
44    dec->mem_size_ = 0;
45    dec->mem_ = (uint8_t*)malloc(needed);
46    if (dec->mem_ == NULL) {
47      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
48                         "no memory during frame initialization.");
49    }
50    dec->mem_size_ = needed;
51  }
52
53  mem = (uint8_t*)dec->mem_;
54  dec->intra_t_ = (uint8_t*)mem;
55  mem += intra_pred_mode_size;
56
57  dec->y_t_ = (uint8_t*)mem;
58  mem += 16 * mb_w;
59  dec->u_t_ = (uint8_t*)mem;
60  mem += 8 * mb_w;
61  dec->v_t_ = (uint8_t*)mem;
62  mem += 8 * mb_w;
63
64  dec->mb_info_ = ((VP8MB*)mem) + 1;
65  mem += info_size;
66
67  mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
68  assert((yuv_size & ALIGN_MASK) == 0);
69  dec->yuv_b_ = (uint8_t*)mem;
70  mem += yuv_size;
71
72  dec->coeffs_ = (int16_t*)mem;
73  mem += coeffs_size;
74
75  dec->cache_y_stride_ = 16 * mb_w;
76  dec->cache_uv_stride_ = 8 * mb_w;
77  {
78    const int extra_rows = kFilterExtraRows[dec->filter_type_];
79    const int extra_y = extra_rows * dec->cache_y_stride_;
80    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
81    dec->cache_y_ = ((uint8_t*)mem) + extra_y;
82    dec->cache_u_ = dec->cache_y_ + 16 * dec->cache_y_stride_ + extra_uv;
83    dec->cache_v_ = dec->cache_u_ + 8 * dec->cache_uv_stride_ + extra_uv;
84  }
85  mem += cache_size;
86
87  // note: left-info is initialized once for all.
88  memset(dec->mb_info_ - 1, 0, (mb_w + 1) * sizeof(*dec->mb_info_));
89
90  // initialize top
91  memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
92
93  // prepare 'io'
94  io->width = dec->pic_hdr_.width_;
95  io->height = dec->pic_hdr_.height_;
96  io->mb_y = 0;
97  io->y = dec->cache_y_;
98  io->u = dec->cache_u_;
99  io->v = dec->cache_v_;
100  io->y_stride = dec->cache_y_stride_;
101  io->uv_stride = dec->cache_uv_stride_;
102  io->fancy_upscaling = 0;    // default
103
104  // Init critical function pointers and look-up tables.
105  VP8DspInitTables();
106  VP8DspInit();
107
108  return 1;
109}
110
111//-----------------------------------------------------------------------------
112// Filtering
113
// Maps a filter level to the 'hev' threshold handed to the complex filters.
// Key frames yield 0..2 (steps at levels 15 and 40); other frames yield 0..3
// (steps at levels 15, 20 and 40).
static inline int hev_thresh_from_level(int level, int keyframe) {
  int thresh = 0;
  if (level >= 15) {
    ++thresh;
  }
  if (level >= 40) {
    ++thresh;
  }
  // Non-key frames have one additional step at level 20.
  if (!keyframe && level >= 20) {
    ++thresh;
  }
  return thresh;
}
121
122static void DoFilter(VP8Decoder* const dec, int mb_x, int mb_y) {
123  VP8MB* const mb = dec->mb_info_ + mb_x;
124  uint8_t* const y_dst = dec->cache_y_ + mb_x * 16;
125  const int y_bps = dec->cache_y_stride_;
126  const int level = mb->f_level_;
127  const int ilevel = mb->f_ilevel_;
128  const int limit = 2 * level + ilevel;
129  if (dec->filter_type_ == 1) {   // simple
130    if (mb_x > 0) {
131      VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
132    }
133    if (mb->f_inner_) {
134      VP8SimpleHFilter16i(y_dst, y_bps, limit);
135    }
136    if (mb_y > 0) {
137      VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
138    }
139    if (mb->f_inner_) {
140      VP8SimpleVFilter16i(y_dst, y_bps, limit);
141    }
142  } else {    // complex
143    uint8_t* const u_dst = dec->cache_u_ + mb_x * 8;
144    uint8_t* const v_dst = dec->cache_v_ + mb_x * 8;
145    const int uv_bps = dec->cache_uv_stride_;
146    const int hev_thresh =
147        hev_thresh_from_level(level, dec->frm_hdr_.key_frame_);
148    if (mb_x > 0) {
149      VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
150      VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
151    }
152    if (mb->f_inner_) {
153      VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
154      VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
155    }
156    if (mb_y > 0) {
157      VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
158      VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
159    }
160    if (mb->f_inner_) {
161      VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
162      VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
163    }
164  }
165}
166
167void VP8StoreBlock(VP8Decoder* const dec) {
168  if (dec->filter_type_ > 0) {
169    VP8MB* const info = dec->mb_info_ + dec->mb_x_;
170    int level = dec->filter_levels_[dec->segment_];
171    if (dec->filter_hdr_.use_lf_delta_) {
172      // TODO(skal): only CURRENT is handled for now.
173      level += dec->filter_hdr_.ref_lf_delta_[0];
174      if (dec->is_i4x4_) {
175        level += dec->filter_hdr_.mode_lf_delta_[0];
176      }
177    }
178    level = (level < 0) ? 0 : (level > 63) ? 63 : level;
179    info->f_level_ = level;
180
181    if (dec->filter_hdr_.sharpness_ > 0) {
182      if (dec->filter_hdr_.sharpness_ > 4) {
183        level >>= 2;
184      } else {
185        level >>= 1;
186      }
187      if (level > 9 - dec->filter_hdr_.sharpness_) {
188        level = 9 - dec->filter_hdr_.sharpness_;
189      }
190    }
191
192    info->f_ilevel_ = (level < 1) ? 1 : level;
193    info->f_inner_ = (!info->skip_ || dec->is_i4x4_);
194  }
195  {
196    // Transfer samples to row cache
197    int y;
198    uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16;
199    uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8;
200    uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8;
201    for (y = 0; y < 16; ++y) {
202      memcpy(ydst + y * dec->cache_y_stride_,
203             dec->yuv_b_ + Y_OFF + y * BPS, 16);
204    }
205    for (y = 0; y < 8; ++y) {
206      memcpy(udst + y * dec->cache_uv_stride_,
207           dec->yuv_b_ + U_OFF + y * BPS, 8);
208      memcpy(vdst + y * dec->cache_uv_stride_,
209           dec->yuv_b_ + V_OFF + y * BPS, 8);
210    }
211  }
212}
213
214int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
215  const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
216  const int ysize = extra_y_rows * dec->cache_y_stride_;
217  const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
218  const int first_row = (dec->mb_y_ == 0);
219  const int last_row = (dec->mb_y_ >= dec->mb_h_ - 1);
220  uint8_t* const ydst = dec->cache_y_ - ysize;
221  uint8_t* const udst = dec->cache_u_ - uvsize;
222  uint8_t* const vdst = dec->cache_v_ - uvsize;
223  if (dec->filter_type_ > 0) {
224    int mb_x;
225    for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
226      DoFilter(dec, mb_x, dec->mb_y_);
227    }
228  }
229  if (io->put) {
230    int y_start = dec->mb_y_ * 16;
231    int y_end = y_start + 16;
232    if (!first_row) {
233      y_start -= extra_y_rows;
234      io->y = ydst;
235      io->u = udst;
236      io->v = vdst;
237    } else {
238      io->y = dec->cache_y_;
239      io->u = dec->cache_u_;
240      io->v = dec->cache_v_;
241    }
242    if (!last_row) {
243      y_end -= extra_y_rows;
244    }
245    if (y_end > io->height) {
246      y_end = io->height;
247    }
248    io->mb_y = y_start;
249    io->mb_h = y_end - y_start;
250    if (!io->put(io)) {
251      return 0;
252    }
253  }
254    // rotate top samples
255  if (!last_row) {
256    memcpy(ydst, ydst + 16 * dec->cache_y_stride_, ysize);
257    memcpy(udst, udst + 8 * dec->cache_uv_stride_, uvsize);
258    memcpy(vdst, vdst + 8 * dec->cache_uv_stride_, uvsize);
259  }
260  return 1;
261}
262
263//-----------------------------------------------------------------------------
264// Main reconstruction function.
265
266static const int kScan[16] = {
267  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
268  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
269  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
270  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
271};
272
273static inline int CheckMode(VP8Decoder* const dec, int mode) {
274  if (mode == B_DC_PRED) {
275    if (dec->mb_x_ == 0) {
276      return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
277    } else {
278      return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
279    }
280  }
281  return mode;
282}
283
// Copies four bytes from 'src' to 'dst'.
// The bytes go through a local temporary using memcpy() instead of being
// accessed through a casted uint32_t pointer: this avoids both the
// strict-aliasing violation and any alignment requirement on 'src' / 'dst'.
// Since all four bytes are read before any is written, the result is
// well-defined even if the two ranges overlap.
static inline void Copy32b(uint8_t* dst, const uint8_t* src) {
  uint32_t tmp;
  memcpy(&tmp, src, sizeof(tmp));
  memcpy(dst, &tmp, sizeof(tmp));
}
287
288void VP8ReconstructBlock(VP8Decoder* const dec) {
289  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
290  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
291  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
292
293  // Rotate in the left samples from previously decoded block. We move four
294  // pixels at a time for alignment reason, and because of in-loop filter.
295  if (dec->mb_x_ > 0) {
296    int j;
297    for (j = -1; j < 16; ++j) {
298      Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
299    }
300    for (j = -1; j < 8; ++j) {
301      Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
302      Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
303    }
304  } else {
305    int j;
306    for (j = 0; j < 16; ++j) {
307      y_dst[j * BPS - 1] = 129;
308    }
309    for (j = 0; j < 8; ++j) {
310      u_dst[j * BPS - 1] = 129;
311      v_dst[j * BPS - 1] = 129;
312    }
313    // Init top-left sample on left column too
314    if (dec->mb_y_ > 0) {
315      y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
316    }
317  }
318  {
319    // bring top samples into the cache
320    uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;
321    uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;
322    uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;
323    const int16_t* coeffs = dec->coeffs_;
324    int n;
325
326    if (dec->mb_y_ > 0) {
327      memcpy(y_dst - BPS, top_y, 16);
328      memcpy(u_dst - BPS, top_u, 8);
329      memcpy(v_dst - BPS, top_v, 8);
330    } else if (dec->mb_x_ == 0) {
331      // we only need to do this init once at block (0,0).
332      // Afterward, it remains valid for the whole topmost row.
333      memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
334      memset(u_dst - BPS - 1, 127, 8 + 1);
335      memset(v_dst - BPS - 1, 127, 8 + 1);
336    }
337
338    // predict and add residuals
339
340    if (dec->is_i4x4_) {   // 4x4
341      uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
342
343      if (dec->mb_y_ > 0) {
344        if (dec->mb_x_ >= dec->mb_w_ - 1) {    // on rightmost border
345          top_right[0] = top_y[15] * 0x01010101u;
346        } else {
347          memcpy(top_right, top_y + 16, sizeof(*top_right));
348        }
349      }
350      // replicate the top-right pixels below
351      top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
352
353      // predict and add residues for all 4x4 blocks in turn.
354      for (n = 0; n < 16; n++) {
355        uint8_t* const dst = y_dst + kScan[n];
356        VP8PredLuma4[dec->imodes_[n]](dst);
357        if (dec->non_zero_ & (1 << n)) {
358          VP8Transform(coeffs + n * 16, dst);
359        } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
360          VP8TransformDC(coeffs + n * 16, dst);
361        }
362      }
363    } else {    // 16x16
364      const int pred_func = CheckMode(dec, dec->imodes_[0]);
365      VP8PredLuma16[pred_func](y_dst);
366      if (dec->non_zero_) {
367        for (n = 0; n < 16; n++) {
368          uint8_t* const dst = y_dst + kScan[n];
369          if (dec->non_zero_ac_ & (1 << n)) {
370            VP8Transform(coeffs + n * 16, dst);
371          } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
372            VP8TransformDC(coeffs + n * 16, dst);
373          }
374        }
375      }
376    }
377    {
378      // Chroma
379      const int pred_func = CheckMode(dec, dec->uvmode_);
380      VP8PredChroma8[pred_func](u_dst);
381      VP8PredChroma8[pred_func](v_dst);
382
383      if (dec->non_zero_ & 0x0f0000) {   // chroma-U
384        const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;
385        if (dec->non_zero_ac_ & 0x0f0000) {
386          VP8TransformUV(u_coeffs, u_dst);
387        } else {
388          VP8TransformDCUV(u_coeffs, u_dst);
389        }
390      }
391      if (dec->non_zero_ & 0xf00000) {   // chroma-V
392        const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;
393        if (dec->non_zero_ac_ & 0xf00000) {
394          VP8TransformUV(v_coeffs, v_dst);
395        } else {
396          VP8TransformDCUV(v_coeffs, v_dst);
397        }
398      }
399
400      // stash away top samples for next block
401      if (dec->mb_y_ < dec->mb_h_ - 1) {
402        memcpy(top_y, y_dst + 15 * BPS, 16);
403        memcpy(top_u, u_dst +  7 * BPS,  8);
404        memcpy(top_v, v_dst +  7 * BPS,  8);
405      }
406    }
407  }
408}
409
410//-----------------------------------------------------------------------------
411
412#if defined(__cplusplus) || defined(c_plusplus)
413}    // extern "C"
414#endif
415