// Copyright 2011 Google Inc.
//
// This code is licensed under the same terms as WebM:
//  Software License Agreement:  http://www.webmproject.org/license/software/
//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
// -----------------------------------------------------------------------------
//
//   WebP encoder: internal header.
//
// Author: Skal (pascal.massimino@gmail.com)

12#ifndef WEBP_ENC_VP8ENCI_H_
13#define WEBP_ENC_VP8ENCI_H_
14
15#include "string.h"     // for memcpy()
16#include "webp/encode.h"
17#include "bit_writer.h"
18
19#if defined(__cplusplus) || defined(c_plusplus)
20extern "C" {
21#endif
22
//-----------------------------------------------------------------------------
// Various defines and enums

// Intra prediction modes.
// The B_* values number the ten 4x4 modes from 0 to 9. The un-prefixed names
// used for Luma16 and UV prediction are aliases of four of those values, so
// both families share one numbering.
enum { B_DC_PRED = 0,   // 4x4 modes
       B_TM_PRED = 1,
       B_VE_PRED = 2,
       B_HE_PRED = 3,
       B_RD_PRED = 4,
       B_VR_PRED = 5,
       B_LD_PRED = 6,
       B_VL_PRED = 7,
       B_HD_PRED = 8,
       B_HU_PRED = 9,
       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10

       // Luma16 or UV modes
       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
     };

// Sizes of the segment, partition and probability tables declared in this
// header (used as array dimensions by the typedefs and structs that follow).
enum { NUM_MB_SEGMENTS = 4,
       MAX_NUM_PARTITIONS = 8,
       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
       NUM_BANDS = 8,
       NUM_CTX = 3,
       NUM_PROBAS = 11,
       MAX_LF_LEVELS = 64,      // Maximum loop filter level
       MAX_VARIABLE_LEVEL = 67  // last (inclusive) level with variable cost
     };

// YUV-cache parameters. Cache is 16-pixels wide.
// The original or reconstructed samples can be accessed using VP8Scan[]
// The predicted blocks can be accessed using offsets to yuv_p_ and
// the arrays VP8*ModeOffsets[];
//         +----+      YUV Samples area. See VP8Scan[] for accessing the blocks.
//  Y_OFF  |YYYY| <- original samples  (enc->yuv_in_)
//         |YYYY|
//         |YYYY|
//         |YYYY|
//  U_OFF  |UUVV| V_OFF  (=U_OFF + 8)
//         |UUVV|
//         +----+
//  Y_OFF  |YYYY| <- compressed/decoded samples  ('yuv_out_')
//         |YYYY|    There are two buffers like this ('yuv_out_'/'yuv_out2_')
//         |YYYY|
//         |YYYY|
//  U_OFF  |UUVV| V_OFF
//         |UUVV|
//          x2 (for yuv_out2_)
//         +----+     Prediction area ('yuv_p_', size = PRED_SIZE)
// I16DC16 |YYYY|  Intra16 predictions (16x16 block each)
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16TM16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16VE16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16HE16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
//         +----+  Chroma U/V predictions (16x8 block each)
// C8DC8   |UUVV|
//         |UUVV|
// C8TM8   |UUVV|
//         |UUVV|
// C8VE8   |UUVV|
//         |UUVV|
// C8HE8   |UUVV|
//         |UUVV|
//         +----+  Intra 4x4 predictions (4x4 block each)
//         |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
//         |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
//         |YY..| I4HD4 I4HU4 I4TMP
//         +----+
#define BPS       16   // this is the common stride
#define Y_SIZE   (BPS * 16)           // 16 rows of luma
#define UV_SIZE  (BPS * 8)            // 8 rows holding U and V side by side
#define YUV_SIZE (Y_SIZE + UV_SIZE)
// Six 16-row areas (four Intra16 + two for the chroma predictions) followed
// by 12 rows for the Intra4 predictions.
#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
#define Y_OFF    (0)
#define U_OFF    (Y_SIZE)
#define V_OFF    (U_OFF + 8)
#define ALIGN_CST 15
// Rounds PTR up to the next multiple of (ALIGN_CST + 1) and yields the result
// as a uintptr_t. PTR is converted to an integer *before* the addition, so the
// offset is always counted in bytes whatever the pointed-to type is, and the
// mask is built in uintptr_t rather than int.
#define DO_ALIGN(PTR) \
    (((uintptr_t)(PTR) + ALIGN_CST) & ~(uintptr_t)ALIGN_CST)

115extern const int VP8Scan[16 + 4 + 4];           // in quant.c
116extern const int VP8UVModeOffsets[4];           // in analyze.c
117extern const int VP8I16ModeOffsets[4];
118extern const int VP8I4ModeOffsets[NUM_BMODES];
119
// Layout of prediction blocks: each macro is an offset into the prediction
// area, expressed with the common stride BPS.
// intra 16x16
#define I16DC16 (0 * 16 * BPS)
#define I16TM16 (1 * 16 * BPS)
#define I16VE16 (2 * 16 * BPS)
#define I16HE16 (3 * 16 * BPS)
// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
#define C8DC8 (4 * 16 * BPS)
#define C8TM8 (4 * 16 * BPS + 8 * BPS)
#define C8VE8 (5 * 16 * BPS)
#define C8HE8 (5 * 16 * BPS + 8 * BPS)
// intra 4x4: four blocks per row of the cache, rows are 4 lines apart
#define I4DC4 (6 * 16 * BPS +  0)
#define I4TM4 (6 * 16 * BPS +  4)
#define I4VE4 (6 * 16 * BPS +  8)
#define I4HE4 (6 * 16 * BPS + 12)
#define I4RD4 (6 * 16 * BPS + 4 * BPS +  0)
#define I4VR4 (6 * 16 * BPS + 4 * BPS +  4)
#define I4LD4 (6 * 16 * BPS + 4 * BPS +  8)
#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
#define I4HD4 (6 * 16 * BPS + 8 * BPS +  0)
#define I4HU4 (6 * 16 * BPS + 8 * BPS +  4)
#define I4TMP (6 * 16 * BPS + 8 * BPS +  8)

typedef int64_t score_t;     // type used for scores, rate, distortion
// Largest score value, 2^55 - 1. It is smaller than INT64_MAX, so adding two
// such values together still fits in a score_t.
#define MAX_COST ((score_t)0x7fffffffffffffLL)

147//-----------------------------------------------------------------------------
148// Headers
149
150typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
151typedef uint64_t StatsArray[NUM_CTX][NUM_PROBAS][2];
152typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
153typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS];  // filter stats
154
155typedef struct VP8Encoder VP8Encoder;
156
// segment features
typedef struct {
  int num_segments_;      // Actual number of segments. 1 segment only = unused.
  int update_map_;        // whether to update the segment map or not.
                          // must be 0 if there's only 1 segment.
  int size_;              // bit-cost for transmitting the segment map
} VP8SegmentHeader;

165// Struct collecting all frame-persistent probabilities.
166typedef struct {
167  uint8_t segments_[3];     // probabilities for segment tree
168  uint8_t skip_proba_;      // final probability of being skipped.
169  ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];      // 924 bytes
170  StatsArray stats_[NUM_TYPES][NUM_BANDS];       // 7.4k
171  CostArray level_cost_[NUM_TYPES][NUM_BANDS];   // 11.4k
172  int use_skip_proba_;      // Note: we always use skip_proba for now.
173  int nb_skip_, nb_i4_, nb_i16_;   // block type counters
174} VP8Proba;
175
// Filter parameters. Not actually used in the code (we don't perform
// the in-loop filtering), but filled from user's config
typedef struct {
  int simple_;             // filtering type: 0=complex, 1=simple
  int level_;              // base filter level [0..63]
  int sharpness_;          // [0..7]
  int i4x4_lf_delta_;      // delta filter level for i4x4 relative to i16x16
} VP8FilterHeader;

//-----------------------------------------------------------------------------
// Information about the macroblocks.

// Per-macroblock record: block type, chroma mode, skip flag and segment are
// stored as bit-fields, followed by the alpha_ byte.
// NOTE(review): bit-fields of type uint8_t are an implementation-defined
// extension in C; the compilers this is built with are assumed to accept them.
typedef struct {
  // block type
  uint8_t type_:2;     // 0=i4x4, 1=i16x16
  uint8_t uv_mode_:2;
  uint8_t skip_:1;
  uint8_t segment_:2;
  uint8_t alpha_;      // quantization-susceptibility
} VP8MBInfo;

// One quantization matrix: five parallel tables of 16 entries each.
typedef struct {
  uint16_t q_[16];        // quantizer steps
  uint16_t iq_[16];       // reciprocals, fixed point.
  uint16_t bias_[16];     // rounding bias
  uint16_t zthresh_[16];  // value under which a coefficient is zeroed
  uint16_t sharpen_[16];  // frequency boosters for slight sharpening
} VP8Matrix;

205typedef struct {
206  VP8Matrix y1_, y2_, uv_;  // quantization matrices
207  int alpha_;      // quant-susceptibility, range [-127,127]. Zero is neutral.
208                   // Lower values indicate a lower risk of blurriness.
209  int beta_;       // filter-susceptibility, range [0,255].
210  int quant_;      // final segment quantizer.
211  int fstrength_;  // final in-loop filtering strength
212  // reactivities
213  int lambda_i16_, lambda_i4_, lambda_uv_;
214  int lambda_mode_, lambda_trellis_, tlambda_;
215  int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
216} VP8SegmentInfo;
217
218// Handy transcient struct to accumulate score and info during RD-optimization
219// and mode evaluation.
220typedef struct {
221  score_t D, SD, R, score;    // Distortion, spectral distortion, rate, score.
222  int16_t y_dc_levels[16];    // Quantized levels for luma-DC, luma-AC, chroma.
223  int16_t y_ac_levels[16][16];
224  int16_t uv_levels[4 + 4][16];
225  int mode_i16;               // mode number for intra16 prediction
226  int modes_i4[16];           // mode numbers for intra4 predictions
227  int mode_uv;                // mode number of chroma prediction
228  uint32_t nz;                // non-zero blocks
229} VP8ModeScore;
230
231// Iterator structure to iterate through macroblocks, pointing to the
232// right neighbouring data (samples, predictions, contexts, ...)
233typedef struct {
234  int x_, y_;                      // current macroblock
235  int y_offset_, uv_offset_;       // offset to the luma / chroma planes
236  int y_stride_, uv_stride_;       // respective strides
237  uint8_t*      yuv_in_;           // borrowed from enc_ (for now)
238  uint8_t*      yuv_out_;          // ''
239  uint8_t*      yuv_out2_;         // ''
240  uint8_t*      yuv_p_;            // ''
241  VP8Encoder*   enc_;              // back-pointer
242  VP8MBInfo*    mb_;               // current macroblock
243  VP8BitWriter* bw_;               // current bit-writer
244  uint8_t*      preds_;            // intra mode predictors (4x4 blocks)
245  uint32_t*     nz_;               // non-zero pattern
246  uint8_t       i4_boundary_[37];  // 32+5 boundary samples needed by intra4x4
247  uint8_t*      i4_top_;           // pointer to the current *top boundary sample
248  int           i4_;               // current intra4x4 mode being tested
249  int           top_nz_[9];        // top-non-zero context.
250  int           left_nz_[9];       // left-non-zero. left_nz[8] is independent.
251  uint64_t      bit_count_[4][3];  // bit counters for coded levels.
252  uint64_t      luma_bits_;        // macroblock bit-cost for luma
253  uint64_t      uv_bits_;          // macroblock bit-cost for chroma
254  LFStats*      lf_stats_;         // filter stats (borrowed from enc_)
255  int           do_trellis_;       // if true, perform extra level optimisation
256  int           done_;             // true when scan is finished
257} VP8EncIterator;
258
259  // in iterator.c
260// must be called first.
261void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
262// restart a scan.
263void VP8IteratorReset(VP8EncIterator* const it);
264// import samples from source
265void VP8IteratorImport(const VP8EncIterator* const it);
266// export decimated samples
267void VP8IteratorExport(const VP8EncIterator* const it);
268// go to next macroblock. Returns !done_. If *block_to_save is non-null, will
269// save the boundary values to top_/left_ arrays. block_to_save can be
270// it->yuv_out_ or it->yuv_in_.
271int VP8IteratorNext(VP8EncIterator* const it,
272                    const uint8_t* const block_to_save);
273// Intra4x4 iterations
274void VP8IteratorStartI4(VP8EncIterator* const it);
275// returns true if not done.
276int VP8IteratorRotateI4(VP8EncIterator* const it,
277                        const uint8_t* const yuv_out);
278
279// Non-zero context setup/teardown
280void VP8IteratorNzToBytes(VP8EncIterator* const it);
281void VP8IteratorBytesToNz(VP8EncIterator* const it);
282
283// Helper functions to set mode properties
284void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
285void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]);
286void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
287void VP8SetSkip(const VP8EncIterator* const it, int skip);
288void VP8SetSegment(const VP8EncIterator* const it, int segment);
289void VP8IteratorResetCosts(VP8EncIterator* const it);
290
291//-----------------------------------------------------------------------------
292// VP8Encoder
293
294struct VP8Encoder {
295  const WebPConfig* config_;    // user configuration and parameters
296  WebPPicture* pic_;            // input / output picture
297
298  // headers
299  VP8FilterHeader   filter_hdr_;     // filtering information
300  VP8SegmentHeader  segment_hdr_;    // segment information
301
302  int profile_;                      // VP8's profile, deduced from Config.
303
304  // dimension, in macroblock units.
305  int mb_w_, mb_h_;
306  int preds_w_;   // stride of the *preds_ prediction plane (=4*mb_w + 1)
307
308  // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
309  int num_parts_;
310
311  // per-partition boolean decoders.
312  VP8BitWriter bw_;                         // part0
313  VP8BitWriter parts_[MAX_NUM_PARTITIONS];  // token partitions
314
315  // quantization info (one set of DC/AC dequant factor per segment)
316  VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
317  int base_quant_;                 // nominal quantizer value. Only used
318                                   // for relative coding of segments' quant.
319  int uv_alpha_;                   // U/V quantization susceptibility
320  // global offset of quantizers, shared by all segments
321  int dq_y1_dc_;
322  int dq_y2_dc_, dq_y2_ac_;
323  int dq_uv_dc_, dq_uv_ac_;
324
325  // probabilities and statistics
326  VP8Proba proba_;
327  uint64_t sse_[3];        // sum of Y/U/V squared errors for all macroblocks
328  uint64_t sse_count_;     // pixel count for the sse_[] stats
329  int      coded_size_;
330  int      residual_bytes_[3][4];
331  int      block_count_[3];
332
333  // quality/speed settings
334  int method_;             // 0=fastest, 6=best/slowest.
335  int rd_opt_level_;       // Deduced from method_.
336
337  // Memory
338  VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
339  uint8_t*   preds_;     // predictions modes: (4*mb_w+1) * (4*mb_h+1)
340  uint32_t*  nz_;        // non-zero bit context: mb_w+1
341  uint8_t*   yuv_in_;    // input samples
342  uint8_t*   yuv_out_;   // output samples
343  uint8_t*   yuv_out2_;  // secondary scratch out-buffer. swapped with yuv_out_.
344  uint8_t*   yuv_p_;     // scratch buffer for prediction
345  uint8_t   *y_top_;     // top luma samples.
346  uint8_t   *uv_top_;    // top u/v samples.
347                         // U and V are packed into 16 pixels (8 U + 8 V)
348  uint8_t   *y_left_;    // left luma samples (adressable from index -1 to 15).
349  uint8_t   *u_left_;    // left u samples (adressable from index -1 to 7)
350  uint8_t   *v_left_;    // left v samples (adressable from index -1 to 7)
351
352  LFStats   *lf_stats_;  // autofilter stats (if NULL, autofilter is off)
353};
354
355//-----------------------------------------------------------------------------
356// internal functions. Not public.
357
358  // in tree.c
359extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
360extern const uint8_t
361    VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
362// Reset the token probabilities to their initial (default) values
363void VP8DefaultProbas(VP8Encoder* const enc);
364// Write the token probabilities
365void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
366// Writes the partition #0 modes (that is: all intra modes)
367void VP8CodeIntraModes(VP8Encoder* const enc);
368
369  // in syntax.c
370// Generates the final bitstream by coding the partition0 and headers,
371// and appending an assembly of all the pre-coded token partitions.
372// Return true if everything is ok.
373int VP8EncWrite(VP8Encoder* const enc);
374
375  // in frame.c
376extern const uint8_t VP8EncBands[16 + 1];
377// Form all the four Intra16x16 predictions in the yuv_p_ cache
378void VP8MakeLuma16Preds(const VP8EncIterator* const it);
379// Form all the four Chroma8x8 predictions in the yuv_p_ cache
380void VP8MakeChroma8Preds(const VP8EncIterator* const it);
381// Form all the ten Intra4x4 predictions in the yuv_p_ cache
382// for the 4x4 block it->i4_
383void VP8MakeIntra4Preds(const VP8EncIterator* const it);
384// Rate calculation
385int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
386int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
387int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
388// Main stat / coding passes
389int VP8EncLoop(VP8Encoder* const enc);
390int VP8StatLoop(VP8Encoder* const enc);
391
392  // in analysis.c
393// Main analysis loop. Decides the segmentations and complexity.
394// Assigns a first guess for Intra16 and uvmode_ prediction modes.
395int VP8EncAnalyze(VP8Encoder* const enc);
396
397  // in quant.c
398// Sets up segment's quantization values, base_quant_ and filter strengths.
399void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
400// Pick best modes and fills the levels. Returns true if skipped.
401int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
402
// in dsp.c
// The routines below are reached through function pointers; see
// VP8EncDspInit() at the end of this section.
// Transforms
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst);
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
extern VP8Idct VP8ITransform;
extern VP8Fdct VP8FTransform;
extern VP8WHT VP8ITransformWHT;
extern VP8WHT VP8FTransformWHT;
// Predictions
// *dst is the destination block. *top, *top_right and *left can be NULL.
// NOTE(review): none of the prototypes below has a 'top_right' parameter;
// the mention above may be stale -- confirm against the implementation.
typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
                              const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8;

// Distortion metrics between two blocks of samples.
typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
                          const uint16_t* const weights);
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;

// Block copies.
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
extern VP8BlockCopy VP8Copy4x4;
extern VP8BlockCopy VP8Copy8x8;
extern VP8BlockCopy VP8Copy16x16;

// Declared with an explicit (void): in C an empty parameter list is not a
// prototype and would let calls with arguments compile unchecked.
void VP8EncDspInit(void);   // must be called before using anything from the above.

434  // in filter.c
435extern void VP8InitFilter(VP8EncIterator* const it);
436extern void VP8StoreFilterStats(VP8EncIterator* const it);
437extern void VP8AdjustFilterStrength(VP8EncIterator* const it);
438
439//-----------------------------------------------------------------------------
440
441#if defined(__cplusplus) || defined(c_plusplus)
442}    // extern "C"
443#endif
444
445#endif  // WEBP_ENC_VP8ENCI_H_
446