1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/intmath.h"
22#include "avcodec.h"
23#include "dsputil.h"
24#include "dwt.h"
25#include "snow.h"
26
27#include "rangecoder.h"
28#include "mathops.h"
29
30#include "mpegvideo.h"
31#include "h263.h"
32
33#undef NDEBUG
34#include <assert.h>
35
36static const int8_t quant3[256]={
37 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
53};
54static const int8_t quant3b[256]={
55 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71};
72static const int8_t quant3bA[256]={
73 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89};
90static const int8_t quant5[256]={
91 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
107};
108static const int8_t quant7[256]={
109 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
120-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
123-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
124-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
125};
126static const int8_t quant9[256]={
127 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
128 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
142-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
143};
144static const int8_t quant11[256]={
145 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
146 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
147 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
156-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
159-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
160-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
161};
162static const int8_t quant13[256]={
163 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
164 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
165 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
166 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
169 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
173-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
174-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
176-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
177-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
178-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
179};
180
181#if 0 //64*cubic
182static const uint8_t obmc32[1024]={
183  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
184  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
185  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
186  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
187  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
188  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
189  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
190  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
191  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
192  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
193  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
194  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
195  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
196  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
197  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
198  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
199  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
200  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
201  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
202  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
203  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
204  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
205  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
206  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
207  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
208  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
209  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
210  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
211  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
212  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
213  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
214  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
215//error:0.000022
216};
217static const uint8_t obmc16[256]={
218  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
219  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
220  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
221  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
222  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
223  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
224  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
225  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
226  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
227  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
228  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
229  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
230  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
231  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
232  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
233  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
234//error:0.000033
235};
236#elif 1 // 64*linear
237static const uint8_t obmc32[1024]={
238  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
239  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
240  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
241  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
242  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
243  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
244  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
245  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
246  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
247  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
248  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
249  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
250  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
251  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
252  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
253  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
254  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
255  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
256  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
257  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
258  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
259  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
260  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
261  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
262  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
263  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
264  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
265  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
266  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
267  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
268  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
269  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
270 //error:0.000020
271};
272static const uint8_t obmc16[256]={
273  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
274  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
275  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
276  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
277  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
281 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
282 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
283 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
284  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
285  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
286  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
287  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
288  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
289//error:0.000015
290};
291#else //64*cos
292static const uint8_t obmc32[1024]={
293  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
294  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
295  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
296  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
297  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
298  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
299  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
300  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
301  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
302  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
303  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
304  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
305  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
306  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
307  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
308  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
309  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
310  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
311  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
312  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
313  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
314  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
315  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
316  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
317  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
318  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
319  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
320  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
321  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
322  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
323  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
324  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
325//error:0.000022
326};
327static const uint8_t obmc16[256]={
328  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
329  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
330  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
331  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
332  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
333  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
334  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
335  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
336  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
337  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
338  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
339  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
340  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
341  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
342  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
343  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
344//error:0.000022
345};
346#endif /* 0 */
347
348//linear *64
349static const uint8_t obmc8[64]={
350  4, 12, 20, 28, 28, 20, 12,  4,
351 12, 36, 60, 84, 84, 60, 36, 12,
352 20, 60,100,140,140,100, 60, 20,
353 28, 84,140,196,196,140, 84, 28,
354 28, 84,140,196,196,140, 84, 28,
355 20, 60,100,140,140,100, 60, 20,
356 12, 36, 60, 84, 84, 60, 36, 12,
357  4, 12, 20, 28, 28, 20, 12,  4,
358//error:0.000000
359};
360
361//linear *64
362static const uint8_t obmc4[16]={
363 16, 48, 48, 16,
364 48,144,144, 48,
365 48,144,144, 48,
366 16, 48, 48, 16,
367//error:0.000000
368};
369
370static const uint8_t * const obmc_tab[4]={
371    obmc32, obmc16, obmc8, obmc4
372};
373
374static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
375
376typedef struct BlockNode{
377    int16_t mx;
378    int16_t my;
379    uint8_t ref;
380    uint8_t color[3];
381    uint8_t type;
382//#define TYPE_SPLIT    1
383#define BLOCK_INTRA   1
384#define BLOCK_OPT     2
385//#define TYPE_NOCOLOR  4
386    uint8_t level; //FIXME merge into type?
387}BlockNode;
388
389static const BlockNode null_block= { //FIXME add border maybe
390    .color= {128,128,128},
391    .mx= 0,
392    .my= 0,
393    .ref= 0,
394    .type= 0,
395    .level= 0,
396};
397
398#define LOG2_MB_SIZE 4
399#define MB_SIZE (1<<LOG2_MB_SIZE)
400#define ENCODER_EXTRA_BITS 4
401#define HTAPS_MAX 8
402
403typedef struct x_and_coeff{
404    int16_t x;
405    uint16_t coeff;
406} x_and_coeff;
407
408typedef struct SubBand{
409    int level;
410    int stride;
411    int width;
412    int height;
413    int qlog;        ///< log(qscale)/log[2^(1/6)]
414    DWTELEM *buf;
415    IDWTELEM *ibuf;
416    int buf_x_offset;
417    int buf_y_offset;
418    int stride_line; ///< Stride measured in lines, not pixels.
419    x_and_coeff * x_coeff;
420    struct SubBand *parent;
421    uint8_t state[/*7*2*/ 7 + 512][32];
422}SubBand;
423
424typedef struct Plane{
425    int width;
426    int height;
427    SubBand band[MAX_DECOMPOSITIONS][4];
428
429    int htaps;
430    int8_t hcoeff[HTAPS_MAX/2];
431    int diag_mc;
432    int fast_mc;
433
434    int last_htaps;
435    int8_t last_hcoeff[HTAPS_MAX/2];
436    int last_diag_mc;
437}Plane;
438
439typedef struct SnowContext{
440
441    AVCodecContext *avctx;
442    RangeCoder c;
443    DSPContext dsp;
444    DWTContext dwt;
445    AVFrame new_picture;
446    AVFrame input_picture;              ///< new_picture with the internal linesizes
447    AVFrame current_picture;
448    AVFrame last_picture[MAX_REF_FRAMES];
449    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
450    AVFrame mconly_picture;
451//     uint8_t q_context[16];
452    uint8_t header_state[32];
453    uint8_t block_state[128 + 32*128];
454    int keyframe;
455    int always_reset;
456    int version;
457    int spatial_decomposition_type;
458    int last_spatial_decomposition_type;
459    int temporal_decomposition_type;
460    int spatial_decomposition_count;
461    int last_spatial_decomposition_count;
462    int temporal_decomposition_count;
463    int max_ref_frames;
464    int ref_frames;
465    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
466    uint32_t *ref_scores[MAX_REF_FRAMES];
467    DWTELEM *spatial_dwt_buffer;
468    IDWTELEM *spatial_idwt_buffer;
469    int colorspace_type;
470    int chroma_h_shift;
471    int chroma_v_shift;
472    int spatial_scalability;
473    int qlog;
474    int last_qlog;
475    int lambda;
476    int lambda2;
477    int pass1_rc;
478    int mv_scale;
479    int last_mv_scale;
480    int qbias;
481    int last_qbias;
482#define QBIAS_SHIFT 3
483    int b_width;
484    int b_height;
485    int block_max_depth;
486    int last_block_max_depth;
487    Plane plane[MAX_PLANES];
488    BlockNode *block;
489#define ME_CACHE_SIZE 1024
490    int me_cache[ME_CACHE_SIZE];
491    int me_cache_generation;
492    slice_buffer sb;
493
494    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
495
496    uint8_t *scratchbuf;
497}SnowContext;
498
499#ifdef __sgi
500// Avoid a name clash on SGI IRIX
501#undef qexp
502#endif
503#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
504static uint8_t qexp[QROOT];
505
506static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
507    int i;
508
509    if(v){
510        const int a= FFABS(v);
511        const int e= av_log2(a);
512#if 1
513        const int el= FFMIN(e, 10);
514        put_rac(c, state+0, 0);
515
516        for(i=0; i<el; i++){
517            put_rac(c, state+1+i, 1);  //1..10
518        }
519        for(; i<e; i++){
520            put_rac(c, state+1+9, 1);  //1..10
521        }
522        put_rac(c, state+1+FFMIN(i,9), 0);
523
524        for(i=e-1; i>=el; i--){
525            put_rac(c, state+22+9, (a>>i)&1); //22..31
526        }
527        for(; i>=0; i--){
528            put_rac(c, state+22+i, (a>>i)&1); //22..31
529        }
530
531        if(is_signed)
532            put_rac(c, state+11 + el, v < 0); //11..21
533#else
534
535        put_rac(c, state+0, 0);
536        if(e<=9){
537            for(i=0; i<e; i++){
538                put_rac(c, state+1+i, 1);  //1..10
539            }
540            put_rac(c, state+1+i, 0);
541
542            for(i=e-1; i>=0; i--){
543                put_rac(c, state+22+i, (a>>i)&1); //22..31
544            }
545
546            if(is_signed)
547                put_rac(c, state+11 + e, v < 0); //11..21
548        }else{
549            for(i=0; i<e; i++){
550                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
551            }
552            put_rac(c, state+1+9, 0);
553
554            for(i=e-1; i>=0; i--){
555                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
556            }
557
558            if(is_signed)
559                put_rac(c, state+11 + 10, v < 0); //11..21
560        }
561#endif /* 1 */
562    }else{
563        put_rac(c, state+0, 1);
564    }
565}
566
567static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
568    if(get_rac(c, state+0))
569        return 0;
570    else{
571        int i, e, a;
572        e= 0;
573        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
574            e++;
575        }
576
577        a= 1;
578        for(i=e-1; i>=0; i--){
579            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
580        }
581
582        e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
583        return (a^e)-e;
584    }
585}
586
587static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
588    int i;
589    int r= log2>=0 ? 1<<log2 : 1;
590
591    assert(v>=0);
592    assert(log2>=-4);
593
594    while(v >= r){
595        put_rac(c, state+4+log2, 1);
596        v -= r;
597        log2++;
598        if(log2>0) r+=r;
599    }
600    put_rac(c, state+4+log2, 0);
601
602    for(i=log2-1; i>=0; i--){
603        put_rac(c, state+31-i, (v>>i)&1);
604    }
605}
606
607static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
608    int i;
609    int r= log2>=0 ? 1<<log2 : 1;
610    int v=0;
611
612    assert(log2>=-4);
613
614    while(get_rac(c, state+4+log2)){
615        v+= r;
616        log2++;
617        if(log2>0) r+=r;
618    }
619
620    for(i=log2-1; i>=0; i--){
621        v+= get_rac(c, state+31-i)<<i;
622    }
623
624    return v;
625}
626
627static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
628    const int w= b->width;
629    const int h= b->height;
630    int x,y;
631
632    int run, runs;
633    x_and_coeff *xc= b->x_coeff;
634    x_and_coeff *prev_xc= NULL;
635    x_and_coeff *prev2_xc= xc;
636    x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
637    x_and_coeff *prev_parent_xc= parent_xc;
638
639    runs= get_symbol2(&s->c, b->state[30], 0);
640    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
641    else           run= INT_MAX;
642
643    for(y=0; y<h; y++){
644        int v=0;
645        int lt=0, t=0, rt=0;
646
647        if(y && prev_xc->x == 0){
648            rt= prev_xc->coeff;
649        }
650        for(x=0; x<w; x++){
651            int p=0;
652            const int l= v;
653
654            lt= t; t= rt;
655
656            if(y){
657                if(prev_xc->x <= x)
658                    prev_xc++;
659                if(prev_xc->x == x + 1)
660                    rt= prev_xc->coeff;
661                else
662                    rt=0;
663            }
664            if(parent_xc){
665                if(x>>1 > parent_xc->x){
666                    parent_xc++;
667                }
668                if(x>>1 == parent_xc->x){
669                    p= parent_xc->coeff;
670                }
671            }
672            if(/*ll|*/l|lt|t|rt|p){
673                int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
674
675                v=get_rac(&s->c, &b->state[0][context]);
676                if(v){
677                    v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
678                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
679
680                    xc->x=x;
681                    (xc++)->coeff= v;
682                }
683            }else{
684                if(!run){
685                    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
686                    else           run= INT_MAX;
687                    v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
688                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
689
690                    xc->x=x;
691                    (xc++)->coeff= v;
692                }else{
693                    int max_run;
694                    run--;
695                    v=0;
696
697                    if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
698                    else  max_run= FFMIN(run, w-x-1);
699                    if(parent_xc)
700                        max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
701                    x+= max_run;
702                    run-= max_run;
703                }
704            }
705        }
706        (xc++)->x= w+1; //end marker
707        prev_xc= prev2_xc;
708        prev2_xc= xc;
709
710        if(parent_xc){
711            if(y&1){
712                while(parent_xc->x != parent->width+1)
713                    parent_xc++;
714                parent_xc++;
715                prev_parent_xc= parent_xc;
716            }else{
717                parent_xc= prev_parent_xc;
718            }
719        }
720    }
721
722    (xc++)->x= w+1; //end marker
723}
724
725static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
726    const int w= b->width;
727    int y;
728    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
729    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
730    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
731    int new_index = 0;
732
733    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
734        qadd= 0;
735        qmul= 1<<QEXPSHIFT;
736    }
737
738    /* If we are on the second or later slice, restore our index. */
739    if (start_y != 0)
740        new_index = save_state[0];
741
742
743    for(y=start_y; y<h; y++){
744        int x = 0;
745        int v;
746        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
747        memset(line, 0, b->width*sizeof(IDWTELEM));
748        v = b->x_coeff[new_index].coeff;
749        x = b->x_coeff[new_index++].x;
750        while(x < w){
751            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
752            register int u= -(v&1);
753            line[x] = (t^u) - u;
754
755            v = b->x_coeff[new_index].coeff;
756            x = b->x_coeff[new_index++].x;
757        }
758    }
759
760    /* Save our variables for the next slice. */
761    save_state[0] = new_index;
762
763    return;
764}
765
766static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
767    int plane_index, level, orientation;
768
769    for(plane_index=0; plane_index<3; plane_index++){
770        for(level=0; level<MAX_DECOMPOSITIONS; level++){
771            for(orientation=level ? 1:0; orientation<4; orientation++){
772                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
773            }
774        }
775    }
776    memset(s->header_state, MID_STATE, sizeof(s->header_state));
777    memset(s->block_state, MID_STATE, sizeof(s->block_state));
778}
779
780static int alloc_blocks(SnowContext *s){
781    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
782    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
783
784    s->b_width = w;
785    s->b_height= h;
786
787    av_free(s->block);
788    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
789    return 0;
790}
791
792static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
793    uint8_t *bytestream= d->bytestream;
794    uint8_t *bytestream_start= d->bytestream_start;
795    *d= *s;
796    d->bytestream= bytestream;
797    d->bytestream_start= bytestream_start;
798}
799
800static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
801    const int w= s->b_width << s->block_max_depth;
802    const int rem_depth= s->block_max_depth - level;
803    const int index= (x + y*w) << rem_depth;
804    const int block_w= 1<<rem_depth;
805    BlockNode block;
806    int i,j;
807
808    block.color[0]= l;
809    block.color[1]= cb;
810    block.color[2]= cr;
811    block.mx= mx;
812    block.my= my;
813    block.ref= ref;
814    block.type= type;
815    block.level= level;
816
817    for(j=0; j<block_w; j++){
818        for(i=0; i<block_w; i++){
819            s->block[index + i + j*w]= block;
820        }
821    }
822}
823
824static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
825    const int offset[3]= {
826          y*c->  stride + x,
827        ((y*c->uvstride + x)>>1),
828        ((y*c->uvstride + x)>>1),
829    };
830    int i;
831    for(i=0; i<3; i++){
832        c->src[0][i]= src [i];
833        c->ref[0][i]= ref [i] + offset[i];
834    }
835    assert(!ref_index);
836}
837
838static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
839                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
840    if(s->ref_frames == 1){
841        *mx = mid_pred(left->mx, top->mx, tr->mx);
842        *my = mid_pred(left->my, top->my, tr->my);
843    }else{
844        const int *scale = scale_mv_ref[ref];
845        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
846                       (top ->mx * scale[top ->ref] + 128) >>8,
847                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
848        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
849                       (top ->my * scale[top ->ref] + 128) >>8,
850                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
851    }
852}
853
854static av_always_inline int same_block(BlockNode *a, BlockNode *b){
855    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
856        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
857    }else{
858        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
859    }
860}
861
862static void decode_q_branch(SnowContext *s, int level, int x, int y){
863    const int w= s->b_width << s->block_max_depth;
864    const int rem_depth= s->block_max_depth - level;
865    const int index= (x + y*w) << rem_depth;
866    int trx= (x+1)<<rem_depth;
867    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
868    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
869    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
870    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
871    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
872
873    if(s->keyframe){
874        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
875        return;
876    }
877
878    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
879        int type, mx, my;
880        int l = left->color[0];
881        int cb= left->color[1];
882        int cr= left->color[2];
883        int ref = 0;
884        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
885        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
886        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
887
888        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
889
890        if(type){
891            pred_mv(s, &mx, &my, 0, left, top, tr);
892            l += get_symbol(&s->c, &s->block_state[32], 1);
893            cb+= get_symbol(&s->c, &s->block_state[64], 1);
894            cr+= get_symbol(&s->c, &s->block_state[96], 1);
895        }else{
896            if(s->ref_frames > 1)
897                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
898            pred_mv(s, &mx, &my, ref, left, top, tr);
899            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
900            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
901        }
902        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
903    }else{
904        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
905        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
906        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
907        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
908    }
909}
910
911static void decode_blocks(SnowContext *s){
912    int x, y;
913    int w= s->b_width;
914    int h= s->b_height;
915
916    for(y=0; y<h; y++){
917        for(x=0; x<w; x++){
918            decode_q_branch(s, 0, x, y);
919        }
920    }
921}
922
923static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
924    static const uint8_t weight[64]={
925    8,7,6,5,4,3,2,1,
926    7,7,0,0,0,0,0,1,
927    6,0,6,0,0,0,2,0,
928    5,0,0,5,0,3,0,0,
929    4,0,0,0,4,0,0,0,
930    3,0,0,5,0,3,0,0,
931    2,0,6,0,0,0,2,0,
932    1,7,0,0,0,0,0,1,
933    };
934
935    static const uint8_t brane[256]={
936    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
937    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
938    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
939    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
940    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
941    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
942    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
943    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
944    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
945    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
946    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
947    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
948    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
949    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
950    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
951    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
952    };
953
954    static const uint8_t needs[16]={
955    0,1,0,0,
956    2,4,2,0,
957    0,1,0,0,
958    15
959    };
960
961    int x, y, b, r, l;
962    int16_t tmpIt   [64*(32+HTAPS_MAX)];
963    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
964    int16_t *tmpI= tmpIt;
965    uint8_t *tmp2= tmp2t[0];
966    const uint8_t *hpel[11];
967    assert(dx<16 && dy<16);
968    r= brane[dx + 16*dy]&15;
969    l= brane[dx + 16*dy]>>4;
970
971    b= needs[l] | needs[r];
972    if(p && !p->diag_mc)
973        b= 15;
974
975    if(b&5){
976        for(y=0; y < b_h+HTAPS_MAX-1; y++){
977            for(x=0; x < b_w; x++){
978                int a_1=src[x + HTAPS_MAX/2-4];
979                int a0= src[x + HTAPS_MAX/2-3];
980                int a1= src[x + HTAPS_MAX/2-2];
981                int a2= src[x + HTAPS_MAX/2-1];
982                int a3= src[x + HTAPS_MAX/2+0];
983                int a4= src[x + HTAPS_MAX/2+1];
984                int a5= src[x + HTAPS_MAX/2+2];
985                int a6= src[x + HTAPS_MAX/2+3];
986                int am=0;
987                if(!p || p->fast_mc){
988                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
989                    tmpI[x]= am;
990                    am= (am+16)>>5;
991                }else{
992                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
993                    tmpI[x]= am;
994                    am= (am+32)>>6;
995                }
996
997                if(am&(~255)) am= ~(am>>31);
998                tmp2[x]= am;
999            }
1000            tmpI+= 64;
1001            tmp2+= stride;
1002            src += stride;
1003        }
1004        src -= stride*y;
1005    }
1006    src += HTAPS_MAX/2 - 1;
1007    tmp2= tmp2t[1];
1008
1009    if(b&2){
1010        for(y=0; y < b_h; y++){
1011            for(x=0; x < b_w+1; x++){
1012                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1013                int a0= src[x + (HTAPS_MAX/2-3)*stride];
1014                int a1= src[x + (HTAPS_MAX/2-2)*stride];
1015                int a2= src[x + (HTAPS_MAX/2-1)*stride];
1016                int a3= src[x + (HTAPS_MAX/2+0)*stride];
1017                int a4= src[x + (HTAPS_MAX/2+1)*stride];
1018                int a5= src[x + (HTAPS_MAX/2+2)*stride];
1019                int a6= src[x + (HTAPS_MAX/2+3)*stride];
1020                int am=0;
1021                if(!p || p->fast_mc)
1022                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1023                else
1024                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1025
1026                if(am&(~255)) am= ~(am>>31);
1027                tmp2[x]= am;
1028            }
1029            src += stride;
1030            tmp2+= stride;
1031        }
1032        src -= stride*y;
1033    }
1034    src += stride*(HTAPS_MAX/2 - 1);
1035    tmp2= tmp2t[2];
1036    tmpI= tmpIt;
1037    if(b&4){
1038        for(y=0; y < b_h; y++){
1039            for(x=0; x < b_w; x++){
1040                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1041                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1042                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1043                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1044                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1045                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1046                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1047                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1048                int am=0;
1049                if(!p || p->fast_mc)
1050                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1051                else
1052                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1053                if(am&(~255)) am= ~(am>>31);
1054                tmp2[x]= am;
1055            }
1056            tmpI+= 64;
1057            tmp2+= stride;
1058        }
1059    }
1060
1061    hpel[ 0]= src;
1062    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1063    hpel[ 2]= src + 1;
1064
1065    hpel[ 4]= tmp2t[1];
1066    hpel[ 5]= tmp2t[2];
1067    hpel[ 6]= tmp2t[1] + 1;
1068
1069    hpel[ 8]= src + stride;
1070    hpel[ 9]= hpel[1] + stride;
1071    hpel[10]= hpel[8] + 1;
1072
1073    if(b==15){
1074        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
1075        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1076        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1077        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1078        dx&=7;
1079        dy&=7;
1080        for(y=0; y < b_h; y++){
1081            for(x=0; x < b_w; x++){
1082                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1083                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
1084            }
1085            src1+=stride;
1086            src2+=stride;
1087            src3+=stride;
1088            src4+=stride;
1089            dst +=stride;
1090        }
1091    }else{
1092        const uint8_t *src1= hpel[l];
1093        const uint8_t *src2= hpel[r];
1094        int a= weight[((dx&7) + (8*(dy&7)))];
1095        int b= 8-a;
1096        for(y=0; y < b_h; y++){
1097            for(x=0; x < b_w; x++){
1098                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1099            }
1100            src1+=stride;
1101            src2+=stride;
1102            dst +=stride;
1103        }
1104    }
1105}
1106
1107#define mca(dx,dy,b_w)\
1108static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1109    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
1110    assert(h==b_w);\
1111    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
1112}
1113
1114mca( 0, 0,16)
1115mca( 8, 0,16)
1116mca( 0, 8,16)
1117mca( 8, 8,16)
1118mca( 0, 0,8)
1119mca( 8, 0,8)
1120mca( 0, 8,8)
1121mca( 8, 8,8)
1122
1123static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1124    if(block->type & BLOCK_INTRA){
1125        int x, y;
1126        const int color = block->color[plane_index];
1127        const int color4= color*0x01010101;
1128        if(b_w==32){
1129            for(y=0; y < b_h; y++){
1130                *(uint32_t*)&dst[0 + y*stride]= color4;
1131                *(uint32_t*)&dst[4 + y*stride]= color4;
1132                *(uint32_t*)&dst[8 + y*stride]= color4;
1133                *(uint32_t*)&dst[12+ y*stride]= color4;
1134                *(uint32_t*)&dst[16+ y*stride]= color4;
1135                *(uint32_t*)&dst[20+ y*stride]= color4;
1136                *(uint32_t*)&dst[24+ y*stride]= color4;
1137                *(uint32_t*)&dst[28+ y*stride]= color4;
1138            }
1139        }else if(b_w==16){
1140            for(y=0; y < b_h; y++){
1141                *(uint32_t*)&dst[0 + y*stride]= color4;
1142                *(uint32_t*)&dst[4 + y*stride]= color4;
1143                *(uint32_t*)&dst[8 + y*stride]= color4;
1144                *(uint32_t*)&dst[12+ y*stride]= color4;
1145            }
1146        }else if(b_w==8){
1147            for(y=0; y < b_h; y++){
1148                *(uint32_t*)&dst[0 + y*stride]= color4;
1149                *(uint32_t*)&dst[4 + y*stride]= color4;
1150            }
1151        }else if(b_w==4){
1152            for(y=0; y < b_h; y++){
1153                *(uint32_t*)&dst[0 + y*stride]= color4;
1154            }
1155        }else{
1156            for(y=0; y < b_h; y++){
1157                for(x=0; x < b_w; x++){
1158                    dst[x + y*stride]= color;
1159                }
1160            }
1161        }
1162    }else{
1163        uint8_t *src= s->last_picture[block->ref].data[plane_index];
1164        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
1165        int mx= block->mx*scale;
1166        int my= block->my*scale;
1167        const int dx= mx&15;
1168        const int dy= my&15;
1169        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
1170        sx += (mx>>4) - (HTAPS_MAX/2-1);
1171        sy += (my>>4) - (HTAPS_MAX/2-1);
1172        src += sx + sy*stride;
1173        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1174           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1175            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1176            src= tmp + MB_SIZE;
1177        }
1178//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1179//        assert(!(b_w&(b_w-1)));
1180        assert(b_w>1 && b_h>1);
1181        assert((tab_index>=0 && tab_index<4) || b_w==32);
1182        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1183            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1184        else if(b_w==32){
1185            int y;
1186            for(y=0; y<b_h; y+=16){
1187                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1188                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1189            }
1190        }else if(b_w==b_h)
1191            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1192        else if(b_w==2*b_h){
1193            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
1194            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1195        }else{
1196            assert(2*b_w==b_h);
1197            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
1198            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1199        }
1200    }
1201}
1202
1203void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1204                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1205    int y, x;
1206    IDWTELEM * dst;
1207    for(y=0; y<b_h; y++){
1208        //FIXME ugly misuse of obmc_stride
1209        const uint8_t *obmc1= obmc + y*obmc_stride;
1210        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1211        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1212        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1213        dst = slice_buffer_get_line(sb, src_y + y);
1214        for(x=0; x<b_w; x++){
1215            int v=   obmc1[x] * block[3][x + y*src_stride]
1216                    +obmc2[x] * block[2][x + y*src_stride]
1217                    +obmc3[x] * block[1][x + y*src_stride]
1218                    +obmc4[x] * block[0][x + y*src_stride];
1219
1220            v <<= 8 - LOG2_OBMC_MAX;
1221            if(FRAC_BITS != 8){
1222                v >>= 8 - FRAC_BITS;
1223            }
1224            if(add){
1225                v += dst[x + src_x];
1226                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1227                if(v&(~255)) v= ~(v>>31);
1228                dst8[x + y*src_stride] = v;
1229            }else{
1230                dst[x + src_x] -= v;
1231            }
1232        }
1233    }
1234}
1235
1236//FIXME name cleanup (b_w, block_w, b_width stuff)
1237static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1238    const int b_width = s->b_width  << s->block_max_depth;
1239    const int b_height= s->b_height << s->block_max_depth;
1240    const int b_stride= b_width;
1241    BlockNode *lt= &s->block[b_x + b_y*b_stride];
1242    BlockNode *rt= lt+1;
1243    BlockNode *lb= lt+b_stride;
1244    BlockNode *rb= lb+1;
1245    uint8_t *block[4];
1246    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1247    uint8_t *tmp = s->scratchbuf;
1248    uint8_t *ptmp;
1249    int x,y;
1250
1251    if(b_x<0){
1252        lt= rt;
1253        lb= rb;
1254    }else if(b_x + 1 >= b_width){
1255        rt= lt;
1256        rb= lb;
1257    }
1258    if(b_y<0){
1259        lt= lb;
1260        rt= rb;
1261    }else if(b_y + 1 >= b_height){
1262        lb= lt;
1263        rb= rt;
1264    }
1265
1266    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1267        obmc -= src_x;
1268        b_w += src_x;
1269        if(!sliced && !offset_dst)
1270            dst -= src_x;
1271        src_x=0;
1272    }else if(src_x + b_w > w){
1273        b_w = w - src_x;
1274    }
1275    if(src_y<0){
1276        obmc -= src_y*obmc_stride;
1277        b_h += src_y;
1278        if(!sliced && !offset_dst)
1279            dst -= src_y*dst_stride;
1280        src_y=0;
1281    }else if(src_y + b_h> h){
1282        b_h = h - src_y;
1283    }
1284
1285    if(b_w<=0 || b_h<=0) return;
1286
1287    assert(src_stride > 2*MB_SIZE + 5);
1288
1289    if(!sliced && offset_dst)
1290        dst += src_x + src_y*dst_stride;
1291    dst8+= src_x + src_y*src_stride;
1292//    src += src_x + src_y*src_stride;
1293
1294    ptmp= tmp + 3*tmp_step;
1295    block[0]= ptmp;
1296    ptmp+=tmp_step;
1297    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1298
1299    if(same_block(lt, rt)){
1300        block[1]= block[0];
1301    }else{
1302        block[1]= ptmp;
1303        ptmp+=tmp_step;
1304        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1305    }
1306
1307    if(same_block(lt, lb)){
1308        block[2]= block[0];
1309    }else if(same_block(rt, lb)){
1310        block[2]= block[1];
1311    }else{
1312        block[2]= ptmp;
1313        ptmp+=tmp_step;
1314        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1315    }
1316
1317    if(same_block(lt, rb) ){
1318        block[3]= block[0];
1319    }else if(same_block(rt, rb)){
1320        block[3]= block[1];
1321    }else if(same_block(lb, rb)){
1322        block[3]= block[2];
1323    }else{
1324        block[3]= ptmp;
1325        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1326    }
1327#if 0
1328    for(y=0; y<b_h; y++){
1329        for(x=0; x<b_w; x++){
1330            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
1331            if(add) dst[x + y*dst_stride] += v;
1332            else    dst[x + y*dst_stride] -= v;
1333        }
1334    }
1335    for(y=0; y<b_h; y++){
1336        uint8_t *obmc2= obmc + (obmc_stride>>1);
1337        for(x=0; x<b_w; x++){
1338            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
1339            if(add) dst[x + y*dst_stride] += v;
1340            else    dst[x + y*dst_stride] -= v;
1341        }
1342    }
1343    for(y=0; y<b_h; y++){
1344        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1345        for(x=0; x<b_w; x++){
1346            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
1347            if(add) dst[x + y*dst_stride] += v;
1348            else    dst[x + y*dst_stride] -= v;
1349        }
1350    }
1351    for(y=0; y<b_h; y++){
1352        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1353        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1354        for(x=0; x<b_w; x++){
1355            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
1356            if(add) dst[x + y*dst_stride] += v;
1357            else    dst[x + y*dst_stride] -= v;
1358        }
1359    }
1360#else
1361    if(sliced){
1362        s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1363    }else{
1364        for(y=0; y<b_h; y++){
1365            //FIXME ugly misuse of obmc_stride
1366            const uint8_t *obmc1= obmc + y*obmc_stride;
1367            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1368            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1369            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1370            for(x=0; x<b_w; x++){
1371                int v=   obmc1[x] * block[3][x + y*src_stride]
1372                        +obmc2[x] * block[2][x + y*src_stride]
1373                        +obmc3[x] * block[1][x + y*src_stride]
1374                        +obmc4[x] * block[0][x + y*src_stride];
1375
1376                v <<= 8 - LOG2_OBMC_MAX;
1377                if(FRAC_BITS != 8){
1378                    v >>= 8 - FRAC_BITS;
1379                }
1380                if(add){
1381                    v += dst[x + y*dst_stride];
1382                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1383                    if(v&(~255)) v= ~(v>>31);
1384                    dst8[x + y*src_stride] = v;
1385                }else{
1386                    dst[x + y*dst_stride] -= v;
1387                }
1388            }
1389        }
1390    }
1391#endif /* 0 */
1392}
1393
1394static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1395    Plane *p= &s->plane[plane_index];
1396    const int mb_w= s->b_width  << s->block_max_depth;
1397    const int mb_h= s->b_height << s->block_max_depth;
1398    int x, y, mb_x;
1399    int block_size = MB_SIZE >> s->block_max_depth;
1400    int block_w    = plane_index ? block_size/2 : block_size;
1401    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1402    int obmc_stride= plane_index ? block_size : 2*block_size;
1403    int ref_stride= s->current_picture.linesize[plane_index];
1404    uint8_t *dst8= s->current_picture.data[plane_index];
1405    int w= p->width;
1406    int h= p->height;
1407
1408    if(s->keyframe || (s->avctx->debug&512)){
1409        if(mb_y==mb_h)
1410            return;
1411
1412        if(add){
1413            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1414//                DWTELEM * line = slice_buffer_get_line(sb, y);
1415                IDWTELEM * line = sb->line[y];
1416                for(x=0; x<w; x++){
1417//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1418                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1419                    v >>= FRAC_BITS;
1420                    if(v&(~255)) v= ~(v>>31);
1421                    dst8[x + y*ref_stride]= v;
1422                }
1423            }
1424        }else{
1425            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1426//                DWTELEM * line = slice_buffer_get_line(sb, y);
1427                IDWTELEM * line = sb->line[y];
1428                for(x=0; x<w; x++){
1429                    line[x] -= 128 << FRAC_BITS;
1430//                    buf[x + y*w]-= 128<<FRAC_BITS;
1431                }
1432            }
1433        }
1434
1435        return;
1436    }
1437
1438    for(mb_x=0; mb_x<=mb_w; mb_x++){
1439        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1440                   block_w*mb_x - block_w/2,
1441                   block_w*mb_y - block_w/2,
1442                   block_w, block_w,
1443                   w, h,
1444                   w, ref_stride, obmc_stride,
1445                   mb_x - 1, mb_y - 1,
1446                   add, 0, plane_index);
1447    }
1448}
1449
1450static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1451    Plane *p= &s->plane[plane_index];
1452    const int mb_w= s->b_width  << s->block_max_depth;
1453    const int mb_h= s->b_height << s->block_max_depth;
1454    int x, y, mb_x;
1455    int block_size = MB_SIZE >> s->block_max_depth;
1456    int block_w    = plane_index ? block_size/2 : block_size;
1457    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1458    const int obmc_stride= plane_index ? block_size : 2*block_size;
1459    int ref_stride= s->current_picture.linesize[plane_index];
1460    uint8_t *dst8= s->current_picture.data[plane_index];
1461    int w= p->width;
1462    int h= p->height;
1463
1464    if(s->keyframe || (s->avctx->debug&512)){
1465        if(mb_y==mb_h)
1466            return;
1467
1468        if(add){
1469            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1470                for(x=0; x<w; x++){
1471                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1472                    v >>= FRAC_BITS;
1473                    if(v&(~255)) v= ~(v>>31);
1474                    dst8[x + y*ref_stride]= v;
1475                }
1476            }
1477        }else{
1478            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1479                for(x=0; x<w; x++){
1480                    buf[x + y*w]-= 128<<FRAC_BITS;
1481                }
1482            }
1483        }
1484
1485        return;
1486    }
1487
1488    for(mb_x=0; mb_x<=mb_w; mb_x++){
1489        add_yblock(s, 0, NULL, buf, dst8, obmc,
1490                   block_w*mb_x - block_w/2,
1491                   block_w*mb_y - block_w/2,
1492                   block_w, block_w,
1493                   w, h,
1494                   w, ref_stride, obmc_stride,
1495                   mb_x - 1, mb_y - 1,
1496                   add, 1, plane_index);
1497    }
1498}
1499
1500static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1501    const int mb_h= s->b_height << s->block_max_depth;
1502    int mb_y;
1503    for(mb_y=0; mb_y<=mb_h; mb_y++)
1504        predict_slice(s, buf, plane_index, add, mb_y);
1505}
1506
1507static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1508    const int w= b->width;
1509    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1510    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1511    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1512    int x,y;
1513
1514    if(s->qlog == LOSSLESS_QLOG) return;
1515
1516    for(y=start_y; y<end_y; y++){
1517//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1518        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1519        for(x=0; x<w; x++){
1520            int i= line[x];
1521            if(i<0){
1522                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1523            }else if(i>0){
1524                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
1525            }
1526        }
1527    }
1528}
1529
1530static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1531    const int w= b->width;
1532    int x,y;
1533
1534    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1535    IDWTELEM * prev;
1536
1537    if (start_y != 0)
1538        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1539
1540    for(y=start_y; y<end_y; y++){
1541        prev = line;
1542//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1543        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1544        for(x=0; x<w; x++){
1545            if(x){
1546                if(use_median){
1547                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1548                    else  line[x] += line[x - 1];
1549                }else{
1550                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1551                    else  line[x] += line[x - 1];
1552                }
1553            }else{
1554                if(y) line[x] += prev[x];
1555            }
1556        }
1557    }
1558}
1559
1560static void decode_qlogs(SnowContext *s){
1561    int plane_index, level, orientation;
1562
1563    for(plane_index=0; plane_index<3; plane_index++){
1564        for(level=0; level<s->spatial_decomposition_count; level++){
1565            for(orientation=level ? 1:0; orientation<4; orientation++){
1566                int q;
1567                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1568                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1569                else                    q= get_symbol(&s->c, s->header_state, 1);
1570                s->plane[plane_index].band[level][orientation].qlog= q;
1571            }
1572        }
1573    }
1574}
1575
1576#define GET_S(dst, check) \
1577    tmp= get_symbol(&s->c, s->header_state, 0);\
1578    if(!(check)){\
1579        av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
1580        return -1;\
1581    }\
1582    dst= tmp;
1583
1584static int decode_header(SnowContext *s){
1585    int plane_index, tmp;
1586    uint8_t kstate[32];
1587
1588    memset(kstate, MID_STATE, sizeof(kstate));
1589
1590    s->keyframe= get_rac(&s->c, kstate);
1591    if(s->keyframe || s->always_reset){
1592        reset_contexts(s);
1593        s->spatial_decomposition_type=
1594        s->qlog=
1595        s->qbias=
1596        s->mv_scale=
1597        s->block_max_depth= 0;
1598    }
1599    if(s->keyframe){
1600        GET_S(s->version, tmp <= 0U)
1601        s->always_reset= get_rac(&s->c, s->header_state);
1602        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1603        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1604        GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1605        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1606        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1607        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1608        s->spatial_scalability= get_rac(&s->c, s->header_state);
1609//        s->rate_scalability= get_rac(&s->c, s->header_state);
1610        GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1611        s->max_ref_frames++;
1612
1613        decode_qlogs(s);
1614    }
1615
1616    if(!s->keyframe){
1617        if(get_rac(&s->c, s->header_state)){
1618            for(plane_index=0; plane_index<2; plane_index++){
1619                int htaps, i, sum=0;
1620                Plane *p= &s->plane[plane_index];
1621                p->diag_mc= get_rac(&s->c, s->header_state);
1622                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1623                if((unsigned)htaps > HTAPS_MAX || htaps==0)
1624                    return -1;
1625                p->htaps= htaps;
1626                for(i= htaps/2; i; i--){
1627                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1628                    sum += p->hcoeff[i];
1629                }
1630                p->hcoeff[0]= 32-sum;
1631            }
1632            s->plane[2].diag_mc= s->plane[1].diag_mc;
1633            s->plane[2].htaps  = s->plane[1].htaps;
1634            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1635        }
1636        if(get_rac(&s->c, s->header_state)){
1637            GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1638            decode_qlogs(s);
1639        }
1640    }
1641
1642    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1643    if(s->spatial_decomposition_type > 1U){
1644        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1645        return -1;
1646    }
1647    if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1648             s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1649        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1650        return -1;
1651    }
1652
1653    s->qlog           += get_symbol(&s->c, s->header_state, 1);
1654    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
1655    s->qbias          += get_symbol(&s->c, s->header_state, 1);
1656    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1657    if(s->block_max_depth > 1 || s->block_max_depth < 0){
1658        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1659        s->block_max_depth= 0;
1660        return -1;
1661    }
1662
1663    return 0;
1664}
1665
1666static void init_qexp(void){
1667    int i;
1668    double v=128;
1669
1670    for(i=0; i<QROOT; i++){
1671        qexp[i]= lrintf(v);
1672        v *= pow(2, 1.0 / QROOT);
1673    }
1674}
1675
1676static av_cold int common_init(AVCodecContext *avctx){
1677    SnowContext *s = avctx->priv_data;
1678    int width, height;
1679    int i, j;
1680
1681    s->avctx= avctx;
1682    s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1683
1684    dsputil_init(&s->dsp, avctx);
1685    ff_dwt_init(&s->dwt);
1686
1687#define mcf(dx,dy)\
1688    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
1689    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1690        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1691    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
1692    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1693        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1694
1695    mcf( 0, 0)
1696    mcf( 4, 0)
1697    mcf( 8, 0)
1698    mcf(12, 0)
1699    mcf( 0, 4)
1700    mcf( 4, 4)
1701    mcf( 8, 4)
1702    mcf(12, 4)
1703    mcf( 0, 8)
1704    mcf( 4, 8)
1705    mcf( 8, 8)
1706    mcf(12, 8)
1707    mcf( 0,12)
1708    mcf( 4,12)
1709    mcf( 8,12)
1710    mcf(12,12)
1711
1712#define mcfh(dx,dy)\
1713    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
1714    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1715        mc_block_hpel ## dx ## dy ## 16;\
1716    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
1717    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1718        mc_block_hpel ## dx ## dy ## 8;
1719
1720    mcfh(0, 0)
1721    mcfh(8, 0)
1722    mcfh(0, 8)
1723    mcfh(8, 8)
1724
1725    if(!qexp[0])
1726        init_qexp();
1727
1728//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1729
1730    width= s->avctx->width;
1731    height= s->avctx->height;
1732
1733    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1734    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
1735
1736    for(i=0; i<MAX_REF_FRAMES; i++)
1737        for(j=0; j<MAX_REF_FRAMES; j++)
1738            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
1739
1740    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1741    s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1742
1743    return 0;
1744}
1745
1746static int common_init_after_header(AVCodecContext *avctx){
1747    SnowContext *s = avctx->priv_data;
1748    int plane_index, level, orientation;
1749
1750    for(plane_index=0; plane_index<3; plane_index++){
1751        int w= s->avctx->width;
1752        int h= s->avctx->height;
1753
1754        if(plane_index){
1755            w>>= s->chroma_h_shift;
1756            h>>= s->chroma_v_shift;
1757        }
1758        s->plane[plane_index].width = w;
1759        s->plane[plane_index].height= h;
1760
1761        for(level=s->spatial_decomposition_count-1; level>=0; level--){
1762            for(orientation=level ? 1 : 0; orientation<4; orientation++){
1763                SubBand *b= &s->plane[plane_index].band[level][orientation];
1764
1765                b->buf= s->spatial_dwt_buffer;
1766                b->level= level;
1767                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1768                b->width = (w + !(orientation&1))>>1;
1769                b->height= (h + !(orientation>1))>>1;
1770
1771                b->stride_line = 1 << (s->spatial_decomposition_count - level);
1772                b->buf_x_offset = 0;
1773                b->buf_y_offset = 0;
1774
1775                if(orientation&1){
1776                    b->buf += (w+1)>>1;
1777                    b->buf_x_offset = (w+1)>>1;
1778                }
1779                if(orientation>1){
1780                    b->buf += b->stride>>1;
1781                    b->buf_y_offset = b->stride_line >> 1;
1782                }
1783                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1784
1785                if(level)
1786                    b->parent= &s->plane[plane_index].band[level-1][orientation];
1787                //FIXME avoid this realloc
1788                av_freep(&b->x_coeff);
1789                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1790            }
1791            w= (w+1)>>1;
1792            h= (h+1)>>1;
1793        }
1794    }
1795
1796    return 0;
1797}
1798
1799#define QUANTIZE2 0
1800
1801#if QUANTIZE2==1
1802#define Q2_STEP 8
1803
1804static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1805    SubBand *b= &p->band[level][orientation];
1806    int x, y;
1807    int xo=0;
1808    int yo=0;
1809    int step= 1 << (s->spatial_decomposition_count - level);
1810
1811    if(orientation&1)
1812        xo= step>>1;
1813    if(orientation&2)
1814        yo= step>>1;
1815
1816    //FIXME bias for nonzero ?
1817    //FIXME optimize
1818    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1819    for(y=0; y<p->height; y++){
1820        for(x=0; x<p->width; x++){
1821            int sx= (x-xo + step/2) / step / Q2_STEP;
1822            int sy= (y-yo + step/2) / step / Q2_STEP;
1823            int v= r0[x + y*p->width] - r1[x + y*p->width];
1824            assert(sx>=0 && sy>=0 && sx < score_stride);
1825            v= ((v+8)>>4)<<4;
1826            score[sx + sy*score_stride] += v*v;
1827            assert(score[sx + sy*score_stride] >= 0);
1828        }
1829    }
1830}
1831
1832static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1833    int level, orientation;
1834
1835    for(level=0; level<s->spatial_decomposition_count; level++){
1836        for(orientation=level ? 1 : 0; orientation<4; orientation++){
1837            SubBand *b= &p->band[level][orientation];
1838            IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1839
1840            dequantize(s, b, dst, b->stride);
1841        }
1842    }
1843}
1844
1845static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1846    int level, orientation, ys, xs, x, y, pass;
1847    IDWTELEM best_dequant[height * stride];
1848    IDWTELEM idwt2_buffer[height * stride];
1849    const int score_stride= (width + 10)/Q2_STEP;
1850    int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1851    int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1852    int threshold= (s->m.lambda * s->m.lambda) >> 6;
1853
1854    //FIXME pass the copy cleanly ?
1855
1856//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1857    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1858
1859    for(level=0; level<s->spatial_decomposition_count; level++){
1860        for(orientation=level ? 1 : 0; orientation<4; orientation++){
1861            SubBand *b= &p->band[level][orientation];
1862            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1863             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
1864            assert(src == b->buf); // code does not depend on this but it is true currently
1865
1866            quantize(s, b, dst, src, b->stride, s->qbias);
1867        }
1868    }
1869    for(pass=0; pass<1; pass++){
1870        if(s->qbias == 0) //keyframe
1871            continue;
1872        for(level=0; level<s->spatial_decomposition_count; level++){
1873            for(orientation=level ? 1 : 0; orientation<4; orientation++){
1874                SubBand *b= &p->band[level][orientation];
1875                IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1876                IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1877
1878                for(ys= 0; ys<Q2_STEP; ys++){
1879                    for(xs= 0; xs<Q2_STEP; xs++){
1880                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1881                        dequantize_all(s, p, idwt2_buffer, width, height);
1882                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1883                        find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1884                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1885                        for(y=ys; y<b->height; y+= Q2_STEP){
1886                            for(x=xs; x<b->width; x+= Q2_STEP){
1887                                if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1888                                if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1889                                //FIXME try more than just --
1890                            }
1891                        }
1892                        dequantize_all(s, p, idwt2_buffer, width, height);
1893                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1894                        find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1895                        for(y=ys; y<b->height; y+= Q2_STEP){
1896                            for(x=xs; x<b->width; x+= Q2_STEP){
1897                                int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1898                                if(score[score_idx] <= best_score[score_idx] + threshold){
1899                                    best_score[score_idx]= score[score_idx];
1900                                    if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1901                                    if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1902                                    //FIXME copy instead
1903                                }
1904                            }
1905                        }
1906                    }
1907                }
1908            }
1909        }
1910    }
1911    memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1912}
1913
1914#endif /* QUANTIZE2==1 */
1915
1916#define USE_HALFPEL_PLANE 0
1917
1918static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1919    int p,x,y;
1920
1921    assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
1922
1923    for(p=0; p<3; p++){
1924        int is_chroma= !!p;
1925        int w= s->avctx->width  >>is_chroma;
1926        int h= s->avctx->height >>is_chroma;
1927        int ls= frame->linesize[p];
1928        uint8_t *src= frame->data[p];
1929
1930        halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1931        halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1932        halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1933
1934        halfpel[0][p]= src;
1935        for(y=0; y<h; y++){
1936            for(x=0; x<w; x++){
1937                int i= y*ls + x;
1938
1939                halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1940            }
1941        }
1942        for(y=0; y<h; y++){
1943            for(x=0; x<w; x++){
1944                int i= y*ls + x;
1945
1946                halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1947            }
1948        }
1949        src= halfpel[1][p];
1950        for(y=0; y<h; y++){
1951            for(x=0; x<w; x++){
1952                int i= y*ls + x;
1953
1954                halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1955            }
1956        }
1957
1958//FIXME border!
1959    }
1960}
1961
1962static void release_buffer(AVCodecContext *avctx){
1963    SnowContext *s = avctx->priv_data;
1964    int i;
1965
1966    if(s->last_picture[s->max_ref_frames-1].data[0]){
1967        avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1968        for(i=0; i<9; i++)
1969            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1970                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
1971    }
1972}
1973
1974static int frame_start(SnowContext *s){
1975   AVFrame tmp;
1976   int w= s->avctx->width; //FIXME round up to x16 ?
1977   int h= s->avctx->height;
1978
1979    if(s->current_picture.data[0]){
1980        s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
1981        s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
1982        s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
1983    }
1984
1985    release_buffer(s->avctx);
1986
1987    tmp= s->last_picture[s->max_ref_frames-1];
1988    memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1989    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1990    if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1991        halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1992    s->last_picture[0]= s->current_picture;
1993    s->current_picture= tmp;
1994
1995    if(s->keyframe){
1996        s->ref_frames= 0;
1997    }else{
1998        int i;
1999        for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2000            if(i && s->last_picture[i-1].key_frame)
2001                break;
2002        s->ref_frames= i;
2003        if(s->ref_frames==0){
2004            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2005            return -1;
2006        }
2007    }
2008
2009    s->current_picture.reference= 1;
2010    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2011        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2012        return -1;
2013    }
2014
2015    s->current_picture.key_frame= s->keyframe;
2016
2017    return 0;
2018}
2019
2020static av_cold void common_end(SnowContext *s){
2021    int plane_index, level, orientation, i;
2022
2023    av_freep(&s->spatial_dwt_buffer);
2024    av_freep(&s->spatial_idwt_buffer);
2025
2026    s->m.me.temp= NULL;
2027    av_freep(&s->m.me.scratchpad);
2028    av_freep(&s->m.me.map);
2029    av_freep(&s->m.me.score_map);
2030    av_freep(&s->m.obmc_scratchpad);
2031
2032    av_freep(&s->block);
2033    av_freep(&s->scratchbuf);
2034
2035    for(i=0; i<MAX_REF_FRAMES; i++){
2036        av_freep(&s->ref_mvs[i]);
2037        av_freep(&s->ref_scores[i]);
2038        if(s->last_picture[i].data[0])
2039            s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2040    }
2041
2042    for(plane_index=0; plane_index<3; plane_index++){
2043        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2044            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2045                SubBand *b= &s->plane[plane_index].band[level][orientation];
2046
2047                av_freep(&b->x_coeff);
2048            }
2049        }
2050    }
2051    if (s->mconly_picture.data[0])
2052        s->avctx->release_buffer(s->avctx, &s->mconly_picture);
2053    if (s->current_picture.data[0])
2054        s->avctx->release_buffer(s->avctx, &s->current_picture);
2055}
2056
2057static av_cold int decode_init(AVCodecContext *avctx)
2058{
2059    avctx->pix_fmt= PIX_FMT_YUV420P;
2060
2061    common_init(avctx);
2062
2063    return 0;
2064}
2065
2066static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2067    const uint8_t *buf = avpkt->data;
2068    int buf_size = avpkt->size;
2069    SnowContext *s = avctx->priv_data;
2070    RangeCoder * const c= &s->c;
2071    int bytes_read;
2072    AVFrame *picture = data;
2073    int level, orientation, plane_index;
2074
2075    ff_init_range_decoder(c, buf, buf_size);
2076    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2077
2078    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2079    if(decode_header(s)<0)
2080        return -1;
2081    common_init_after_header(avctx);
2082
2083    // realloc slice buffer for the case that spatial_decomposition_count changed
2084    ff_slice_buffer_destroy(&s->sb);
2085    ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
2086
2087    for(plane_index=0; plane_index<3; plane_index++){
2088        Plane *p= &s->plane[plane_index];
2089        p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2090                                              && p->hcoeff[1]==-10
2091                                              && p->hcoeff[2]==2;
2092    }
2093
2094    alloc_blocks(s);
2095
2096    if(frame_start(s) < 0)
2097        return -1;
2098    //keyframe flag duplication mess FIXME
2099    if(avctx->debug&FF_DEBUG_PICT_INFO)
2100        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2101
2102    decode_blocks(s);
2103
2104    for(plane_index=0; plane_index<3; plane_index++){
2105        Plane *p= &s->plane[plane_index];
2106        int w= p->width;
2107        int h= p->height;
2108        int x, y;
2109        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2110
2111        if(s->avctx->debug&2048){
2112            memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2113            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2114
2115            for(y=0; y<h; y++){
2116                for(x=0; x<w; x++){
2117                    int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2118                    s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2119                }
2120            }
2121        }
2122
2123        {
2124        for(level=0; level<s->spatial_decomposition_count; level++){
2125            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2126                SubBand *b= &p->band[level][orientation];
2127                unpack_coeffs(s, b, b->parent, orientation);
2128            }
2129        }
2130        }
2131
2132        {
2133        const int mb_h= s->b_height << s->block_max_depth;
2134        const int block_size = MB_SIZE >> s->block_max_depth;
2135        const int block_w    = plane_index ? block_size/2 : block_size;
2136        int mb_y;
2137        DWTCompose cs[MAX_DECOMPOSITIONS];
2138        int yd=0, yq=0;
2139        int y;
2140        int end_y;
2141
2142        ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2143        for(mb_y=0; mb_y<=mb_h; mb_y++){
2144
2145            int slice_starty = block_w*mb_y;
2146            int slice_h = block_w*(mb_y+1);
2147            if (!(s->keyframe || s->avctx->debug&512)){
2148                slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2149                slice_h -= (block_w >> 1);
2150            }
2151
2152            for(level=0; level<s->spatial_decomposition_count; level++){
2153                for(orientation=level ? 1 : 0; orientation<4; orientation++){
2154                    SubBand *b= &p->band[level][orientation];
2155                    int start_y;
2156                    int end_y;
2157                    int our_mb_start = mb_y;
2158                    int our_mb_end = (mb_y + 1);
2159                    const int extra= 3;
2160                    start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2161                    end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2162                    if (!(s->keyframe || s->avctx->debug&512)){
2163                        start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2164                        end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2165                    }
2166                    start_y = FFMIN(b->height, start_y);
2167                    end_y = FFMIN(b->height, end_y);
2168
2169                    if (start_y != end_y){
2170                        if (orientation == 0){
2171                            SubBand * correlate_band = &p->band[0][0];
2172                            int correlate_end_y = FFMIN(b->height, end_y + 1);
2173                            int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2174                            decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2175                            correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2176                            dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2177                        }
2178                        else
2179                            decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2180                    }
2181                }
2182            }
2183
2184            for(; yd<slice_h; yd+=4){
2185                ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2186            }
2187
2188            if(s->qlog == LOSSLESS_QLOG){
2189                for(; yq<slice_h && yq<h; yq++){
2190                    IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2191                    for(x=0; x<w; x++){
2192                        line[x] <<= FRAC_BITS;
2193                    }
2194                }
2195            }
2196
2197            predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2198
2199            y = FFMIN(p->height, slice_starty);
2200            end_y = FFMIN(p->height, slice_h);
2201            while(y < end_y)
2202                ff_slice_buffer_release(&s->sb, y++);
2203        }
2204
2205        ff_slice_buffer_flush(&s->sb);
2206        }
2207
2208    }
2209
2210    emms_c();
2211
2212    release_buffer(avctx);
2213
2214    if(!(s->avctx->debug&2048))
2215        *picture= s->current_picture;
2216    else
2217        *picture= s->mconly_picture;
2218
2219    *data_size = sizeof(AVFrame);
2220
2221    bytes_read= c->bytestream - c->bytestream_start;
2222    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2223
2224    return bytes_read;
2225}
2226
2227static av_cold int decode_end(AVCodecContext *avctx)
2228{
2229    SnowContext *s = avctx->priv_data;
2230
2231    ff_slice_buffer_destroy(&s->sb);
2232
2233    common_end(s);
2234
2235    return 0;
2236}
2237
2238AVCodec snow_decoder = {
2239    "snow",
2240    AVMEDIA_TYPE_VIDEO,
2241    CODEC_ID_SNOW,
2242    sizeof(SnowContext),
2243    decode_init,
2244    NULL,
2245    decode_end,
2246    decode_frame,
2247    CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2248    NULL,
2249    .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2250};
2251
2252#if CONFIG_SNOW_ENCODER
2253static av_cold int encode_init(AVCodecContext *avctx)
2254{
2255    SnowContext *s = avctx->priv_data;
2256    int plane_index;
2257
2258    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2259        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2260               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2261        return -1;
2262    }
2263
2264    if(avctx->prediction_method == DWT_97
2265       && (avctx->flags & CODEC_FLAG_QSCALE)
2266       && avctx->global_quality == 0){
2267        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2268        return -1;
2269    }
2270
2271    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2272
2273    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2274    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2275
2276    for(plane_index=0; plane_index<3; plane_index++){
2277        s->plane[plane_index].diag_mc= 1;
2278        s->plane[plane_index].htaps= 6;
2279        s->plane[plane_index].hcoeff[0]=  40;
2280        s->plane[plane_index].hcoeff[1]= -10;
2281        s->plane[plane_index].hcoeff[2]=   2;
2282        s->plane[plane_index].fast_mc= 1;
2283    }
2284
2285    common_init(avctx);
2286    alloc_blocks(s);
2287
2288    s->version=0;
2289
2290    s->m.avctx   = avctx;
2291    s->m.flags   = avctx->flags;
2292    s->m.bit_rate= avctx->bit_rate;
2293
2294    s->m.me.temp      =
2295    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2296    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2297    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2298    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2299    h263_encode_init(&s->m); //mv_penalty
2300
2301    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2302
2303    if(avctx->flags&CODEC_FLAG_PASS1){
2304        if(!avctx->stats_out)
2305            avctx->stats_out = av_mallocz(256);
2306    }
2307    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2308        if(ff_rate_control_init(&s->m) < 0)
2309            return -1;
2310    }
2311    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2312
2313    avctx->coded_frame= &s->current_picture;
2314    switch(avctx->pix_fmt){
2315//    case PIX_FMT_YUV444P:
2316//    case PIX_FMT_YUV422P:
2317    case PIX_FMT_YUV420P:
2318    case PIX_FMT_GRAY8:
2319//    case PIX_FMT_YUV411P:
2320//    case PIX_FMT_YUV410P:
2321        s->colorspace_type= 0;
2322        break;
2323/*    case PIX_FMT_RGB32:
2324        s->colorspace= 1;
2325        break;*/
2326    default:
2327        av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2328        return -1;
2329    }
2330//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2331    s->chroma_h_shift= 1;
2332    s->chroma_v_shift= 1;
2333
2334    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2335    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2336
2337    s->avctx->get_buffer(s->avctx, &s->input_picture);
2338
2339    if(s->avctx->me_method == ME_ITER){
2340        int i;
2341        int size= s->b_width * s->b_height << 2*s->block_max_depth;
2342        for(i=0; i<s->max_ref_frames; i++){
2343            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2344            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2345        }
2346    }
2347
2348    return 0;
2349}
2350
2351//near copy & paste from dsputil, FIXME
2352static int pix_sum(uint8_t * pix, int line_size, int w)
2353{
2354    int s, i, j;
2355
2356    s = 0;
2357    for (i = 0; i < w; i++) {
2358        for (j = 0; j < w; j++) {
2359            s += pix[0];
2360            pix ++;
2361        }
2362        pix += line_size - w;
2363    }
2364    return s;
2365}
2366
2367//near copy & paste from dsputil, FIXME
2368static int pix_norm1(uint8_t * pix, int line_size, int w)
2369{
2370    int s, i, j;
2371    uint32_t *sq = ff_squareTbl + 256;
2372
2373    s = 0;
2374    for (i = 0; i < w; i++) {
2375        for (j = 0; j < w; j ++) {
2376            s += sq[pix[0]];
2377            pix ++;
2378        }
2379        pix += line_size - w;
2380    }
2381    return s;
2382}
2383
2384//FIXME copy&paste
2385#define P_LEFT P[1]
2386#define P_TOP P[2]
2387#define P_TOPRIGHT P[3]
2388#define P_MEDIAN P[4]
2389#define P_MV1 P[9]
2390#define FLAG_QPEL   1 //must be 1
2391
2392static int encode_q_branch(SnowContext *s, int level, int x, int y){
2393    uint8_t p_buffer[1024];
2394    uint8_t i_buffer[1024];
2395    uint8_t p_state[sizeof(s->block_state)];
2396    uint8_t i_state[sizeof(s->block_state)];
2397    RangeCoder pc, ic;
2398    uint8_t *pbbak= s->c.bytestream;
2399    uint8_t *pbbak_start= s->c.bytestream_start;
2400    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2401    const int w= s->b_width  << s->block_max_depth;
2402    const int h= s->b_height << s->block_max_depth;
2403    const int rem_depth= s->block_max_depth - level;
2404    const int index= (x + y*w) << rem_depth;
2405    const int block_w= 1<<(LOG2_MB_SIZE - level);
2406    int trx= (x+1)<<rem_depth;
2407    int try= (y+1)<<rem_depth;
2408    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2409    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2410    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2411    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2412    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2413    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2414    int pl = left->color[0];
2415    int pcb= left->color[1];
2416    int pcr= left->color[2];
2417    int pmx, pmy;
2418    int mx=0, my=0;
2419    int l,cr,cb;
2420    const int stride= s->current_picture.linesize[0];
2421    const int uvstride= s->current_picture.linesize[1];
2422    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
2423                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2424                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2425    int P[10][2];
2426    int16_t last_mv[3][2];
2427    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2428    const int shift= 1+qpel;
2429    MotionEstContext *c= &s->m.me;
2430    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2431    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2432    int my_context= av_log2(2*FFABS(left->my - top->my));
2433    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2434    int ref, best_ref, ref_score, ref_mx, ref_my;
2435
2436    assert(sizeof(s->block_state) >= 256);
2437    if(s->keyframe){
2438        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2439        return 0;
2440    }
2441
2442//    clip predictors / edge ?
2443
2444    P_LEFT[0]= left->mx;
2445    P_LEFT[1]= left->my;
2446    P_TOP [0]= top->mx;
2447    P_TOP [1]= top->my;
2448    P_TOPRIGHT[0]= tr->mx;
2449    P_TOPRIGHT[1]= tr->my;
2450
2451    last_mv[0][0]= s->block[index].mx;
2452    last_mv[0][1]= s->block[index].my;
2453    last_mv[1][0]= right->mx;
2454    last_mv[1][1]= right->my;
2455    last_mv[2][0]= bottom->mx;
2456    last_mv[2][1]= bottom->my;
2457
2458    s->m.mb_stride=2;
2459    s->m.mb_x=
2460    s->m.mb_y= 0;
2461    c->skip= 0;
2462
2463    assert(c->  stride ==   stride);
2464    assert(c->uvstride == uvstride);
2465
2466    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2467    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2468    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2469    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2470
2471    c->xmin = - x*block_w - 16+3;
2472    c->ymin = - y*block_w - 16+3;
2473    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2474    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2475
2476    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2477    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
2478    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2479    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2480    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2481    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2482    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2483
2484    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2485    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2486
2487    if (!y) {
2488        c->pred_x= P_LEFT[0];
2489        c->pred_y= P_LEFT[1];
2490    } else {
2491        c->pred_x = P_MEDIAN[0];
2492        c->pred_y = P_MEDIAN[1];
2493    }
2494
2495    score= INT_MAX;
2496    best_ref= 0;
2497    for(ref=0; ref<s->ref_frames; ref++){
2498        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2499
2500        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2501                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2502
2503        assert(ref_mx >= c->xmin);
2504        assert(ref_mx <= c->xmax);
2505        assert(ref_my >= c->ymin);
2506        assert(ref_my <= c->ymax);
2507
2508        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2509        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2510        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2511        if(s->ref_mvs[ref]){
2512            s->ref_mvs[ref][index][0]= ref_mx;
2513            s->ref_mvs[ref][index][1]= ref_my;
2514            s->ref_scores[ref][index]= ref_score;
2515        }
2516        if(score > ref_score){
2517            score= ref_score;
2518            best_ref= ref;
2519            mx= ref_mx;
2520            my= ref_my;
2521        }
2522    }
2523    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2524
2525  //  subpel search
2526    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2527    pc= s->c;
2528    pc.bytestream_start=
2529    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2530    memcpy(p_state, s->block_state, sizeof(s->block_state));
2531
2532    if(level!=s->block_max_depth)
2533        put_rac(&pc, &p_state[4 + s_context], 1);
2534    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2535    if(s->ref_frames > 1)
2536        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2537    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2538    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2539    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2540    p_len= pc.bytestream - pc.bytestream_start;
2541    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2542
2543    block_s= block_w*block_w;
2544    sum = pix_sum(current_data[0], stride, block_w);
2545    l= (sum + block_s/2)/block_s;
2546    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2547
2548    block_s= block_w*block_w>>2;
2549    sum = pix_sum(current_data[1], uvstride, block_w>>1);
2550    cb= (sum + block_s/2)/block_s;
2551//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2552    sum = pix_sum(current_data[2], uvstride, block_w>>1);
2553    cr= (sum + block_s/2)/block_s;
2554//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2555
2556    ic= s->c;
2557    ic.bytestream_start=
2558    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2559    memcpy(i_state, s->block_state, sizeof(s->block_state));
2560    if(level!=s->block_max_depth)
2561        put_rac(&ic, &i_state[4 + s_context], 1);
2562    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2563    put_symbol(&ic, &i_state[32],  l-pl , 1);
2564    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2565    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2566    i_len= ic.bytestream - ic.bytestream_start;
2567    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2568
2569//    assert(score==256*256*256*64-1);
2570    assert(iscore < 255*255*256 + s->lambda2*10);
2571    assert(iscore >= 0);
2572    assert(l>=0 && l<=255);
2573    assert(pl>=0 && pl<=255);
2574
2575    if(level==0){
2576        int varc= iscore >> 8;
2577        int vard= score >> 8;
2578        if (vard <= 64 || vard < varc)
2579            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2580        else
2581            c->scene_change_score+= s->m.qscale;
2582    }
2583
2584    if(level!=s->block_max_depth){
2585        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2586        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2587        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2588        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2589        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2590        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2591
2592        if(score2 < score && score2 < iscore)
2593            return score2;
2594    }
2595
2596    if(iscore < score){
2597        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2598        memcpy(pbbak, i_buffer, i_len);
2599        s->c= ic;
2600        s->c.bytestream_start= pbbak_start;
2601        s->c.bytestream= pbbak + i_len;
2602        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2603        memcpy(s->block_state, i_state, sizeof(s->block_state));
2604        return iscore;
2605    }else{
2606        memcpy(pbbak, p_buffer, p_len);
2607        s->c= pc;
2608        s->c.bytestream_start= pbbak_start;
2609        s->c.bytestream= pbbak + p_len;
2610        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2611        memcpy(s->block_state, p_state, sizeof(s->block_state));
2612        return score;
2613    }
2614}
2615
2616static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2617    const int w= s->b_width  << s->block_max_depth;
2618    const int rem_depth= s->block_max_depth - level;
2619    const int index= (x + y*w) << rem_depth;
2620    int trx= (x+1)<<rem_depth;
2621    BlockNode *b= &s->block[index];
2622    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2623    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2624    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2625    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2626    int pl = left->color[0];
2627    int pcb= left->color[1];
2628    int pcr= left->color[2];
2629    int pmx, pmy;
2630    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2631    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2632    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2633    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2634
2635    if(s->keyframe){
2636        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2637        return;
2638    }
2639
2640    if(level!=s->block_max_depth){
2641        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2642            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2643        }else{
2644            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2645            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2646            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2647            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2648            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2649            return;
2650        }
2651    }
2652    if(b->type & BLOCK_INTRA){
2653        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2654        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2655        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2656        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2657        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2658        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2659    }else{
2660        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2661        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2662        if(s->ref_frames > 1)
2663            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2664        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2665        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2666        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2667    }
2668}
2669
2670static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2671    int i, x2, y2;
2672    Plane *p= &s->plane[plane_index];
2673    const int block_size = MB_SIZE >> s->block_max_depth;
2674    const int block_w    = plane_index ? block_size/2 : block_size;
2675    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2676    const int obmc_stride= plane_index ? block_size : 2*block_size;
2677    const int ref_stride= s->current_picture.linesize[plane_index];
2678    uint8_t *src= s-> input_picture.data[plane_index];
2679    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2680    const int b_stride = s->b_width << s->block_max_depth;
2681    const int w= p->width;
2682    const int h= p->height;
2683    int index= mb_x + mb_y*b_stride;
2684    BlockNode *b= &s->block[index];
2685    BlockNode backup= *b;
2686    int ab=0;
2687    int aa=0;
2688
2689    b->type|= BLOCK_INTRA;
2690    b->color[plane_index]= 0;
2691    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2692
2693    for(i=0; i<4; i++){
2694        int mb_x2= mb_x + (i &1) - 1;
2695        int mb_y2= mb_y + (i>>1) - 1;
2696        int x= block_w*mb_x2 + block_w/2;
2697        int y= block_w*mb_y2 + block_w/2;
2698
2699        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2700                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2701
2702        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2703            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2704                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2705                int obmc_v= obmc[index];
2706                int d;
2707                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2708                if(x<0) obmc_v += obmc[index + block_w];
2709                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2710                if(x+block_w>w) obmc_v += obmc[index - block_w];
2711                //FIXME precalculate this or simplify it somehow else
2712
2713                d = -dst[index] + (1<<(FRAC_BITS-1));
2714                dst[index] = d;
2715                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2716                aa += obmc_v * obmc_v; //FIXME precalculate this
2717            }
2718        }
2719    }
2720    *b= backup;
2721
2722    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2723}
2724
2725static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2726    const int b_stride = s->b_width << s->block_max_depth;
2727    const int b_height = s->b_height<< s->block_max_depth;
2728    int index= x + y*b_stride;
2729    const BlockNode *b     = &s->block[index];
2730    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2731    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2732    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2733    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2734    int dmx, dmy;
2735//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2736//  int my_context= av_log2(2*FFABS(left->my - top->my));
2737
2738    if(x<0 || x>=b_stride || y>=b_height)
2739        return 0;
2740/*
27411            0      0
274201X          1-2    1
2743001XX        3-6    2-3
27440001XXX      7-14   4-7
274500001XXXX   15-30   8-15
2746*/
2747//FIXME try accurate rate
2748//FIXME intra and inter predictors if surrounding blocks are not the same type
2749    if(b->type & BLOCK_INTRA){
2750        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2751                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2752                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2753    }else{
2754        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2755        dmx-= b->mx;
2756        dmy-= b->my;
2757        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2758                    + av_log2(2*FFABS(dmy))
2759                    + av_log2(2*b->ref));
2760    }
2761}
2762
2763static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2764    Plane *p= &s->plane[plane_index];
2765    const int block_size = MB_SIZE >> s->block_max_depth;
2766    const int block_w    = plane_index ? block_size/2 : block_size;
2767    const int obmc_stride= plane_index ? block_size : 2*block_size;
2768    const int ref_stride= s->current_picture.linesize[plane_index];
2769    uint8_t *dst= s->current_picture.data[plane_index];
2770    uint8_t *src= s->  input_picture.data[plane_index];
2771    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2772    uint8_t *cur = s->scratchbuf;
2773    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2774    const int b_stride = s->b_width << s->block_max_depth;
2775    const int b_height = s->b_height<< s->block_max_depth;
2776    const int w= p->width;
2777    const int h= p->height;
2778    int distortion;
2779    int rate= 0;
2780    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2781    int sx= block_w*mb_x - block_w/2;
2782    int sy= block_w*mb_y - block_w/2;
2783    int x0= FFMAX(0,-sx);
2784    int y0= FFMAX(0,-sy);
2785    int x1= FFMIN(block_w*2, w-sx);
2786    int y1= FFMIN(block_w*2, h-sy);
2787    int i,x,y;
2788
2789    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2790
2791    for(y=y0; y<y1; y++){
2792        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2793        const IDWTELEM *pred1 = pred + y*obmc_stride;
2794        uint8_t *cur1 = cur + y*ref_stride;
2795        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2796        for(x=x0; x<x1; x++){
2797#if FRAC_BITS >= LOG2_OBMC_MAX
2798            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2799#else
2800            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2801#endif
2802            v = (v + pred1[x]) >> FRAC_BITS;
2803            if(v&(~255)) v= ~(v>>31);
2804            dst1[x] = v;
2805        }
2806    }
2807
2808    /* copy the regions where obmc[] = (uint8_t)256 */
2809    if(LOG2_OBMC_MAX == 8
2810        && (mb_x == 0 || mb_x == b_stride-1)
2811        && (mb_y == 0 || mb_y == b_height-1)){
2812        if(mb_x == 0)
2813            x1 = block_w;
2814        else
2815            x0 = block_w;
2816        if(mb_y == 0)
2817            y1 = block_w;
2818        else
2819            y0 = block_w;
2820        for(y=y0; y<y1; y++)
2821            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2822    }
2823
2824    if(block_w==16){
2825        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2826        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2827        /* FIXME cmps overlap but do not cover the wavelet's whole support.
2828         * So improving the score of one block is not strictly guaranteed
2829         * to improve the score of the whole frame, thus iterative motion
2830         * estimation does not always converge. */
2831        if(s->avctx->me_cmp == FF_CMP_W97)
2832            distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2833        else if(s->avctx->me_cmp == FF_CMP_W53)
2834            distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2835        else{
2836            distortion = 0;
2837            for(i=0; i<4; i++){
2838                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2839                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2840            }
2841        }
2842    }else{
2843        assert(block_w==8);
2844        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2845    }
2846
2847    if(plane_index==0){
2848        for(i=0; i<4; i++){
2849/* ..RRr
2850 * .RXx.
2851 * rxx..
2852 */
2853            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2854        }
2855        if(mb_x == b_stride-2)
2856            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2857    }
2858    return distortion + rate*penalty_factor;
2859}
2860
2861static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2862    int i, y2;
2863    Plane *p= &s->plane[plane_index];
2864    const int block_size = MB_SIZE >> s->block_max_depth;
2865    const int block_w    = plane_index ? block_size/2 : block_size;
2866    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2867    const int obmc_stride= plane_index ? block_size : 2*block_size;
2868    const int ref_stride= s->current_picture.linesize[plane_index];
2869    uint8_t *dst= s->current_picture.data[plane_index];
2870    uint8_t *src= s-> input_picture.data[plane_index];
2871    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2872    // const has only been removed from zero_dst to suppress a warning
2873    static IDWTELEM zero_dst[4096]; //FIXME
2874    const int b_stride = s->b_width << s->block_max_depth;
2875    const int w= p->width;
2876    const int h= p->height;
2877    int distortion= 0;
2878    int rate= 0;
2879    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2880
2881    for(i=0; i<9; i++){
2882        int mb_x2= mb_x + (i%3) - 1;
2883        int mb_y2= mb_y + (i/3) - 1;
2884        int x= block_w*mb_x2 + block_w/2;
2885        int y= block_w*mb_y2 + block_w/2;
2886
2887        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2888                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2889
2890        //FIXME find a cleaner/simpler way to skip the outside stuff
2891        for(y2= y; y2<0; y2++)
2892            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2893        for(y2= h; y2<y+block_w; y2++)
2894            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2895        if(x<0){
2896            for(y2= y; y2<y+block_w; y2++)
2897                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2898        }
2899        if(x+block_w > w){
2900            for(y2= y; y2<y+block_w; y2++)
2901                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2902        }
2903
2904        assert(block_w== 8 || block_w==16);
2905        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2906    }
2907
2908    if(plane_index==0){
2909        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2910        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2911
2912/* ..RRRr
2913 * .RXXx.
2914 * .RXXx.
2915 * rxxx.
2916 */
2917        if(merged)
2918            rate = get_block_bits(s, mb_x, mb_y, 2);
2919        for(i=merged?4:0; i<9; i++){
2920            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2921            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2922        }
2923    }
2924    return distortion + rate*penalty_factor;
2925}
2926
2927static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2928    const int w= b->width;
2929    const int h= b->height;
2930    int x, y;
2931
2932    if(1){
2933        int run=0;
2934        int runs[w*h];
2935        int run_index=0;
2936        int max_index;
2937
2938        for(y=0; y<h; y++){
2939            for(x=0; x<w; x++){
2940                int v, p=0;
2941                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2942                v= src[x + y*stride];
2943
2944                if(y){
2945                    t= src[x + (y-1)*stride];
2946                    if(x){
2947                        lt= src[x - 1 + (y-1)*stride];
2948                    }
2949                    if(x + 1 < w){
2950                        rt= src[x + 1 + (y-1)*stride];
2951                    }
2952                }
2953                if(x){
2954                    l= src[x - 1 + y*stride];
2955                    /*if(x > 1){
2956                        if(orientation==1) ll= src[y + (x-2)*stride];
2957                        else               ll= src[x - 2 + y*stride];
2958                    }*/
2959                }
2960                if(parent){
2961                    int px= x>>1;
2962                    int py= y>>1;
2963                    if(px<b->parent->width && py<b->parent->height)
2964                        p= parent[px + py*2*stride];
2965                }
2966                if(!(/*ll|*/l|lt|t|rt|p)){
2967                    if(v){
2968                        runs[run_index++]= run;
2969                        run=0;
2970                    }else{
2971                        run++;
2972                    }
2973                }
2974            }
2975        }
2976        max_index= run_index;
2977        runs[run_index++]= run;
2978        run_index=0;
2979        run= runs[run_index++];
2980
2981        put_symbol2(&s->c, b->state[30], max_index, 0);
2982        if(run_index <= max_index)
2983            put_symbol2(&s->c, b->state[1], run, 3);
2984
2985        for(y=0; y<h; y++){
2986            if(s->c.bytestream_end - s->c.bytestream < w*40){
2987                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2988                return -1;
2989            }
2990            for(x=0; x<w; x++){
2991                int v, p=0;
2992                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2993                v= src[x + y*stride];
2994
2995                if(y){
2996                    t= src[x + (y-1)*stride];
2997                    if(x){
2998                        lt= src[x - 1 + (y-1)*stride];
2999                    }
3000                    if(x + 1 < w){
3001                        rt= src[x + 1 + (y-1)*stride];
3002                    }
3003                }
3004                if(x){
3005                    l= src[x - 1 + y*stride];
3006                    /*if(x > 1){
3007                        if(orientation==1) ll= src[y + (x-2)*stride];
3008                        else               ll= src[x - 2 + y*stride];
3009                    }*/
3010                }
3011                if(parent){
3012                    int px= x>>1;
3013                    int py= y>>1;
3014                    if(px<b->parent->width && py<b->parent->height)
3015                        p= parent[px + py*2*stride];
3016                }
3017                if(/*ll|*/l|lt|t|rt|p){
3018                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3019
3020                    put_rac(&s->c, &b->state[0][context], !!v);
3021                }else{
3022                    if(!run){
3023                        run= runs[run_index++];
3024
3025                        if(run_index <= max_index)
3026                            put_symbol2(&s->c, b->state[1], run, 3);
3027                        assert(v);
3028                    }else{
3029                        run--;
3030                        assert(!v);
3031                    }
3032                }
3033                if(v){
3034                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3035                    int l2= 2*FFABS(l) + (l<0);
3036                    int t2= 2*FFABS(t) + (t<0);
3037
3038                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3039                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
3040                }
3041            }
3042        }
3043    }
3044    return 0;
3045}
3046
3047static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3048//    encode_subband_qtree(s, b, src, parent, stride, orientation);
3049//    encode_subband_z0run(s, b, src, parent, stride, orientation);
3050    return encode_subband_c0run(s, b, src, parent, stride, orientation);
3051//    encode_subband_dzr(s, b, src, parent, stride, orientation);
3052}
3053
3054static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3055    const int b_stride= s->b_width << s->block_max_depth;
3056    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3057    BlockNode backup= *block;
3058    int rd, index, value;
3059
3060    assert(mb_x>=0 && mb_y>=0);
3061    assert(mb_x<b_stride);
3062
3063    if(intra){
3064        block->color[0] = p[0];
3065        block->color[1] = p[1];
3066        block->color[2] = p[2];
3067        block->type |= BLOCK_INTRA;
3068    }else{
3069        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3070        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3071        if(s->me_cache[index] == value)
3072            return 0;
3073        s->me_cache[index]= value;
3074
3075        block->mx= p[0];
3076        block->my= p[1];
3077        block->type &= ~BLOCK_INTRA;
3078    }
3079
3080    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3081
3082//FIXME chroma
3083    if(rd < *best_rd){
3084        *best_rd= rd;
3085        return 1;
3086    }else{
3087        *block= backup;
3088        return 0;
3089    }
3090}
3091
3092/* special case for int[2] args we discard afterwards,
3093 * fixes compilation problem with gcc 2.95 */
3094static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3095    int p[2] = {p0, p1};
3096    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3097}
3098
3099static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3100    const int b_stride= s->b_width << s->block_max_depth;
3101    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3102    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3103    int rd, index, value;
3104
3105    assert(mb_x>=0 && mb_y>=0);
3106    assert(mb_x<b_stride);
3107    assert(((mb_x|mb_y)&1) == 0);
3108
3109    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3110    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3111    if(s->me_cache[index] == value)
3112        return 0;
3113    s->me_cache[index]= value;
3114
3115    block->mx= p0;
3116    block->my= p1;
3117    block->ref= ref;
3118    block->type &= ~BLOCK_INTRA;
3119    block[1]= block[b_stride]= block[b_stride+1]= *block;
3120
3121    rd= get_4block_rd(s, mb_x, mb_y, 0);
3122
3123//FIXME chroma
3124    if(rd < *best_rd){
3125        *best_rd= rd;
3126        return 1;
3127    }else{
3128        block[0]= backup[0];
3129        block[1]= backup[1];
3130        block[b_stride]= backup[2];
3131        block[b_stride+1]= backup[3];
3132        return 0;
3133    }
3134}
3135
3136static void iterative_me(SnowContext *s){
3137    int pass, mb_x, mb_y;
3138    const int b_width = s->b_width  << s->block_max_depth;
3139    const int b_height= s->b_height << s->block_max_depth;
3140    const int b_stride= b_width;
3141    int color[3];
3142
3143    {
3144        RangeCoder r = s->c;
3145        uint8_t state[sizeof(s->block_state)];
3146        memcpy(state, s->block_state, sizeof(s->block_state));
3147        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3148            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3149                encode_q_branch(s, 0, mb_x, mb_y);
3150        s->c = r;
3151        memcpy(s->block_state, state, sizeof(s->block_state));
3152    }
3153
3154    for(pass=0; pass<25; pass++){
3155        int change= 0;
3156
3157        for(mb_y= 0; mb_y<b_height; mb_y++){
3158            for(mb_x= 0; mb_x<b_width; mb_x++){
3159                int dia_change, i, j, ref;
3160                int best_rd= INT_MAX, ref_rd;
3161                BlockNode backup, ref_b;
3162                const int index= mb_x + mb_y * b_stride;
3163                BlockNode *block= &s->block[index];
3164                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3165                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3166                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3167                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3168                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3169                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3170                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3171                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3172                const int b_w= (MB_SIZE >> s->block_max_depth);
3173                uint8_t obmc_edged[b_w*2][b_w*2];
3174
3175                if(pass && (block->type & BLOCK_OPT))
3176                    continue;
3177                block->type |= BLOCK_OPT;
3178
3179                backup= *block;
3180
3181                if(!s->me_cache_generation)
3182                    memset(s->me_cache, 0, sizeof(s->me_cache));
3183                s->me_cache_generation += 1<<22;
3184
3185                //FIXME precalculate
3186                {
3187                    int x, y;
3188                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3189                    if(mb_x==0)
3190                        for(y=0; y<b_w*2; y++)
3191                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3192                    if(mb_x==b_stride-1)
3193                        for(y=0; y<b_w*2; y++)
3194                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3195                    if(mb_y==0){
3196                        for(x=0; x<b_w*2; x++)
3197                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3198                        for(y=1; y<b_w; y++)
3199                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3200                    }
3201                    if(mb_y==b_height-1){
3202                        for(x=0; x<b_w*2; x++)
3203                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3204                        for(y=b_w; y<b_w*2-1; y++)
3205                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3206                    }
3207                }
3208
3209                //skip stuff outside the picture
3210                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3211                    uint8_t *src= s->  input_picture.data[0];
3212                    uint8_t *dst= s->current_picture.data[0];
3213                    const int stride= s->current_picture.linesize[0];
3214                    const int block_w= MB_SIZE >> s->block_max_depth;
3215                    const int sx= block_w*mb_x - block_w/2;
3216                    const int sy= block_w*mb_y - block_w/2;
3217                    const int w= s->plane[0].width;
3218                    const int h= s->plane[0].height;
3219                    int y;
3220
3221                    for(y=sy; y<0; y++)
3222                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3223                    for(y=h; y<sy+block_w*2; y++)
3224                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3225                    if(sx<0){
3226                        for(y=sy; y<sy+block_w*2; y++)
3227                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3228                    }
3229                    if(sx+block_w*2 > w){
3230                        for(y=sy; y<sy+block_w*2; y++)
3231                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3232                    }
3233                }
3234
3235                // intra(black) = neighbors' contribution to the current block
3236                for(i=0; i<3; i++)
3237                    color[i]= get_dc(s, mb_x, mb_y, i);
3238
3239                // get previous score (cannot be cached due to OBMC)
3240                if(pass > 0 && (block->type&BLOCK_INTRA)){
3241                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3242                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3243                }else
3244                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3245
3246                ref_b= *block;
3247                ref_rd= best_rd;
3248                for(ref=0; ref < s->ref_frames; ref++){
3249                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3250                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3251                        continue;
3252                    block->ref= ref;
3253                    best_rd= INT_MAX;
3254
3255                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3256                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3257                    if(tb)
3258                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3259                    if(lb)
3260                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3261                    if(rb)
3262                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3263                    if(bb)
3264                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3265
3266                    /* fullpel ME */
3267                    //FIXME avoid subpel interpolation / round to nearest integer
3268                    do{
3269                        dia_change=0;
3270                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3271                            for(j=0; j<i; j++){
3272                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3273                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3274                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3275                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3276                            }
3277                        }
3278                    }while(dia_change);
3279                    /* subpel ME */
3280                    do{
3281                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3282                        dia_change=0;
3283                        for(i=0; i<8; i++)
3284                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3285                    }while(dia_change);
3286                    //FIXME or try the standard 2 pass qpel or similar
3287
3288                    mvr[0][0]= block->mx;
3289                    mvr[0][1]= block->my;
3290                    if(ref_rd > best_rd){
3291                        ref_rd= best_rd;
3292                        ref_b= *block;
3293                    }
3294                }
3295                best_rd= ref_rd;
3296                *block= ref_b;
3297#if 1
3298                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3299                //FIXME RD style color selection
3300#endif
3301                if(!same_block(block, &backup)){
3302                    if(tb ) tb ->type &= ~BLOCK_OPT;
3303                    if(lb ) lb ->type &= ~BLOCK_OPT;
3304                    if(rb ) rb ->type &= ~BLOCK_OPT;
3305                    if(bb ) bb ->type &= ~BLOCK_OPT;
3306                    if(tlb) tlb->type &= ~BLOCK_OPT;
3307                    if(trb) trb->type &= ~BLOCK_OPT;
3308                    if(blb) blb->type &= ~BLOCK_OPT;
3309                    if(brb) brb->type &= ~BLOCK_OPT;
3310                    change ++;
3311                }
3312            }
3313        }
3314        av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3315        if(!change)
3316            break;
3317    }
3318
3319    if(s->block_max_depth == 1){
3320        int change= 0;
3321        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3322            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3323                int i;
3324                int best_rd, init_rd;
3325                const int index= mb_x + mb_y * b_stride;
3326                BlockNode *b[4];
3327
3328                b[0]= &s->block[index];
3329                b[1]= b[0]+1;
3330                b[2]= b[0]+b_stride;
3331                b[3]= b[2]+1;
3332                if(same_block(b[0], b[1]) &&
3333                   same_block(b[0], b[2]) &&
3334                   same_block(b[0], b[3]))
3335                    continue;
3336
3337                if(!s->me_cache_generation)
3338                    memset(s->me_cache, 0, sizeof(s->me_cache));
3339                s->me_cache_generation += 1<<22;
3340
3341                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3342
3343                //FIXME more multiref search?
3344                check_4block_inter(s, mb_x, mb_y,
3345                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3346                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3347
3348                for(i=0; i<4; i++)
3349                    if(!(b[i]->type&BLOCK_INTRA))
3350                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3351
3352                if(init_rd != best_rd)
3353                    change++;
3354            }
3355        }
3356        av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3357    }
3358}
3359
3360static void encode_blocks(SnowContext *s, int search){
3361    int x, y;
3362    int w= s->b_width;
3363    int h= s->b_height;
3364
3365    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3366        iterative_me(s);
3367
3368    for(y=0; y<h; y++){
3369        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3370            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3371            return;
3372        }
3373        for(x=0; x<w; x++){
3374            if(s->avctx->me_method == ME_ITER || !search)
3375                encode_q_branch2(s, 0, x, y);
3376            else
3377                encode_q_branch (s, 0, x, y);
3378        }
3379    }
3380}
3381
3382static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3383    const int w= b->width;
3384    const int h= b->height;
3385    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3386    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3387    int x,y, thres1, thres2;
3388
3389    if(s->qlog == LOSSLESS_QLOG){
3390        for(y=0; y<h; y++)
3391            for(x=0; x<w; x++)
3392                dst[x + y*stride]= src[x + y*stride];
3393        return;
3394    }
3395
3396    bias= bias ? 0 : (3*qmul)>>3;
3397    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3398    thres2= 2*thres1;
3399
3400    if(!bias){
3401        for(y=0; y<h; y++){
3402            for(x=0; x<w; x++){
3403                int i= src[x + y*stride];
3404
3405                if((unsigned)(i+thres1) > thres2){
3406                    if(i>=0){
3407                        i<<= QEXPSHIFT;
3408                        i/= qmul; //FIXME optimize
3409                        dst[x + y*stride]=  i;
3410                    }else{
3411                        i= -i;
3412                        i<<= QEXPSHIFT;
3413                        i/= qmul; //FIXME optimize
3414                        dst[x + y*stride]= -i;
3415                    }
3416                }else
3417                    dst[x + y*stride]= 0;
3418            }
3419        }
3420    }else{
3421        for(y=0; y<h; y++){
3422            for(x=0; x<w; x++){
3423                int i= src[x + y*stride];
3424
3425                if((unsigned)(i+thres1) > thres2){
3426                    if(i>=0){
3427                        i<<= QEXPSHIFT;
3428                        i= (i + bias) / qmul; //FIXME optimize
3429                        dst[x + y*stride]=  i;
3430                    }else{
3431                        i= -i;
3432                        i<<= QEXPSHIFT;
3433                        i= (i + bias) / qmul; //FIXME optimize
3434                        dst[x + y*stride]= -i;
3435                    }
3436                }else
3437                    dst[x + y*stride]= 0;
3438            }
3439        }
3440    }
3441}
3442
3443static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3444    const int w= b->width;
3445    const int h= b->height;
3446    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3447    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3448    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3449    int x,y;
3450
3451    if(s->qlog == LOSSLESS_QLOG) return;
3452
3453    for(y=0; y<h; y++){
3454        for(x=0; x<w; x++){
3455            int i= src[x + y*stride];
3456            if(i<0){
3457                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3458            }else if(i>0){
3459                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3460            }
3461        }
3462    }
3463}
3464
3465static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3466    const int w= b->width;
3467    const int h= b->height;
3468    int x,y;
3469
3470    for(y=h-1; y>=0; y--){
3471        for(x=w-1; x>=0; x--){
3472            int i= x + y*stride;
3473
3474            if(x){
3475                if(use_median){
3476                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3477                    else  src[i] -= src[i - 1];
3478                }else{
3479                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3480                    else  src[i] -= src[i - 1];
3481                }
3482            }else{
3483                if(y) src[i] -= src[i - stride];
3484            }
3485        }
3486    }
3487}
3488
3489static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3490    const int w= b->width;
3491    const int h= b->height;
3492    int x,y;
3493
3494    for(y=0; y<h; y++){
3495        for(x=0; x<w; x++){
3496            int i= x + y*stride;
3497
3498            if(x){
3499                if(use_median){
3500                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3501                    else  src[i] += src[i - 1];
3502                }else{
3503                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3504                    else  src[i] += src[i - 1];
3505                }
3506            }else{
3507                if(y) src[i] += src[i - stride];
3508            }
3509        }
3510    }
3511}
3512
3513static void encode_qlogs(SnowContext *s){
3514    int plane_index, level, orientation;
3515
3516    for(plane_index=0; plane_index<2; plane_index++){
3517        for(level=0; level<s->spatial_decomposition_count; level++){
3518            for(orientation=level ? 1:0; orientation<4; orientation++){
3519                if(orientation==2) continue;
3520                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3521            }
3522        }
3523    }
3524}
3525
3526static void encode_header(SnowContext *s){
3527    int plane_index, i;
3528    uint8_t kstate[32];
3529
3530    memset(kstate, MID_STATE, sizeof(kstate));
3531
3532    put_rac(&s->c, kstate, s->keyframe);
3533    if(s->keyframe || s->always_reset){
3534        reset_contexts(s);
3535        s->last_spatial_decomposition_type=
3536        s->last_qlog=
3537        s->last_qbias=
3538        s->last_mv_scale=
3539        s->last_block_max_depth= 0;
3540        for(plane_index=0; plane_index<2; plane_index++){
3541            Plane *p= &s->plane[plane_index];
3542            p->last_htaps=0;
3543            p->last_diag_mc=0;
3544            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3545        }
3546    }
3547    if(s->keyframe){
3548        put_symbol(&s->c, s->header_state, s->version, 0);
3549        put_rac(&s->c, s->header_state, s->always_reset);
3550        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3551        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3552        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3553        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3554        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3555        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3556        put_rac(&s->c, s->header_state, s->spatial_scalability);
3557//        put_rac(&s->c, s->header_state, s->rate_scalability);
3558        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3559
3560        encode_qlogs(s);
3561    }
3562
3563    if(!s->keyframe){
3564        int update_mc=0;
3565        for(plane_index=0; plane_index<2; plane_index++){
3566            Plane *p= &s->plane[plane_index];
3567            update_mc |= p->last_htaps   != p->htaps;
3568            update_mc |= p->last_diag_mc != p->diag_mc;
3569            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3570        }
3571        put_rac(&s->c, s->header_state, update_mc);
3572        if(update_mc){
3573            for(plane_index=0; plane_index<2; plane_index++){
3574                Plane *p= &s->plane[plane_index];
3575                put_rac(&s->c, s->header_state, p->diag_mc);
3576                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3577                for(i= p->htaps/2; i; i--)
3578                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3579            }
3580        }
3581        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3582            put_rac(&s->c, s->header_state, 1);
3583            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3584            encode_qlogs(s);
3585        }else
3586            put_rac(&s->c, s->header_state, 0);
3587    }
3588
3589    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3590    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3591    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3592    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3593    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3594
3595}
3596
3597static void update_last_header_values(SnowContext *s){
3598    int plane_index;
3599
3600    if(!s->keyframe){
3601        for(plane_index=0; plane_index<2; plane_index++){
3602            Plane *p= &s->plane[plane_index];
3603            p->last_diag_mc= p->diag_mc;
3604            p->last_htaps  = p->htaps;
3605            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3606        }
3607    }
3608
3609    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3610    s->last_qlog                        = s->qlog;
3611    s->last_qbias                       = s->qbias;
3612    s->last_mv_scale                    = s->mv_scale;
3613    s->last_block_max_depth             = s->block_max_depth;
3614    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3615}
3616
3617static int qscale2qlog(int qscale){
3618    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3619           + 61*QROOT/8; //<64 >60
3620}
3621
3622static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3623{
3624    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3625     * FIXME we know exact mv bits at this point,
3626     * but ratecontrol isn't set up to include them. */
3627    uint32_t coef_sum= 0;
3628    int level, orientation, delta_qlog;
3629
3630    for(level=0; level<s->spatial_decomposition_count; level++){
3631        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3632            SubBand *b= &s->plane[0].band[level][orientation];
3633            IDWTELEM *buf= b->ibuf;
3634            const int w= b->width;
3635            const int h= b->height;
3636            const int stride= b->stride;
3637            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3638            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3639            const int qdiv= (1<<16)/qmul;
3640            int x, y;
3641            //FIXME this is ugly
3642            for(y=0; y<h; y++)
3643                for(x=0; x<w; x++)
3644                    buf[x+y*stride]= b->buf[x+y*stride];
3645            if(orientation==0)
3646                decorrelate(s, b, buf, stride, 1, 0);
3647            for(y=0; y<h; y++)
3648                for(x=0; x<w; x++)
3649                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3650        }
3651    }
3652
3653    /* ugly, ratecontrol just takes a sqrt again */
3654    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3655    assert(coef_sum < INT_MAX);
3656
3657    if(pict->pict_type == FF_I_TYPE){
3658        s->m.current_picture.mb_var_sum= coef_sum;
3659        s->m.current_picture.mc_mb_var_sum= 0;
3660    }else{
3661        s->m.current_picture.mc_mb_var_sum= coef_sum;
3662        s->m.current_picture.mb_var_sum= 0;
3663    }
3664
3665    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3666    if (pict->quality < 0)
3667        return INT_MIN;
3668    s->lambda= pict->quality * 3/2;
3669    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3670    s->qlog+= delta_qlog;
3671    return delta_qlog;
3672}
3673
3674static void calculate_visual_weight(SnowContext *s, Plane *p){
3675    int width = p->width;
3676    int height= p->height;
3677    int level, orientation, x, y;
3678
3679    for(level=0; level<s->spatial_decomposition_count; level++){
3680        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3681            SubBand *b= &p->band[level][orientation];
3682            IDWTELEM *ibuf= b->ibuf;
3683            int64_t error=0;
3684
3685            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3686            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3687            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3688            for(y=0; y<height; y++){
3689                for(x=0; x<width; x++){
3690                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3691                    error += d*d;
3692                }
3693            }
3694
3695            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3696        }
3697    }
3698}
3699
3700static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3701    SnowContext *s = avctx->priv_data;
3702    RangeCoder * const c= &s->c;
3703    AVFrame *pict = data;
3704    const int width= s->avctx->width;
3705    const int height= s->avctx->height;
3706    int level, orientation, plane_index, i, y;
3707    uint8_t rc_header_bak[sizeof(s->header_state)];
3708    uint8_t rc_block_bak[sizeof(s->block_state)];
3709
3710    ff_init_range_encoder(c, buf, buf_size);
3711    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3712
3713    for(i=0; i<3; i++){
3714        int shift= !!i;
3715        for(y=0; y<(height>>shift); y++)
3716            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3717                   &pict->data[i][y * pict->linesize[i]],
3718                   width>>shift);
3719    }
3720    s->new_picture = *pict;
3721
3722    s->m.picture_number= avctx->frame_number;
3723    if(avctx->flags&CODEC_FLAG_PASS2){
3724        s->m.pict_type =
3725        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3726        s->keyframe= pict->pict_type==FF_I_TYPE;
3727        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3728            pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3729            if (pict->quality < 0)
3730                return -1;
3731        }
3732    }else{
3733        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3734        s->m.pict_type=
3735        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3736    }
3737
3738    if(s->pass1_rc && avctx->frame_number == 0)
3739        pict->quality= 2*FF_QP2LAMBDA;
3740    if(pict->quality){
3741        s->qlog= qscale2qlog(pict->quality);
3742        s->lambda = pict->quality * 3/2;
3743    }
3744    if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3745        s->qlog= LOSSLESS_QLOG;
3746        s->lambda = 0;
3747    }//else keep previous frame's qlog until after motion estimation
3748
3749    frame_start(s);
3750
3751    s->m.current_picture_ptr= &s->m.current_picture;
3752    s->m.last_picture.pts= s->m.current_picture.pts;
3753    s->m.current_picture.pts= pict->pts;
3754    if(pict->pict_type == FF_P_TYPE){
3755        int block_width = (width +15)>>4;
3756        int block_height= (height+15)>>4;
3757        int stride= s->current_picture.linesize[0];
3758
3759        assert(s->current_picture.data[0]);
3760        assert(s->last_picture[0].data[0]);
3761
3762        s->m.avctx= s->avctx;
3763        s->m.current_picture.data[0]= s->current_picture.data[0];
3764        s->m.   last_picture.data[0]= s->last_picture[0].data[0];
3765        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3766        s->m.   last_picture_ptr= &s->m.   last_picture;
3767        s->m.linesize=
3768        s->m.   last_picture.linesize[0]=
3769        s->m.    new_picture.linesize[0]=
3770        s->m.current_picture.linesize[0]= stride;
3771        s->m.uvlinesize= s->current_picture.linesize[1];
3772        s->m.width = width;
3773        s->m.height= height;
3774        s->m.mb_width = block_width;
3775        s->m.mb_height= block_height;
3776        s->m.mb_stride=   s->m.mb_width+1;
3777        s->m.b8_stride= 2*s->m.mb_width+1;
3778        s->m.f_code=1;
3779        s->m.pict_type= pict->pict_type;
3780        s->m.me_method= s->avctx->me_method;
3781        s->m.me.scene_change_score=0;
3782        s->m.flags= s->avctx->flags;
3783        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3784        s->m.out_format= FMT_H263;
3785        s->m.unrestricted_mv= 1;
3786
3787        s->m.lambda = s->lambda;
3788        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3789        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3790
3791        s->m.dsp= s->dsp; //move
3792        ff_init_me(&s->m);
3793        s->dsp= s->m.dsp;
3794    }
3795
3796    if(s->pass1_rc){
3797        memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3798        memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3799    }
3800
3801redo_frame:
3802
3803    if(pict->pict_type == FF_I_TYPE)
3804        s->spatial_decomposition_count= 5;
3805    else
3806        s->spatial_decomposition_count= 5;
3807
3808    s->m.pict_type = pict->pict_type;
3809    s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
3810
3811    common_init_after_header(avctx);
3812
3813    if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3814        for(plane_index=0; plane_index<3; plane_index++){
3815            calculate_visual_weight(s, &s->plane[plane_index]);
3816        }
3817    }
3818
3819    encode_header(s);
3820    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3821    encode_blocks(s, 1);
3822    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3823
3824    for(plane_index=0; plane_index<3; plane_index++){
3825        Plane *p= &s->plane[plane_index];
3826        int w= p->width;
3827        int h= p->height;
3828        int x, y;
3829//        int bits= put_bits_count(&s->c.pb);
3830
3831        if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
3832            //FIXME optimize
3833            if(pict->data[plane_index]) //FIXME gray hack
3834                for(y=0; y<h; y++){
3835                    for(x=0; x<w; x++){
3836                        s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3837                    }
3838                }
3839            predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3840
3841            if(   plane_index==0
3842               && pict->pict_type == FF_P_TYPE
3843               && !(avctx->flags&CODEC_FLAG_PASS2)
3844               && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3845                ff_init_range_encoder(c, buf, buf_size);
3846                ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3847                pict->pict_type= FF_I_TYPE;
3848                s->keyframe=1;
3849                s->current_picture.key_frame=1;
3850                goto redo_frame;
3851            }
3852
3853            if(s->qlog == LOSSLESS_QLOG){
3854                for(y=0; y<h; y++){
3855                    for(x=0; x<w; x++){
3856                        s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3857                    }
3858                }
3859            }else{
3860                for(y=0; y<h; y++){
3861                    for(x=0; x<w; x++){
3862                        s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3863                    }
3864                }
3865            }
3866
3867            /*  if(QUANTIZE2)
3868                dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3869            else*/
3870                ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3871
3872            if(s->pass1_rc && plane_index==0){
3873                int delta_qlog = ratecontrol_1pass(s, pict);
3874                if (delta_qlog <= INT_MIN)
3875                    return -1;
3876                if(delta_qlog){
3877                    //reordering qlog in the bitstream would eliminate this reset
3878                    ff_init_range_encoder(c, buf, buf_size);
3879                    memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3880                    memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3881                    encode_header(s);
3882                    encode_blocks(s, 0);
3883                }
3884            }
3885
3886            for(level=0; level<s->spatial_decomposition_count; level++){
3887                for(orientation=level ? 1 : 0; orientation<4; orientation++){
3888                    SubBand *b= &p->band[level][orientation];
3889
3890                    if(!QUANTIZE2)
3891                        quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3892                    if(orientation==0)
3893                        decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
3894                    encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3895                    assert(b->parent==NULL || b->parent->stride == b->stride*2);
3896                    if(orientation==0)
3897                        correlate(s, b, b->ibuf, b->stride, 1, 0);
3898                }
3899            }
3900
3901            for(level=0; level<s->spatial_decomposition_count; level++){
3902                for(orientation=level ? 1 : 0; orientation<4; orientation++){
3903                    SubBand *b= &p->band[level][orientation];
3904
3905                    dequantize(s, b, b->ibuf, b->stride);
3906                }
3907            }
3908
3909            ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3910            if(s->qlog == LOSSLESS_QLOG){
3911                for(y=0; y<h; y++){
3912                    for(x=0; x<w; x++){
3913                        s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3914                    }
3915                }
3916            }
3917            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3918        }else{
3919            //ME/MC only
3920            if(pict->pict_type == FF_I_TYPE){
3921                for(y=0; y<h; y++){
3922                    for(x=0; x<w; x++){
3923                        s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3924                            pict->data[plane_index][y*pict->linesize[plane_index] + x];
3925                    }
3926                }
3927            }else{
3928                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3929                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3930            }
3931        }
3932        if(s->avctx->flags&CODEC_FLAG_PSNR){
3933            int64_t error= 0;
3934
3935            if(pict->data[plane_index]) //FIXME gray hack
3936                for(y=0; y<h; y++){
3937                    for(x=0; x<w; x++){
3938                        int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3939                        error += d*d;
3940                    }
3941                }
3942            s->avctx->error[plane_index] += error;
3943            s->current_picture.error[plane_index] = error;
3944        }
3945
3946    }
3947
3948    update_last_header_values(s);
3949
3950    release_buffer(avctx);
3951
3952    s->current_picture.coded_picture_number = avctx->frame_number;
3953    s->current_picture.pict_type = pict->pict_type;
3954    s->current_picture.quality = pict->quality;
3955    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3956    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3957    s->m.current_picture.display_picture_number =
3958    s->m.current_picture.coded_picture_number = avctx->frame_number;
3959    s->m.current_picture.quality = pict->quality;
3960    s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3961    if(s->pass1_rc)
3962        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3963            return -1;
3964    if(avctx->flags&CODEC_FLAG_PASS1)
3965        ff_write_pass1_stats(&s->m);
3966    s->m.last_pict_type = s->m.pict_type;
3967    avctx->frame_bits = s->m.frame_bits;
3968    avctx->mv_bits = s->m.mv_bits;
3969    avctx->misc_bits = s->m.misc_bits;
3970    avctx->p_tex_bits = s->m.p_tex_bits;
3971
3972    emms_c();
3973
3974    return ff_rac_terminate(c);
3975}
3976
3977static av_cold int encode_end(AVCodecContext *avctx)
3978{
3979    SnowContext *s = avctx->priv_data;
3980
3981    common_end(s);
3982    if (s->input_picture.data[0])
3983        avctx->release_buffer(avctx, &s->input_picture);
3984    av_free(avctx->stats_out);
3985
3986    return 0;
3987}
3988
3989AVCodec snow_encoder = {
3990    "snow",
3991    AVMEDIA_TYPE_VIDEO,
3992    CODEC_ID_SNOW,
3993    sizeof(SnowContext),
3994    encode_init,
3995    encode_frame,
3996    encode_end,
3997    .long_name = NULL_IF_CONFIG_SMALL("Snow"),
3998};
3999#endif
4000
4001
4002#ifdef TEST
4003#undef malloc
4004#undef free
4005#undef printf
4006
4007#include "libavutil/lfg.h"
4008
4009int main(void){
4010    int width=256;
4011    int height=256;
4012    int buffer[2][width*height];
4013    SnowContext s;
4014    int i;
4015    AVLFG prng;
4016    s.spatial_decomposition_count=6;
4017    s.spatial_decomposition_type=1;
4018
4019    av_lfg_init(&prng, 1);
4020
4021    printf("testing 5/3 DWT\n");
4022    for(i=0; i<width*height; i++)
4023        buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4024
4025    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4026    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4027
4028    for(i=0; i<width*height; i++)
4029        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4030
4031    printf("testing 9/7 DWT\n");
4032    s.spatial_decomposition_type=0;
4033    for(i=0; i<width*height; i++)
4034        buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4035
4036    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4037    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4038
4039    for(i=0; i<width*height; i++)
4040        if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4041
4042#if 0
4043    printf("testing AC coder\n");
4044    memset(s.header_state, 0, sizeof(s.header_state));
4045    ff_init_range_encoder(&s.c, buffer[0], 256*256);
4046    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4047
4048    for(i=-256; i<256; i++){
4049        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4050    }
4051    ff_rac_terminate(&s.c);
4052
4053    memset(s.header_state, 0, sizeof(s.header_state));
4054    ff_init_range_decoder(&s.c, buffer[0], 256*256);
4055    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4056
4057    for(i=-256; i<256; i++){
4058        int j;
4059        j= get_symbol(&s.c, s.header_state, 1);
4060        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4061    }
4062#endif
4063    {
4064    int level, orientation, x, y;
4065    int64_t errors[8][4];
4066    int64_t g=0;
4067
4068        memset(errors, 0, sizeof(errors));
4069        s.spatial_decomposition_count=3;
4070        s.spatial_decomposition_type=0;
4071        for(level=0; level<s.spatial_decomposition_count; level++){
4072            for(orientation=level ? 1 : 0; orientation<4; orientation++){
4073                int w= width  >> (s.spatial_decomposition_count-level);
4074                int h= height >> (s.spatial_decomposition_count-level);
4075                int stride= width  << (s.spatial_decomposition_count-level);
4076                DWTELEM *buf= buffer[0];
4077                int64_t error=0;
4078
4079                if(orientation&1) buf+=w;
4080                if(orientation>1) buf+=stride>>1;
4081
4082                memset(buffer[0], 0, sizeof(int)*width*height);
4083                buf[w/2 + h/2*stride]= 256*256;
4084                ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4085                for(y=0; y<height; y++){
4086                    for(x=0; x<width; x++){
4087                        int64_t d= buffer[0][x + y*width];
4088                        error += d*d;
4089                        if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4090                    }
4091                    if(FFABS(height/2-y)<9 && level==2) printf("\n");
4092                }
4093                error= (int)(sqrt(error)+0.5);
4094                errors[level][orientation]= error;
4095                if(g) g=av_gcd(g, error);
4096                else g= error;
4097            }
4098        }
4099        printf("static int const visual_weight[][4]={\n");
4100        for(level=0; level<s.spatial_decomposition_count; level++){
4101            printf("  {");
4102            for(orientation=0; orientation<4; orientation++){
4103                printf("%8"PRId64",", errors[level][orientation]/g);
4104            }
4105            printf("},\n");
4106        }
4107        printf("};\n");
4108        {
4109            int level=2;
4110            int w= width  >> (s.spatial_decomposition_count-level);
4111            //int h= height >> (s.spatial_decomposition_count-level);
4112            int stride= width  << (s.spatial_decomposition_count-level);
4113            DWTELEM *buf= buffer[0];
4114            int64_t error=0;
4115
4116            buf+=w;
4117            buf+=stride>>1;
4118
4119            memset(buffer[0], 0, sizeof(int)*width*height);
4120#if 1
4121            for(y=0; y<height; y++){
4122                for(x=0; x<width; x++){
4123                    int tab[4]={0,2,3,1};
4124                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4125                }
4126            }
4127            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4128#else
4129            for(y=0; y<h; y++){
4130                for(x=0; x<w; x++){
4131                    buf[x + y*stride  ]=169;
4132                    buf[x + y*stride-w]=64;
4133                }
4134            }
4135            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4136#endif
4137            for(y=0; y<height; y++){
4138                for(x=0; x<width; x++){
4139                    int64_t d= buffer[0][x + y*width];
4140                    error += d*d;
4141                    if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4142                }
4143                if(FFABS(height/2-y)<9) printf("\n");
4144            }
4145        }
4146
4147    }
4148    return 0;
4149}
4150#endif /* TEST */
4151