1/*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "avcodec.h"
22#include "dsputil.h"
23#include "snow.h"
24
25#include "rangecoder.h"
26#include "mathops.h"
27
28#include "mpegvideo.h"
29
30#undef NDEBUG
31#include <assert.h>
32
33static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50};
51static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68};
69static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86};
87static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104};
105static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122};
123static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140};
141static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158};
159static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
176};
177
178#if 0 //64*cubic
179static const uint8_t obmc32[1024]={
180  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
181  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
182  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
183  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
184  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
185  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
186  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
187  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
188  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
189  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
190  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
191  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
192  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
193  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
194  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
195  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
196  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
197  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
198  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
199  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
200  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
201  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
202  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
203  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
204  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
205  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
206  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
207  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
208  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
209  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
210  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
211  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
212//error:0.000022
213};
214static const uint8_t obmc16[256]={
215  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
216  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
217  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
218  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
219  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
220  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
221  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
222  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
223  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
224  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
225  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
226  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
227  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
228  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
229  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
230  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
231//error:0.000033
232};
233#elif 1 // 64*linear
234static const uint8_t obmc32[1024]={
235  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
236  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
237  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
238  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
239  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
240  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
241  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
242  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
243  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
244  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
245  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
246  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
247  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
248  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
249  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
250  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
251  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
252  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
253  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
254  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
255  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
256  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
257  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
258  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
259  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
260  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
261  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
262  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
263  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
264  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
265  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
266  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
267 //error:0.000020
268};
269static const uint8_t obmc16[256]={
270  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
271  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
272  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
273  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
274  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
282  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
283  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
284  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
285  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
286//error:0.000015
287};
288#else //64*cos
289static const uint8_t obmc32[1024]={
290  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
291  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
292  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
293  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
294  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
295  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
296  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
297  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
298  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
299  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
300  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
301  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
302  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
303  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
304  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
305  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
306  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
307  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
308  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
309  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
310  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
311  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
312  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
313  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
314  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
315  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
316  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
317  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
318  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
319  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
320  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
321  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
322//error:0.000022
323};
324static const uint8_t obmc16[256]={
325  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
326  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
327  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
328  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
329  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
330  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
331  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
332  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
333  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
334  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
335  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
336  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
337  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
338  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
339  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
340  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
341//error:0.000022
342};
343#endif /* 0 */
344
345//linear *64
346static const uint8_t obmc8[64]={
347  4, 12, 20, 28, 28, 20, 12,  4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354  4, 12, 20, 28, 28, 20, 12,  4,
355//error:0.000000
356};
357
358//linear *64
359static const uint8_t obmc4[16]={
360 16, 48, 48, 16,
361 48,144,144, 48,
362 48,144,144, 48,
363 16, 48, 48, 16,
364//error:0.000000
365};
366
367static const uint8_t * const obmc_tab[4]={
368    obmc32, obmc16, obmc8, obmc4
369};
370
371static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372
373typedef struct BlockNode{
374    int16_t mx;
375    int16_t my;
376    uint8_t ref;
377    uint8_t color[3];
378    uint8_t type;
379//#define TYPE_SPLIT    1
380#define BLOCK_INTRA   1
381#define BLOCK_OPT     2
382//#define TYPE_NOCOLOR  4
383    uint8_t level; //FIXME merge into type?
384}BlockNode;
385
386static const BlockNode null_block= { //FIXME add border maybe
387    .color= {128,128,128},
388    .mx= 0,
389    .my= 0,
390    .ref= 0,
391    .type= 0,
392    .level= 0,
393};
394
395#define LOG2_MB_SIZE 4
396#define MB_SIZE (1<<LOG2_MB_SIZE)
397#define ENCODER_EXTRA_BITS 4
398#define HTAPS_MAX 8
399
400typedef struct x_and_coeff{
401    int16_t x;
402    uint16_t coeff;
403} x_and_coeff;
404
405typedef struct SubBand{
406    int level;
407    int stride;
408    int width;
409    int height;
410    int qlog;        ///< log(qscale)/log[2^(1/6)]
411    DWTELEM *buf;
412    IDWTELEM *ibuf;
413    int buf_x_offset;
414    int buf_y_offset;
415    int stride_line; ///< Stride measured in lines, not pixels.
416    x_and_coeff * x_coeff;
417    struct SubBand *parent;
418    uint8_t state[/*7*2*/ 7 + 512][32];
419}SubBand;
420
421typedef struct Plane{
422    int width;
423    int height;
424    SubBand band[MAX_DECOMPOSITIONS][4];
425
426    int htaps;
427    int8_t hcoeff[HTAPS_MAX/2];
428    int diag_mc;
429    int fast_mc;
430
431    int last_htaps;
432    int8_t last_hcoeff[HTAPS_MAX/2];
433    int last_diag_mc;
434}Plane;
435
436typedef struct SnowContext{
437//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438
439    AVCodecContext *avctx;
440    RangeCoder c;
441    DSPContext dsp;
442    AVFrame new_picture;
443    AVFrame input_picture;              ///< new_picture with the internal linesizes
444    AVFrame current_picture;
445    AVFrame last_picture[MAX_REF_FRAMES];
446    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447    AVFrame mconly_picture;
448//     uint8_t q_context[16];
449    uint8_t header_state[32];
450    uint8_t block_state[128 + 32*128];
451    int keyframe;
452    int always_reset;
453    int version;
454    int spatial_decomposition_type;
455    int last_spatial_decomposition_type;
456    int temporal_decomposition_type;
457    int spatial_decomposition_count;
458    int last_spatial_decomposition_count;
459    int temporal_decomposition_count;
460    int max_ref_frames;
461    int ref_frames;
462    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463    uint32_t *ref_scores[MAX_REF_FRAMES];
464    DWTELEM *spatial_dwt_buffer;
465    IDWTELEM *spatial_idwt_buffer;
466    int colorspace_type;
467    int chroma_h_shift;
468    int chroma_v_shift;
469    int spatial_scalability;
470    int qlog;
471    int last_qlog;
472    int lambda;
473    int lambda2;
474    int pass1_rc;
475    int mv_scale;
476    int last_mv_scale;
477    int qbias;
478    int last_qbias;
479#define QBIAS_SHIFT 3
480    int b_width;
481    int b_height;
482    int block_max_depth;
483    int last_block_max_depth;
484    Plane plane[MAX_PLANES];
485    BlockNode *block;
486#define ME_CACHE_SIZE 1024
487    int me_cache[ME_CACHE_SIZE];
488    int me_cache_generation;
489    slice_buffer sb;
490
491    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
492
493    uint8_t *scratchbuf;
494}SnowContext;
495
496typedef struct {
497    IDWTELEM *b0;
498    IDWTELEM *b1;
499    IDWTELEM *b2;
500    IDWTELEM *b3;
501    int y;
502} DWTCompose;
503
504#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506
507static void iterative_me(SnowContext *s);
508
509static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
510{
511    int i;
512
513    buf->base_buffer = base_buffer;
514    buf->line_count = line_count;
515    buf->line_width = line_width;
516    buf->data_count = max_allocated_lines;
517    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
519
520    for(i = 0; i < max_allocated_lines; i++){
521        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
522    }
523
524    buf->data_stack_top = max_allocated_lines - 1;
525}
526
527static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
528{
529    int offset;
530    IDWTELEM * buffer;
531
532    assert(buf->data_stack_top >= 0);
533//  assert(!buf->line[line]);
534    if (buf->line[line])
535        return buf->line[line];
536
537    offset = buf->line_width * line;
538    buffer = buf->data_stack[buf->data_stack_top];
539    buf->data_stack_top--;
540    buf->line[line] = buffer;
541
542    return buffer;
543}
544
545static void slice_buffer_release(slice_buffer * buf, int line)
546{
547    int offset;
548    IDWTELEM * buffer;
549
550    assert(line >= 0 && line < buf->line_count);
551    assert(buf->line[line]);
552
553    offset = buf->line_width * line;
554    buffer = buf->line[line];
555    buf->data_stack_top++;
556    buf->data_stack[buf->data_stack_top] = buffer;
557    buf->line[line] = NULL;
558}
559
560static void slice_buffer_flush(slice_buffer * buf)
561{
562    int i;
563    for(i = 0; i < buf->line_count; i++){
564        if (buf->line[i])
565            slice_buffer_release(buf, i);
566    }
567}
568
569static void slice_buffer_destroy(slice_buffer * buf)
570{
571    int i;
572    slice_buffer_flush(buf);
573
574    for(i = buf->data_count - 1; i >= 0; i--){
575        av_freep(&buf->data_stack[i]);
576    }
577    av_freep(&buf->data_stack);
578    av_freep(&buf->line);
579}
580
581#ifdef __sgi
582// Avoid a name clash on SGI IRIX
583#undef qexp
584#endif
585#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
586static uint8_t qexp[QROOT];
587
588static inline int mirror(int v, int m){
589    while((unsigned)v > (unsigned)m){
590        v=-v;
591        if(v<0) v+= 2*m;
592    }
593    return v;
594}
595
596static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
597    int i;
598
599    if(v){
600        const int a= FFABS(v);
601        const int e= av_log2(a);
602#if 1
603        const int el= FFMIN(e, 10);
604        put_rac(c, state+0, 0);
605
606        for(i=0; i<el; i++){
607            put_rac(c, state+1+i, 1);  //1..10
608        }
609        for(; i<e; i++){
610            put_rac(c, state+1+9, 1);  //1..10
611        }
612        put_rac(c, state+1+FFMIN(i,9), 0);
613
614        for(i=e-1; i>=el; i--){
615            put_rac(c, state+22+9, (a>>i)&1); //22..31
616        }
617        for(; i>=0; i--){
618            put_rac(c, state+22+i, (a>>i)&1); //22..31
619        }
620
621        if(is_signed)
622            put_rac(c, state+11 + el, v < 0); //11..21
623#else
624
625        put_rac(c, state+0, 0);
626        if(e<=9){
627            for(i=0; i<e; i++){
628                put_rac(c, state+1+i, 1);  //1..10
629            }
630            put_rac(c, state+1+i, 0);
631
632            for(i=e-1; i>=0; i--){
633                put_rac(c, state+22+i, (a>>i)&1); //22..31
634            }
635
636            if(is_signed)
637                put_rac(c, state+11 + e, v < 0); //11..21
638        }else{
639            for(i=0; i<e; i++){
640                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
641            }
642            put_rac(c, state+1+FFMIN(i,9), 0);
643
644            for(i=e-1; i>=0; i--){
645                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
646            }
647
648            if(is_signed)
649                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
650        }
651#endif /* 1 */
652    }else{
653        put_rac(c, state+0, 1);
654    }
655}
656
657static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
658    if(get_rac(c, state+0))
659        return 0;
660    else{
661        int i, e, a;
662        e= 0;
663        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
664            e++;
665        }
666
667        a= 1;
668        for(i=e-1; i>=0; i--){
669            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
670        }
671
672        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
673            return -a;
674        else
675            return a;
676    }
677}
678
679static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
680    int i;
681    int r= log2>=0 ? 1<<log2 : 1;
682
683    assert(v>=0);
684    assert(log2>=-4);
685
686    while(v >= r){
687        put_rac(c, state+4+log2, 1);
688        v -= r;
689        log2++;
690        if(log2>0) r+=r;
691    }
692    put_rac(c, state+4+log2, 0);
693
694    for(i=log2-1; i>=0; i--){
695        put_rac(c, state+31-i, (v>>i)&1);
696    }
697}
698
699static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
700    int i;
701    int r= log2>=0 ? 1<<log2 : 1;
702    int v=0;
703
704    assert(log2>=-4);
705
706    while(get_rac(c, state+4+log2)){
707        v+= r;
708        log2++;
709        if(log2>0) r+=r;
710    }
711
712    for(i=log2-1; i>=0; i--){
713        v+= get_rac(c, state+31-i)<<i;
714    }
715
716    return v;
717}
718
719static av_always_inline void
720lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
721     int dst_step, int src_step, int ref_step,
722     int width, int mul, int add, int shift,
723     int highpass, int inverse){
724    const int mirror_left= !highpass;
725    const int mirror_right= (width&1) ^ highpass;
726    const int w= (width>>1) - 1 + (highpass & width);
727    int i;
728
729#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
730    if(mirror_left){
731        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
732        dst += dst_step;
733        src += src_step;
734    }
735
736    for(i=0; i<w; i++){
737        dst[i*dst_step] =
738            LIFT(src[i*src_step],
739                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
740                 inverse);
741    }
742
743    if(mirror_right){
744        dst[w*dst_step] =
745            LIFT(src[w*src_step],
746                 ((mul*2*ref[w*ref_step]+add)>>shift),
747                 inverse);
748    }
749}
750
751static av_always_inline void
752inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
753         int dst_step, int src_step, int ref_step,
754         int width, int mul, int add, int shift,
755         int highpass, int inverse){
756    const int mirror_left= !highpass;
757    const int mirror_right= (width&1) ^ highpass;
758    const int w= (width>>1) - 1 + (highpass & width);
759    int i;
760
761#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
762    if(mirror_left){
763        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
764        dst += dst_step;
765        src += src_step;
766    }
767
768    for(i=0; i<w; i++){
769        dst[i*dst_step] =
770            LIFT(src[i*src_step],
771                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
772                 inverse);
773    }
774
775    if(mirror_right){
776        dst[w*dst_step] =
777            LIFT(src[w*src_step],
778                 ((mul*2*ref[w*ref_step]+add)>>shift),
779                 inverse);
780    }
781}
782
783#ifndef liftS
784static av_always_inline void
785liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
786      int dst_step, int src_step, int ref_step,
787      int width, int mul, int add, int shift,
788      int highpass, int inverse){
789    const int mirror_left= !highpass;
790    const int mirror_right= (width&1) ^ highpass;
791    const int w= (width>>1) - 1 + (highpass & width);
792    int i;
793
794    assert(shift == 4);
795#define LIFTS(src, ref, inv) \
796        ((inv) ? \
797            (src) + (((ref) + 4*(src))>>shift): \
798            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
799    if(mirror_left){
800        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
801        dst += dst_step;
802        src += src_step;
803    }
804
805    for(i=0; i<w; i++){
806        dst[i*dst_step] =
807            LIFTS(src[i*src_step],
808                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
809                  inverse);
810    }
811
812    if(mirror_right){
813        dst[w*dst_step] =
814            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
815    }
816}
817static av_always_inline void
818inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
819          int dst_step, int src_step, int ref_step,
820          int width, int mul, int add, int shift,
821          int highpass, int inverse){
822    const int mirror_left= !highpass;
823    const int mirror_right= (width&1) ^ highpass;
824    const int w= (width>>1) - 1 + (highpass & width);
825    int i;
826
827    assert(shift == 4);
828#define LIFTS(src, ref, inv) \
829    ((inv) ? \
830        (src) + (((ref) + 4*(src))>>shift): \
831        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
832    if(mirror_left){
833        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
834        dst += dst_step;
835        src += src_step;
836    }
837
838    for(i=0; i<w; i++){
839        dst[i*dst_step] =
840            LIFTS(src[i*src_step],
841                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
842                  inverse);
843    }
844
845    if(mirror_right){
846        dst[w*dst_step] =
847            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
848    }
849}
850#endif /* ! liftS */
851
852static void horizontal_decompose53i(DWTELEM *b, int width){
853    DWTELEM temp[width];
854    const int width2= width>>1;
855    int x;
856    const int w2= (width+1)>>1;
857
858    for(x=0; x<width2; x++){
859        temp[x   ]= b[2*x    ];
860        temp[x+w2]= b[2*x + 1];
861    }
862    if(width&1)
863        temp[x   ]= b[2*x    ];
864#if 0
865    {
866    int A1,A2,A3,A4;
867    A2= temp[1       ];
868    A4= temp[0       ];
869    A1= temp[0+width2];
870    A1 -= (A2 + A4)>>1;
871    A4 += (A1 + 1)>>1;
872    b[0+width2] = A1;
873    b[0       ] = A4;
874    for(x=1; x+1<width2; x+=2){
875        A3= temp[x+width2];
876        A4= temp[x+1     ];
877        A3 -= (A2 + A4)>>1;
878        A2 += (A1 + A3 + 2)>>2;
879        b[x+width2] = A3;
880        b[x       ] = A2;
881
882        A1= temp[x+1+width2];
883        A2= temp[x+2       ];
884        A1 -= (A2 + A4)>>1;
885        A4 += (A1 + A3 + 2)>>2;
886        b[x+1+width2] = A1;
887        b[x+1       ] = A4;
888    }
889    A3= temp[width-1];
890    A3 -= A2;
891    A2 += (A1 + A3 + 2)>>2;
892    b[width -1] = A3;
893    b[width2-1] = A2;
894    }
895#else
896    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
898#endif /* 0 */
899}
900
901static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
902    int i;
903
904    for(i=0; i<width; i++){
905        b1[i] -= (b0[i] + b2[i])>>1;
906    }
907}
908
909static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910    int i;
911
912    for(i=0; i<width; i++){
913        b1[i] += (b0[i] + b2[i] + 2)>>2;
914    }
915}
916
917static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
918    int y;
919    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
921
922    for(y=-2; y<height; y+=2){
923        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925
926        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
927        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
928
929        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
930        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
931
932        b0=b2;
933        b1=b3;
934    }
935}
936
937static void horizontal_decompose97i(DWTELEM *b, int width){
938    DWTELEM temp[width];
939    const int w2= (width+1)>>1;
940
941    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
942    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
943    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
944    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
945}
946
947
948static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
949    int i;
950
951    for(i=0; i<width; i++){
952        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
953    }
954}
955
956static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
957    int i;
958
959    for(i=0; i<width; i++){
960        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
961    }
962}
963
964static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
965    int i;
966
967    for(i=0; i<width; i++){
968#ifdef liftS
969        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
970#else
971        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
972#endif
973    }
974}
975
976static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
977    int i;
978
979    for(i=0; i<width; i++){
980        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
981    }
982}
983
984static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
985    int y;
986    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
987    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
988    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
989    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
990
991    for(y=-4; y<height; y+=2){
992        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
993        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
994
995        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
996        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
997
998        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
999        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1000        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1001        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1002
1003        b0=b2;
1004        b1=b3;
1005        b2=b4;
1006        b3=b5;
1007    }
1008}
1009
1010void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1011    int level;
1012
1013    for(level=0; level<decomposition_count; level++){
1014        switch(type){
1015        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1016        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1017        }
1018    }
1019}
1020
1021static void horizontal_compose53i(IDWTELEM *b, int width){
1022    IDWTELEM temp[width];
1023    const int width2= width>>1;
1024    const int w2= (width+1)>>1;
1025    int x;
1026
1027#if 0
1028    int A1,A2,A3,A4;
1029    A2= temp[1       ];
1030    A4= temp[0       ];
1031    A1= temp[0+width2];
1032    A1 -= (A2 + A4)>>1;
1033    A4 += (A1 + 1)>>1;
1034    b[0+width2] = A1;
1035    b[0       ] = A4;
1036    for(x=1; x+1<width2; x+=2){
1037        A3= temp[x+width2];
1038        A4= temp[x+1     ];
1039        A3 -= (A2 + A4)>>1;
1040        A2 += (A1 + A3 + 2)>>2;
1041        b[x+width2] = A3;
1042        b[x       ] = A2;
1043
1044        A1= temp[x+1+width2];
1045        A2= temp[x+2       ];
1046        A1 -= (A2 + A4)>>1;
1047        A4 += (A1 + A3 + 2)>>2;
1048        b[x+1+width2] = A1;
1049        b[x+1       ] = A4;
1050    }
1051    A3= temp[width-1];
1052    A3 -= A2;
1053    A2 += (A1 + A3 + 2)>>2;
1054    b[width -1] = A3;
1055    b[width2-1] = A2;
1056#else
1057    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1058    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1059#endif /* 0 */
1060    for(x=0; x<width2; x++){
1061        b[2*x    ]= temp[x   ];
1062        b[2*x + 1]= temp[x+w2];
1063    }
1064    if(width&1)
1065        b[2*x    ]= temp[x   ];
1066}
1067
1068static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1069    int i;
1070
1071    for(i=0; i<width; i++){
1072        b1[i] += (b0[i] + b2[i])>>1;
1073    }
1074}
1075
1076static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1077    int i;
1078
1079    for(i=0; i<width; i++){
1080        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1081    }
1082}
1083
1084static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1085    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1086    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1087    cs->y = -1;
1088}
1089
1090static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1091    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1092    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1093    cs->y = -1;
1094}
1095
1096static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1097    int y= cs->y;
1098
1099    IDWTELEM *b0= cs->b0;
1100    IDWTELEM *b1= cs->b1;
1101    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1102    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1103
1104        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1105        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1106
1107        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1108        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1109
1110    cs->b0 = b2;
1111    cs->b1 = b3;
1112    cs->y += 2;
1113}
1114
1115static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1116    int y= cs->y;
1117    IDWTELEM *b0= cs->b0;
1118    IDWTELEM *b1= cs->b1;
1119    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1120    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1121
1122        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1123        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1124
1125        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1126        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1127
1128    cs->b0 = b2;
1129    cs->b1 = b3;
1130    cs->y += 2;
1131}
1132
1133static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1134    DWTCompose cs;
1135    spatial_compose53i_init(&cs, buffer, height, stride);
1136    while(cs.y <= height)
1137        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1138}
1139
1140
1141void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1142    IDWTELEM temp[width];
1143    const int w2= (width+1)>>1;
1144
1145    inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1146    inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1147    inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1148    inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1149}
1150
1151static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1152    int i;
1153
1154    for(i=0; i<width; i++){
1155        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1156    }
1157}
1158
1159static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1160    int i;
1161
1162    for(i=0; i<width; i++){
1163        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1164    }
1165}
1166
1167static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1168    int i;
1169
1170    for(i=0; i<width; i++){
1171#ifdef liftS
1172        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1173#else
1174        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1175#endif
1176    }
1177}
1178
1179static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1180    int i;
1181
1182    for(i=0; i<width; i++){
1183        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1184    }
1185}
1186
1187void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1188    int i;
1189
1190    for(i=0; i<width; i++){
1191        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1192        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1193#ifdef liftS
1194        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1195#else
1196        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1197#endif
1198        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1199    }
1200}
1201
1202static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1203    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1204    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1205    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1206    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1207    cs->y = -3;
1208}
1209
1210static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1211    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1212    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1213    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1214    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1215    cs->y = -3;
1216}
1217
1218static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1219    int y = cs->y;
1220
1221    IDWTELEM *b0= cs->b0;
1222    IDWTELEM *b1= cs->b1;
1223    IDWTELEM *b2= cs->b2;
1224    IDWTELEM *b3= cs->b3;
1225    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1226    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1227
1228    if(y>0 && y+4<height){
1229        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1230    }else{
1231        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1232        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1233        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1234        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1235    }
1236
1237        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1238        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1239
1240    cs->b0=b2;
1241    cs->b1=b3;
1242    cs->b2=b4;
1243    cs->b3=b5;
1244    cs->y += 2;
1245}
1246
1247static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1248    int y = cs->y;
1249    IDWTELEM *b0= cs->b0;
1250    IDWTELEM *b1= cs->b1;
1251    IDWTELEM *b2= cs->b2;
1252    IDWTELEM *b3= cs->b3;
1253    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1255
1256        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1257        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1258        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1259        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1260
1261        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1262        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1263
1264    cs->b0=b2;
1265    cs->b1=b3;
1266    cs->b2=b4;
1267    cs->b3=b5;
1268    cs->y += 2;
1269}
1270
1271static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1272    DWTCompose cs;
1273    spatial_compose97i_init(&cs, buffer, height, stride);
1274    while(cs.y <= height)
1275        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1276}
1277
1278static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1279    int level;
1280    for(level=decomposition_count-1; level>=0; level--){
1281        switch(type){
1282        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1283        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1284        }
1285    }
1286}
1287
1288static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1289    int level;
1290    for(level=decomposition_count-1; level>=0; level--){
1291        switch(type){
1292        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1293        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1294        }
1295    }
1296}
1297
1298static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1299    const int support = type==1 ? 3 : 5;
1300    int level;
1301    if(type==2) return;
1302
1303    for(level=decomposition_count-1; level>=0; level--){
1304        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1305            switch(type){
1306            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1307                break;
1308            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1309                break;
1310            }
1311        }
1312    }
1313}
1314
1315static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1316    const int support = type==1 ? 3 : 5;
1317    int level;
1318    if(type==2) return;
1319
1320    for(level=decomposition_count-1; level>=0; level--){
1321        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1322            switch(type){
1323            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1324                break;
1325            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1326                break;
1327            }
1328        }
1329    }
1330}
1331
1332static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1333        DWTCompose cs[MAX_DECOMPOSITIONS];
1334        int y;
1335        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1336        for(y=0; y<height; y+=4)
1337            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1338}
1339
1340static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1341    const int w= b->width;
1342    const int h= b->height;
1343    int x, y;
1344
1345    if(1){
1346        int run=0;
1347        int runs[w*h];
1348        int run_index=0;
1349        int max_index;
1350
1351        for(y=0; y<h; y++){
1352            for(x=0; x<w; x++){
1353                int v, p=0;
1354                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1355                v= src[x + y*stride];
1356
1357                if(y){
1358                    t= src[x + (y-1)*stride];
1359                    if(x){
1360                        lt= src[x - 1 + (y-1)*stride];
1361                    }
1362                    if(x + 1 < w){
1363                        rt= src[x + 1 + (y-1)*stride];
1364                    }
1365                }
1366                if(x){
1367                    l= src[x - 1 + y*stride];
1368                    /*if(x > 1){
1369                        if(orientation==1) ll= src[y + (x-2)*stride];
1370                        else               ll= src[x - 2 + y*stride];
1371                    }*/
1372                }
1373                if(parent){
1374                    int px= x>>1;
1375                    int py= y>>1;
1376                    if(px<b->parent->width && py<b->parent->height)
1377                        p= parent[px + py*2*stride];
1378                }
1379                if(!(/*ll|*/l|lt|t|rt|p)){
1380                    if(v){
1381                        runs[run_index++]= run;
1382                        run=0;
1383                    }else{
1384                        run++;
1385                    }
1386                }
1387            }
1388        }
1389        max_index= run_index;
1390        runs[run_index++]= run;
1391        run_index=0;
1392        run= runs[run_index++];
1393
1394        put_symbol2(&s->c, b->state[30], max_index, 0);
1395        if(run_index <= max_index)
1396            put_symbol2(&s->c, b->state[1], run, 3);
1397
1398        for(y=0; y<h; y++){
1399            if(s->c.bytestream_end - s->c.bytestream < w*40){
1400                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1401                return -1;
1402            }
1403            for(x=0; x<w; x++){
1404                int v, p=0;
1405                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1406                v= src[x + y*stride];
1407
1408                if(y){
1409                    t= src[x + (y-1)*stride];
1410                    if(x){
1411                        lt= src[x - 1 + (y-1)*stride];
1412                    }
1413                    if(x + 1 < w){
1414                        rt= src[x + 1 + (y-1)*stride];
1415                    }
1416                }
1417                if(x){
1418                    l= src[x - 1 + y*stride];
1419                    /*if(x > 1){
1420                        if(orientation==1) ll= src[y + (x-2)*stride];
1421                        else               ll= src[x - 2 + y*stride];
1422                    }*/
1423                }
1424                if(parent){
1425                    int px= x>>1;
1426                    int py= y>>1;
1427                    if(px<b->parent->width && py<b->parent->height)
1428                        p= parent[px + py*2*stride];
1429                }
1430                if(/*ll|*/l|lt|t|rt|p){
1431                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1432
1433                    put_rac(&s->c, &b->state[0][context], !!v);
1434                }else{
1435                    if(!run){
1436                        run= runs[run_index++];
1437
1438                        if(run_index <= max_index)
1439                            put_symbol2(&s->c, b->state[1], run, 3);
1440                        assert(v);
1441                    }else{
1442                        run--;
1443                        assert(!v);
1444                    }
1445                }
1446                if(v){
1447                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1448                    int l2= 2*FFABS(l) + (l<0);
1449                    int t2= 2*FFABS(t) + (t<0);
1450
1451                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1452                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1453                }
1454            }
1455        }
1456    }
1457    return 0;
1458}
1459
1460static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1461//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1462//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1463    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1464//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1465}
1466
1467static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1468    const int w= b->width;
1469    const int h= b->height;
1470    int x,y;
1471
1472    if(1){
1473        int run, runs;
1474        x_and_coeff *xc= b->x_coeff;
1475        x_and_coeff *prev_xc= NULL;
1476        x_and_coeff *prev2_xc= xc;
1477        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1478        x_and_coeff *prev_parent_xc= parent_xc;
1479
1480        runs= get_symbol2(&s->c, b->state[30], 0);
1481        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1482        else           run= INT_MAX;
1483
1484        for(y=0; y<h; y++){
1485            int v=0;
1486            int lt=0, t=0, rt=0;
1487
1488            if(y && prev_xc->x == 0){
1489                rt= prev_xc->coeff;
1490            }
1491            for(x=0; x<w; x++){
1492                int p=0;
1493                const int l= v;
1494
1495                lt= t; t= rt;
1496
1497                if(y){
1498                    if(prev_xc->x <= x)
1499                        prev_xc++;
1500                    if(prev_xc->x == x + 1)
1501                        rt= prev_xc->coeff;
1502                    else
1503                        rt=0;
1504                }
1505                if(parent_xc){
1506                    if(x>>1 > parent_xc->x){
1507                        parent_xc++;
1508                    }
1509                    if(x>>1 == parent_xc->x){
1510                        p= parent_xc->coeff;
1511                    }
1512                }
1513                if(/*ll|*/l|lt|t|rt|p){
1514                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1515
1516                    v=get_rac(&s->c, &b->state[0][context]);
1517                    if(v){
1518                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1519                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1520
1521                        xc->x=x;
1522                        (xc++)->coeff= v;
1523                    }
1524                }else{
1525                    if(!run){
1526                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1527                        else           run= INT_MAX;
1528                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1529                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1530
1531                        xc->x=x;
1532                        (xc++)->coeff= v;
1533                    }else{
1534                        int max_run;
1535                        run--;
1536                        v=0;
1537
1538                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1539                        else  max_run= FFMIN(run, w-x-1);
1540                        if(parent_xc)
1541                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1542                        x+= max_run;
1543                        run-= max_run;
1544                    }
1545                }
1546            }
1547            (xc++)->x= w+1; //end marker
1548            prev_xc= prev2_xc;
1549            prev2_xc= xc;
1550
1551            if(parent_xc){
1552                if(y&1){
1553                    while(parent_xc->x != parent->width+1)
1554                        parent_xc++;
1555                    parent_xc++;
1556                    prev_parent_xc= parent_xc;
1557                }else{
1558                    parent_xc= prev_parent_xc;
1559                }
1560            }
1561        }
1562
1563        (xc++)->x= w+1; //end marker
1564    }
1565}
1566
1567static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1568    const int w= b->width;
1569    int y;
1570    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1571    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1572    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1573    int new_index = 0;
1574
1575    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1576        qadd= 0;
1577        qmul= 1<<QEXPSHIFT;
1578    }
1579
1580    /* If we are on the second or later slice, restore our index. */
1581    if (start_y != 0)
1582        new_index = save_state[0];
1583
1584
1585    for(y=start_y; y<h; y++){
1586        int x = 0;
1587        int v;
1588        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1589        memset(line, 0, b->width*sizeof(IDWTELEM));
1590        v = b->x_coeff[new_index].coeff;
1591        x = b->x_coeff[new_index++].x;
1592        while(x < w){
1593            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1594            register int u= -(v&1);
1595            line[x] = (t^u) - u;
1596
1597            v = b->x_coeff[new_index].coeff;
1598            x = b->x_coeff[new_index++].x;
1599        }
1600    }
1601
1602    /* Save our variables for the next slice. */
1603    save_state[0] = new_index;
1604
1605    return;
1606}
1607
1608static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1609    int plane_index, level, orientation;
1610
1611    for(plane_index=0; plane_index<3; plane_index++){
1612        for(level=0; level<MAX_DECOMPOSITIONS; level++){
1613            for(orientation=level ? 1:0; orientation<4; orientation++){
1614                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1615            }
1616        }
1617    }
1618    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1619    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1620}
1621
1622static int alloc_blocks(SnowContext *s){
1623    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1624    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1625
1626    s->b_width = w;
1627    s->b_height= h;
1628
1629    av_free(s->block);
1630    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1631    return 0;
1632}
1633
1634static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1635    uint8_t *bytestream= d->bytestream;
1636    uint8_t *bytestream_start= d->bytestream_start;
1637    *d= *s;
1638    d->bytestream= bytestream;
1639    d->bytestream_start= bytestream_start;
1640}
1641
1642//near copy & paste from dsputil, FIXME
1643static int pix_sum(uint8_t * pix, int line_size, int w)
1644{
1645    int s, i, j;
1646
1647    s = 0;
1648    for (i = 0; i < w; i++) {
1649        for (j = 0; j < w; j++) {
1650            s += pix[0];
1651            pix ++;
1652        }
1653        pix += line_size - w;
1654    }
1655    return s;
1656}
1657
1658//near copy & paste from dsputil, FIXME
1659static int pix_norm1(uint8_t * pix, int line_size, int w)
1660{
1661    int s, i, j;
1662    uint32_t *sq = ff_squareTbl + 256;
1663
1664    s = 0;
1665    for (i = 0; i < w; i++) {
1666        for (j = 0; j < w; j ++) {
1667            s += sq[pix[0]];
1668            pix ++;
1669        }
1670        pix += line_size - w;
1671    }
1672    return s;
1673}
1674
1675static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1676    const int w= s->b_width << s->block_max_depth;
1677    const int rem_depth= s->block_max_depth - level;
1678    const int index= (x + y*w) << rem_depth;
1679    const int block_w= 1<<rem_depth;
1680    BlockNode block;
1681    int i,j;
1682
1683    block.color[0]= l;
1684    block.color[1]= cb;
1685    block.color[2]= cr;
1686    block.mx= mx;
1687    block.my= my;
1688    block.ref= ref;
1689    block.type= type;
1690    block.level= level;
1691
1692    for(j=0; j<block_w; j++){
1693        for(i=0; i<block_w; i++){
1694            s->block[index + i + j*w]= block;
1695        }
1696    }
1697}
1698
1699static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1700    const int offset[3]= {
1701          y*c->  stride + x,
1702        ((y*c->uvstride + x)>>1),
1703        ((y*c->uvstride + x)>>1),
1704    };
1705    int i;
1706    for(i=0; i<3; i++){
1707        c->src[0][i]= src [i];
1708        c->ref[0][i]= ref [i] + offset[i];
1709    }
1710    assert(!ref_index);
1711}
1712
1713static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1714                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1715    if(s->ref_frames == 1){
1716        *mx = mid_pred(left->mx, top->mx, tr->mx);
1717        *my = mid_pred(left->my, top->my, tr->my);
1718    }else{
1719        const int *scale = scale_mv_ref[ref];
1720        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1721                       (top ->mx * scale[top ->ref] + 128) >>8,
1722                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1723        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1724                       (top ->my * scale[top ->ref] + 128) >>8,
1725                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
1726    }
1727}
1728
1729//FIXME copy&paste
1730#define P_LEFT P[1]
1731#define P_TOP P[2]
1732#define P_TOPRIGHT P[3]
1733#define P_MEDIAN P[4]
1734#define P_MV1 P[9]
1735#define FLAG_QPEL   1 //must be 1
1736
1737static int encode_q_branch(SnowContext *s, int level, int x, int y){
1738    uint8_t p_buffer[1024];
1739    uint8_t i_buffer[1024];
1740    uint8_t p_state[sizeof(s->block_state)];
1741    uint8_t i_state[sizeof(s->block_state)];
1742    RangeCoder pc, ic;
1743    uint8_t *pbbak= s->c.bytestream;
1744    uint8_t *pbbak_start= s->c.bytestream_start;
1745    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1746    const int w= s->b_width  << s->block_max_depth;
1747    const int h= s->b_height << s->block_max_depth;
1748    const int rem_depth= s->block_max_depth - level;
1749    const int index= (x + y*w) << rem_depth;
1750    const int block_w= 1<<(LOG2_MB_SIZE - level);
1751    int trx= (x+1)<<rem_depth;
1752    int try= (y+1)<<rem_depth;
1753    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1754    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1755    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1756    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1757    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1758    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1759    int pl = left->color[0];
1760    int pcb= left->color[1];
1761    int pcr= left->color[2];
1762    int pmx, pmy;
1763    int mx=0, my=0;
1764    int l,cr,cb;
1765    const int stride= s->current_picture.linesize[0];
1766    const int uvstride= s->current_picture.linesize[1];
1767    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1768                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1769                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1770    int P[10][2];
1771    int16_t last_mv[3][2];
1772    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1773    const int shift= 1+qpel;
1774    MotionEstContext *c= &s->m.me;
1775    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1776    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1777    int my_context= av_log2(2*FFABS(left->my - top->my));
1778    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1779    int ref, best_ref, ref_score, ref_mx, ref_my;
1780
1781    assert(sizeof(s->block_state) >= 256);
1782    if(s->keyframe){
1783        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1784        return 0;
1785    }
1786
1787//    clip predictors / edge ?
1788
1789    P_LEFT[0]= left->mx;
1790    P_LEFT[1]= left->my;
1791    P_TOP [0]= top->mx;
1792    P_TOP [1]= top->my;
1793    P_TOPRIGHT[0]= tr->mx;
1794    P_TOPRIGHT[1]= tr->my;
1795
1796    last_mv[0][0]= s->block[index].mx;
1797    last_mv[0][1]= s->block[index].my;
1798    last_mv[1][0]= right->mx;
1799    last_mv[1][1]= right->my;
1800    last_mv[2][0]= bottom->mx;
1801    last_mv[2][1]= bottom->my;
1802
1803    s->m.mb_stride=2;
1804    s->m.mb_x=
1805    s->m.mb_y= 0;
1806    c->skip= 0;
1807
1808    assert(c->  stride ==   stride);
1809    assert(c->uvstride == uvstride);
1810
1811    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1812    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1813    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1814    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1815
1816    c->xmin = - x*block_w - 16+2;
1817    c->ymin = - y*block_w - 16+2;
1818    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1819    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1820
1821    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1822    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1823    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1824    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1825    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1826    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1827    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1828
1829    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1830    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1831
1832    if (!y) {
1833        c->pred_x= P_LEFT[0];
1834        c->pred_y= P_LEFT[1];
1835    } else {
1836        c->pred_x = P_MEDIAN[0];
1837        c->pred_y = P_MEDIAN[1];
1838    }
1839
1840    score= INT_MAX;
1841    best_ref= 0;
1842    for(ref=0; ref<s->ref_frames; ref++){
1843        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1844
1845        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1846                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1847
1848        assert(ref_mx >= c->xmin);
1849        assert(ref_mx <= c->xmax);
1850        assert(ref_my >= c->ymin);
1851        assert(ref_my <= c->ymax);
1852
1853        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1854        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1855        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1856        if(s->ref_mvs[ref]){
1857            s->ref_mvs[ref][index][0]= ref_mx;
1858            s->ref_mvs[ref][index][1]= ref_my;
1859            s->ref_scores[ref][index]= ref_score;
1860        }
1861        if(score > ref_score){
1862            score= ref_score;
1863            best_ref= ref;
1864            mx= ref_mx;
1865            my= ref_my;
1866        }
1867    }
1868    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1869
1870  //  subpel search
1871    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1872    pc= s->c;
1873    pc.bytestream_start=
1874    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1875    memcpy(p_state, s->block_state, sizeof(s->block_state));
1876
1877    if(level!=s->block_max_depth)
1878        put_rac(&pc, &p_state[4 + s_context], 1);
1879    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1880    if(s->ref_frames > 1)
1881        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1882    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1883    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1884    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1885    p_len= pc.bytestream - pc.bytestream_start;
1886    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1887
1888    block_s= block_w*block_w;
1889    sum = pix_sum(current_data[0], stride, block_w);
1890    l= (sum + block_s/2)/block_s;
1891    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1892
1893    block_s= block_w*block_w>>2;
1894    sum = pix_sum(current_data[1], uvstride, block_w>>1);
1895    cb= (sum + block_s/2)/block_s;
1896//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1897    sum = pix_sum(current_data[2], uvstride, block_w>>1);
1898    cr= (sum + block_s/2)/block_s;
1899//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1900
1901    ic= s->c;
1902    ic.bytestream_start=
1903    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1904    memcpy(i_state, s->block_state, sizeof(s->block_state));
1905    if(level!=s->block_max_depth)
1906        put_rac(&ic, &i_state[4 + s_context], 1);
1907    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1908    put_symbol(&ic, &i_state[32],  l-pl , 1);
1909    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1910    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1911    i_len= ic.bytestream - ic.bytestream_start;
1912    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1913
1914//    assert(score==256*256*256*64-1);
1915    assert(iscore < 255*255*256 + s->lambda2*10);
1916    assert(iscore >= 0);
1917    assert(l>=0 && l<=255);
1918    assert(pl>=0 && pl<=255);
1919
1920    if(level==0){
1921        int varc= iscore >> 8;
1922        int vard= score >> 8;
1923        if (vard <= 64 || vard < varc)
1924            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1925        else
1926            c->scene_change_score+= s->m.qscale;
1927    }
1928
1929    if(level!=s->block_max_depth){
1930        put_rac(&s->c, &s->block_state[4 + s_context], 0);
1931        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1932        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1933        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1934        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1935        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1936
1937        if(score2 < score && score2 < iscore)
1938            return score2;
1939    }
1940
1941    if(iscore < score){
1942        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1943        memcpy(pbbak, i_buffer, i_len);
1944        s->c= ic;
1945        s->c.bytestream_start= pbbak_start;
1946        s->c.bytestream= pbbak + i_len;
1947        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1948        memcpy(s->block_state, i_state, sizeof(s->block_state));
1949        return iscore;
1950    }else{
1951        memcpy(pbbak, p_buffer, p_len);
1952        s->c= pc;
1953        s->c.bytestream_start= pbbak_start;
1954        s->c.bytestream= pbbak + p_len;
1955        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1956        memcpy(s->block_state, p_state, sizeof(s->block_state));
1957        return score;
1958    }
1959}
1960
1961static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1962    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1963        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1964    }else{
1965        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1966    }
1967}
1968
1969static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1970    const int w= s->b_width  << s->block_max_depth;
1971    const int rem_depth= s->block_max_depth - level;
1972    const int index= (x + y*w) << rem_depth;
1973    int trx= (x+1)<<rem_depth;
1974    BlockNode *b= &s->block[index];
1975    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1976    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1977    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1978    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1979    int pl = left->color[0];
1980    int pcb= left->color[1];
1981    int pcr= left->color[2];
1982    int pmx, pmy;
1983    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1984    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1985    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1986    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1987
1988    if(s->keyframe){
1989        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1990        return;
1991    }
1992
1993    if(level!=s->block_max_depth){
1994        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1995            put_rac(&s->c, &s->block_state[4 + s_context], 1);
1996        }else{
1997            put_rac(&s->c, &s->block_state[4 + s_context], 0);
1998            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1999            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2000            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2001            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2002            return;
2003        }
2004    }
2005    if(b->type & BLOCK_INTRA){
2006        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2007        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2008        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2009        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2010        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2011        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2012    }else{
2013        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2014        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2015        if(s->ref_frames > 1)
2016            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2017        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2018        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2019        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2020    }
2021}
2022
2023static void decode_q_branch(SnowContext *s, int level, int x, int y){
2024    const int w= s->b_width << s->block_max_depth;
2025    const int rem_depth= s->block_max_depth - level;
2026    const int index= (x + y*w) << rem_depth;
2027    int trx= (x+1)<<rem_depth;
2028    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2029    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2030    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2031    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2032    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2033
2034    if(s->keyframe){
2035        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2036        return;
2037    }
2038
2039    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2040        int type, mx, my;
2041        int l = left->color[0];
2042        int cb= left->color[1];
2043        int cr= left->color[2];
2044        int ref = 0;
2045        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2046        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2047        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2048
2049        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2050
2051        if(type){
2052            pred_mv(s, &mx, &my, 0, left, top, tr);
2053            l += get_symbol(&s->c, &s->block_state[32], 1);
2054            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2055            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2056        }else{
2057            if(s->ref_frames > 1)
2058                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2059            pred_mv(s, &mx, &my, ref, left, top, tr);
2060            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2061            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2062        }
2063        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2064    }else{
2065        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2066        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2067        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2068        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2069    }
2070}
2071
2072static void encode_blocks(SnowContext *s, int search){
2073    int x, y;
2074    int w= s->b_width;
2075    int h= s->b_height;
2076
2077    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2078        iterative_me(s);
2079
2080    for(y=0; y<h; y++){
2081        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2082            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2083            return;
2084        }
2085        for(x=0; x<w; x++){
2086            if(s->avctx->me_method == ME_ITER || !search)
2087                encode_q_branch2(s, 0, x, y);
2088            else
2089                encode_q_branch (s, 0, x, y);
2090        }
2091    }
2092}
2093
2094static void decode_blocks(SnowContext *s){
2095    int x, y;
2096    int w= s->b_width;
2097    int h= s->b_height;
2098
2099    for(y=0; y<h; y++){
2100        for(x=0; x<w; x++){
2101            decode_q_branch(s, 0, x, y);
2102        }
2103    }
2104}
2105
2106static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2107    static const uint8_t weight[64]={
2108    8,7,6,5,4,3,2,1,
2109    7,7,0,0,0,0,0,1,
2110    6,0,6,0,0,0,2,0,
2111    5,0,0,5,0,3,0,0,
2112    4,0,0,0,4,0,0,0,
2113    3,0,0,5,0,3,0,0,
2114    2,0,6,0,0,0,2,0,
2115    1,7,0,0,0,0,0,1,
2116    };
2117
2118    static const uint8_t brane[256]={
2119    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2120    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2121    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2122    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2123    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2124    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2125    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2126    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2127    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2128    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2129    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2130    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2131    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2132    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2133    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2134    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2135    };
2136
2137    static const uint8_t needs[16]={
2138    0,1,0,0,
2139    2,4,2,0,
2140    0,1,0,0,
2141    15
2142    };
2143
2144    int x, y, b, r, l;
2145    int16_t tmpIt   [64*(32+HTAPS_MAX)];
2146    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2147    int16_t *tmpI= tmpIt;
2148    uint8_t *tmp2= tmp2t[0];
2149    const uint8_t *hpel[11];
2150    assert(dx<16 && dy<16);
2151    r= brane[dx + 16*dy]&15;
2152    l= brane[dx + 16*dy]>>4;
2153
2154    b= needs[l] | needs[r];
2155    if(p && !p->diag_mc)
2156        b= 15;
2157
2158    if(b&5){
2159        for(y=0; y < b_h+HTAPS_MAX-1; y++){
2160            for(x=0; x < b_w; x++){
2161                int a_1=src[x + HTAPS_MAX/2-4];
2162                int a0= src[x + HTAPS_MAX/2-3];
2163                int a1= src[x + HTAPS_MAX/2-2];
2164                int a2= src[x + HTAPS_MAX/2-1];
2165                int a3= src[x + HTAPS_MAX/2+0];
2166                int a4= src[x + HTAPS_MAX/2+1];
2167                int a5= src[x + HTAPS_MAX/2+2];
2168                int a6= src[x + HTAPS_MAX/2+3];
2169                int am=0;
2170                if(!p || p->fast_mc){
2171                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2172                    tmpI[x]= am;
2173                    am= (am+16)>>5;
2174                }else{
2175                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2176                    tmpI[x]= am;
2177                    am= (am+32)>>6;
2178                }
2179
2180                if(am&(~255)) am= ~(am>>31);
2181                tmp2[x]= am;
2182            }
2183            tmpI+= 64;
2184            tmp2+= stride;
2185            src += stride;
2186        }
2187        src -= stride*y;
2188    }
2189    src += HTAPS_MAX/2 - 1;
2190    tmp2= tmp2t[1];
2191
2192    if(b&2){
2193        for(y=0; y < b_h; y++){
2194            for(x=0; x < b_w+1; x++){
2195                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2196                int a0= src[x + (HTAPS_MAX/2-3)*stride];
2197                int a1= src[x + (HTAPS_MAX/2-2)*stride];
2198                int a2= src[x + (HTAPS_MAX/2-1)*stride];
2199                int a3= src[x + (HTAPS_MAX/2+0)*stride];
2200                int a4= src[x + (HTAPS_MAX/2+1)*stride];
2201                int a5= src[x + (HTAPS_MAX/2+2)*stride];
2202                int a6= src[x + (HTAPS_MAX/2+3)*stride];
2203                int am=0;
2204                if(!p || p->fast_mc)
2205                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2206                else
2207                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2208
2209                if(am&(~255)) am= ~(am>>31);
2210                tmp2[x]= am;
2211            }
2212            src += stride;
2213            tmp2+= stride;
2214        }
2215        src -= stride*y;
2216    }
2217    src += stride*(HTAPS_MAX/2 - 1);
2218    tmp2= tmp2t[2];
2219    tmpI= tmpIt;
2220    if(b&4){
2221        for(y=0; y < b_h; y++){
2222            for(x=0; x < b_w; x++){
2223                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2224                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2225                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2226                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2227                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2228                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2229                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2230                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2231                int am=0;
2232                if(!p || p->fast_mc)
2233                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2234                else
2235                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2236                if(am&(~255)) am= ~(am>>31);
2237                tmp2[x]= am;
2238            }
2239            tmpI+= 64;
2240            tmp2+= stride;
2241        }
2242    }
2243
2244    hpel[ 0]= src;
2245    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2246    hpel[ 2]= src + 1;
2247
2248    hpel[ 4]= tmp2t[1];
2249    hpel[ 5]= tmp2t[2];
2250    hpel[ 6]= tmp2t[1] + 1;
2251
2252    hpel[ 8]= src + stride;
2253    hpel[ 9]= hpel[1] + stride;
2254    hpel[10]= hpel[8] + 1;
2255
2256    if(b==15){
2257        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2258        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2259        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2260        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2261        dx&=7;
2262        dy&=7;
2263        for(y=0; y < b_h; y++){
2264            for(x=0; x < b_w; x++){
2265                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2266                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2267            }
2268            src1+=stride;
2269            src2+=stride;
2270            src3+=stride;
2271            src4+=stride;
2272            dst +=stride;
2273        }
2274    }else{
2275        const uint8_t *src1= hpel[l];
2276        const uint8_t *src2= hpel[r];
2277        int a= weight[((dx&7) + (8*(dy&7)))];
2278        int b= 8-a;
2279        for(y=0; y < b_h; y++){
2280            for(x=0; x < b_w; x++){
2281                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2282            }
2283            src1+=stride;
2284            src2+=stride;
2285            dst +=stride;
2286        }
2287    }
2288}
2289
2290#define mca(dx,dy,b_w)\
2291static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2292    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2293    assert(h==b_w);\
2294    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2295}
2296
2297mca( 0, 0,16)
2298mca( 8, 0,16)
2299mca( 0, 8,16)
2300mca( 8, 8,16)
2301mca( 0, 0,8)
2302mca( 8, 0,8)
2303mca( 0, 8,8)
2304mca( 8, 8,8)
2305
2306static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2307    if(block->type & BLOCK_INTRA){
2308        int x, y;
2309        const int color = block->color[plane_index];
2310        const int color4= color*0x01010101;
2311        if(b_w==32){
2312            for(y=0; y < b_h; y++){
2313                *(uint32_t*)&dst[0 + y*stride]= color4;
2314                *(uint32_t*)&dst[4 + y*stride]= color4;
2315                *(uint32_t*)&dst[8 + y*stride]= color4;
2316                *(uint32_t*)&dst[12+ y*stride]= color4;
2317                *(uint32_t*)&dst[16+ y*stride]= color4;
2318                *(uint32_t*)&dst[20+ y*stride]= color4;
2319                *(uint32_t*)&dst[24+ y*stride]= color4;
2320                *(uint32_t*)&dst[28+ y*stride]= color4;
2321            }
2322        }else if(b_w==16){
2323            for(y=0; y < b_h; y++){
2324                *(uint32_t*)&dst[0 + y*stride]= color4;
2325                *(uint32_t*)&dst[4 + y*stride]= color4;
2326                *(uint32_t*)&dst[8 + y*stride]= color4;
2327                *(uint32_t*)&dst[12+ y*stride]= color4;
2328            }
2329        }else if(b_w==8){
2330            for(y=0; y < b_h; y++){
2331                *(uint32_t*)&dst[0 + y*stride]= color4;
2332                *(uint32_t*)&dst[4 + y*stride]= color4;
2333            }
2334        }else if(b_w==4){
2335            for(y=0; y < b_h; y++){
2336                *(uint32_t*)&dst[0 + y*stride]= color4;
2337            }
2338        }else{
2339            for(y=0; y < b_h; y++){
2340                for(x=0; x < b_w; x++){
2341                    dst[x + y*stride]= color;
2342                }
2343            }
2344        }
2345    }else{
2346        uint8_t *src= s->last_picture[block->ref].data[plane_index];
2347        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2348        int mx= block->mx*scale;
2349        int my= block->my*scale;
2350        const int dx= mx&15;
2351        const int dy= my&15;
2352        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2353        sx += (mx>>4) - (HTAPS_MAX/2-1);
2354        sy += (my>>4) - (HTAPS_MAX/2-1);
2355        src += sx + sy*stride;
2356        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2357           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2358            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2359            src= tmp + MB_SIZE;
2360        }
2361//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2362//        assert(!(b_w&(b_w-1)));
2363        assert(b_w>1 && b_h>1);
2364        assert((tab_index>=0 && tab_index<4) || b_w==32);
2365        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2366            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2367        else if(b_w==32){
2368            int y;
2369            for(y=0; y<b_h; y+=16){
2370                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2371                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2372            }
2373        }else if(b_w==b_h)
2374            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2375        else if(b_w==2*b_h){
2376            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2377            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2378        }else{
2379            assert(2*b_w==b_h);
2380            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2381            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2382        }
2383    }
2384}
2385
2386void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2387                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2388    int y, x;
2389    IDWTELEM * dst;
2390    for(y=0; y<b_h; y++){
2391        //FIXME ugly misuse of obmc_stride
2392        const uint8_t *obmc1= obmc + y*obmc_stride;
2393        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2394        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2395        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2396        dst = slice_buffer_get_line(sb, src_y + y);
2397        for(x=0; x<b_w; x++){
2398            int v=   obmc1[x] * block[3][x + y*src_stride]
2399                    +obmc2[x] * block[2][x + y*src_stride]
2400                    +obmc3[x] * block[1][x + y*src_stride]
2401                    +obmc4[x] * block[0][x + y*src_stride];
2402
2403            v <<= 8 - LOG2_OBMC_MAX;
2404            if(FRAC_BITS != 8){
2405                v >>= 8 - FRAC_BITS;
2406            }
2407            if(add){
2408                v += dst[x + src_x];
2409                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2410                if(v&(~255)) v= ~(v>>31);
2411                dst8[x + y*src_stride] = v;
2412            }else{
2413                dst[x + src_x] -= v;
2414            }
2415        }
2416    }
2417}
2418
2419//FIXME name cleanup (b_w, block_w, b_width stuff)
2420static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2421    const int b_width = s->b_width  << s->block_max_depth;
2422    const int b_height= s->b_height << s->block_max_depth;
2423    const int b_stride= b_width;
2424    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2425    BlockNode *rt= lt+1;
2426    BlockNode *lb= lt+b_stride;
2427    BlockNode *rb= lb+1;
2428    uint8_t *block[4];
2429    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2430    uint8_t *tmp = s->scratchbuf;
2431    uint8_t *ptmp;
2432    int x,y;
2433
2434    if(b_x<0){
2435        lt= rt;
2436        lb= rb;
2437    }else if(b_x + 1 >= b_width){
2438        rt= lt;
2439        rb= lb;
2440    }
2441    if(b_y<0){
2442        lt= lb;
2443        rt= rb;
2444    }else if(b_y + 1 >= b_height){
2445        lb= lt;
2446        rb= rt;
2447    }
2448
2449    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2450        obmc -= src_x;
2451        b_w += src_x;
2452        if(!sliced && !offset_dst)
2453            dst -= src_x;
2454        src_x=0;
2455    }else if(src_x + b_w > w){
2456        b_w = w - src_x;
2457    }
2458    if(src_y<0){
2459        obmc -= src_y*obmc_stride;
2460        b_h += src_y;
2461        if(!sliced && !offset_dst)
2462            dst -= src_y*dst_stride;
2463        src_y=0;
2464    }else if(src_y + b_h> h){
2465        b_h = h - src_y;
2466    }
2467
2468    if(b_w<=0 || b_h<=0) return;
2469
2470    assert(src_stride > 2*MB_SIZE + 5);
2471
2472    if(!sliced && offset_dst)
2473        dst += src_x + src_y*dst_stride;
2474    dst8+= src_x + src_y*src_stride;
2475//    src += src_x + src_y*src_stride;
2476
2477    ptmp= tmp + 3*tmp_step;
2478    block[0]= ptmp;
2479    ptmp+=tmp_step;
2480    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2481
2482    if(same_block(lt, rt)){
2483        block[1]= block[0];
2484    }else{
2485        block[1]= ptmp;
2486        ptmp+=tmp_step;
2487        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2488    }
2489
2490    if(same_block(lt, lb)){
2491        block[2]= block[0];
2492    }else if(same_block(rt, lb)){
2493        block[2]= block[1];
2494    }else{
2495        block[2]= ptmp;
2496        ptmp+=tmp_step;
2497        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2498    }
2499
2500    if(same_block(lt, rb) ){
2501        block[3]= block[0];
2502    }else if(same_block(rt, rb)){
2503        block[3]= block[1];
2504    }else if(same_block(lb, rb)){
2505        block[3]= block[2];
2506    }else{
2507        block[3]= ptmp;
2508        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2509    }
2510#if 0
2511    for(y=0; y<b_h; y++){
2512        for(x=0; x<b_w; x++){
2513            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2514            if(add) dst[x + y*dst_stride] += v;
2515            else    dst[x + y*dst_stride] -= v;
2516        }
2517    }
2518    for(y=0; y<b_h; y++){
2519        uint8_t *obmc2= obmc + (obmc_stride>>1);
2520        for(x=0; x<b_w; x++){
2521            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2522            if(add) dst[x + y*dst_stride] += v;
2523            else    dst[x + y*dst_stride] -= v;
2524        }
2525    }
2526    for(y=0; y<b_h; y++){
2527        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2528        for(x=0; x<b_w; x++){
2529            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2530            if(add) dst[x + y*dst_stride] += v;
2531            else    dst[x + y*dst_stride] -= v;
2532        }
2533    }
2534    for(y=0; y<b_h; y++){
2535        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2536        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2537        for(x=0; x<b_w; x++){
2538            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2539            if(add) dst[x + y*dst_stride] += v;
2540            else    dst[x + y*dst_stride] -= v;
2541        }
2542    }
2543#else
2544    if(sliced){
2545        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2546    }else{
2547        for(y=0; y<b_h; y++){
2548            //FIXME ugly misuse of obmc_stride
2549            const uint8_t *obmc1= obmc + y*obmc_stride;
2550            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2551            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2552            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2553            for(x=0; x<b_w; x++){
2554                int v=   obmc1[x] * block[3][x + y*src_stride]
2555                        +obmc2[x] * block[2][x + y*src_stride]
2556                        +obmc3[x] * block[1][x + y*src_stride]
2557                        +obmc4[x] * block[0][x + y*src_stride];
2558
2559                v <<= 8 - LOG2_OBMC_MAX;
2560                if(FRAC_BITS != 8){
2561                    v >>= 8 - FRAC_BITS;
2562                }
2563                if(add){
2564                    v += dst[x + y*dst_stride];
2565                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2566                    if(v&(~255)) v= ~(v>>31);
2567                    dst8[x + y*src_stride] = v;
2568                }else{
2569                    dst[x + y*dst_stride] -= v;
2570                }
2571            }
2572        }
2573    }
2574#endif /* 0 */
2575}
2576
2577static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2578    Plane *p= &s->plane[plane_index];
2579    const int mb_w= s->b_width  << s->block_max_depth;
2580    const int mb_h= s->b_height << s->block_max_depth;
2581    int x, y, mb_x;
2582    int block_size = MB_SIZE >> s->block_max_depth;
2583    int block_w    = plane_index ? block_size/2 : block_size;
2584    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2585    int obmc_stride= plane_index ? block_size : 2*block_size;
2586    int ref_stride= s->current_picture.linesize[plane_index];
2587    uint8_t *dst8= s->current_picture.data[plane_index];
2588    int w= p->width;
2589    int h= p->height;
2590
2591    if(s->keyframe || (s->avctx->debug&512)){
2592        if(mb_y==mb_h)
2593            return;
2594
2595        if(add){
2596            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2597//                DWTELEM * line = slice_buffer_get_line(sb, y);
2598                IDWTELEM * line = sb->line[y];
2599                for(x=0; x<w; x++){
2600//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2601                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2602                    v >>= FRAC_BITS;
2603                    if(v&(~255)) v= ~(v>>31);
2604                    dst8[x + y*ref_stride]= v;
2605                }
2606            }
2607        }else{
2608            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2609//                DWTELEM * line = slice_buffer_get_line(sb, y);
2610                IDWTELEM * line = sb->line[y];
2611                for(x=0; x<w; x++){
2612                    line[x] -= 128 << FRAC_BITS;
2613//                    buf[x + y*w]-= 128<<FRAC_BITS;
2614                }
2615            }
2616        }
2617
2618        return;
2619    }
2620
2621    for(mb_x=0; mb_x<=mb_w; mb_x++){
2622        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2623                   block_w*mb_x - block_w/2,
2624                   block_w*mb_y - block_w/2,
2625                   block_w, block_w,
2626                   w, h,
2627                   w, ref_stride, obmc_stride,
2628                   mb_x - 1, mb_y - 1,
2629                   add, 0, plane_index);
2630    }
2631}
2632
2633static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2634    Plane *p= &s->plane[plane_index];
2635    const int mb_w= s->b_width  << s->block_max_depth;
2636    const int mb_h= s->b_height << s->block_max_depth;
2637    int x, y, mb_x;
2638    int block_size = MB_SIZE >> s->block_max_depth;
2639    int block_w    = plane_index ? block_size/2 : block_size;
2640    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2641    const int obmc_stride= plane_index ? block_size : 2*block_size;
2642    int ref_stride= s->current_picture.linesize[plane_index];
2643    uint8_t *dst8= s->current_picture.data[plane_index];
2644    int w= p->width;
2645    int h= p->height;
2646
2647    if(s->keyframe || (s->avctx->debug&512)){
2648        if(mb_y==mb_h)
2649            return;
2650
2651        if(add){
2652            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2653                for(x=0; x<w; x++){
2654                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2655                    v >>= FRAC_BITS;
2656                    if(v&(~255)) v= ~(v>>31);
2657                    dst8[x + y*ref_stride]= v;
2658                }
2659            }
2660        }else{
2661            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2662                for(x=0; x<w; x++){
2663                    buf[x + y*w]-= 128<<FRAC_BITS;
2664                }
2665            }
2666        }
2667
2668        return;
2669    }
2670
2671    for(mb_x=0; mb_x<=mb_w; mb_x++){
2672        add_yblock(s, 0, NULL, buf, dst8, obmc,
2673                   block_w*mb_x - block_w/2,
2674                   block_w*mb_y - block_w/2,
2675                   block_w, block_w,
2676                   w, h,
2677                   w, ref_stride, obmc_stride,
2678                   mb_x - 1, mb_y - 1,
2679                   add, 1, plane_index);
2680    }
2681}
2682
2683static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2684    const int mb_h= s->b_height << s->block_max_depth;
2685    int mb_y;
2686    for(mb_y=0; mb_y<=mb_h; mb_y++)
2687        predict_slice(s, buf, plane_index, add, mb_y);
2688}
2689
2690static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2691    int i, x2, y2;
2692    Plane *p= &s->plane[plane_index];
2693    const int block_size = MB_SIZE >> s->block_max_depth;
2694    const int block_w    = plane_index ? block_size/2 : block_size;
2695    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2696    const int obmc_stride= plane_index ? block_size : 2*block_size;
2697    const int ref_stride= s->current_picture.linesize[plane_index];
2698    uint8_t *src= s-> input_picture.data[plane_index];
2699    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2700    const int b_stride = s->b_width << s->block_max_depth;
2701    const int w= p->width;
2702    const int h= p->height;
2703    int index= mb_x + mb_y*b_stride;
2704    BlockNode *b= &s->block[index];
2705    BlockNode backup= *b;
2706    int ab=0;
2707    int aa=0;
2708
2709    b->type|= BLOCK_INTRA;
2710    b->color[plane_index]= 0;
2711    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2712
2713    for(i=0; i<4; i++){
2714        int mb_x2= mb_x + (i &1) - 1;
2715        int mb_y2= mb_y + (i>>1) - 1;
2716        int x= block_w*mb_x2 + block_w/2;
2717        int y= block_w*mb_y2 + block_w/2;
2718
2719        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2720                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2721
2722        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2723            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2724                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2725                int obmc_v= obmc[index];
2726                int d;
2727                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2728                if(x<0) obmc_v += obmc[index + block_w];
2729                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2730                if(x+block_w>w) obmc_v += obmc[index - block_w];
2731                //FIXME precalculate this or simplify it somehow else
2732
2733                d = -dst[index] + (1<<(FRAC_BITS-1));
2734                dst[index] = d;
2735                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2736                aa += obmc_v * obmc_v; //FIXME precalculate this
2737            }
2738        }
2739    }
2740    *b= backup;
2741
2742    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2743}
2744
2745static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2746    const int b_stride = s->b_width << s->block_max_depth;
2747    const int b_height = s->b_height<< s->block_max_depth;
2748    int index= x + y*b_stride;
2749    const BlockNode *b     = &s->block[index];
2750    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2751    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2752    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2753    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2754    int dmx, dmy;
2755//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2756//  int my_context= av_log2(2*FFABS(left->my - top->my));
2757
2758    if(x<0 || x>=b_stride || y>=b_height)
2759        return 0;
2760/*
27611            0      0
276201X          1-2    1
2763001XX        3-6    2-3
27640001XXX      7-14   4-7
276500001XXXX   15-30   8-15
2766*/
2767//FIXME try accurate rate
2768//FIXME intra and inter predictors if surrounding blocks are not the same type
2769    if(b->type & BLOCK_INTRA){
2770        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2771                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2772                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2773    }else{
2774        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2775        dmx-= b->mx;
2776        dmy-= b->my;
2777        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2778                    + av_log2(2*FFABS(dmy))
2779                    + av_log2(2*b->ref));
2780    }
2781}
2782
2783static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2784    Plane *p= &s->plane[plane_index];
2785    const int block_size = MB_SIZE >> s->block_max_depth;
2786    const int block_w    = plane_index ? block_size/2 : block_size;
2787    const int obmc_stride= plane_index ? block_size : 2*block_size;
2788    const int ref_stride= s->current_picture.linesize[plane_index];
2789    uint8_t *dst= s->current_picture.data[plane_index];
2790    uint8_t *src= s->  input_picture.data[plane_index];
2791    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2792    uint8_t *cur = s->scratchbuf;
2793    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2794    const int b_stride = s->b_width << s->block_max_depth;
2795    const int b_height = s->b_height<< s->block_max_depth;
2796    const int w= p->width;
2797    const int h= p->height;
2798    int distortion;
2799    int rate= 0;
2800    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2801    int sx= block_w*mb_x - block_w/2;
2802    int sy= block_w*mb_y - block_w/2;
2803    int x0= FFMAX(0,-sx);
2804    int y0= FFMAX(0,-sy);
2805    int x1= FFMIN(block_w*2, w-sx);
2806    int y1= FFMIN(block_w*2, h-sy);
2807    int i,x,y;
2808
2809    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2810
2811    for(y=y0; y<y1; y++){
2812        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2813        const IDWTELEM *pred1 = pred + y*obmc_stride;
2814        uint8_t *cur1 = cur + y*ref_stride;
2815        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2816        for(x=x0; x<x1; x++){
2817#if FRAC_BITS >= LOG2_OBMC_MAX
2818            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2819#else
2820            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2821#endif
2822            v = (v + pred1[x]) >> FRAC_BITS;
2823            if(v&(~255)) v= ~(v>>31);
2824            dst1[x] = v;
2825        }
2826    }
2827
2828    /* copy the regions where obmc[] = (uint8_t)256 */
2829    if(LOG2_OBMC_MAX == 8
2830        && (mb_x == 0 || mb_x == b_stride-1)
2831        && (mb_y == 0 || mb_y == b_height-1)){
2832        if(mb_x == 0)
2833            x1 = block_w;
2834        else
2835            x0 = block_w;
2836        if(mb_y == 0)
2837            y1 = block_w;
2838        else
2839            y0 = block_w;
2840        for(y=y0; y<y1; y++)
2841            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2842    }
2843
2844    if(block_w==16){
2845        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2846        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2847        /* FIXME cmps overlap but do not cover the wavelet's whole support.
2848         * So improving the score of one block is not strictly guaranteed
2849         * to improve the score of the whole frame, thus iterative motion
2850         * estimation does not always converge. */
2851        if(s->avctx->me_cmp == FF_CMP_W97)
2852            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2853        else if(s->avctx->me_cmp == FF_CMP_W53)
2854            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2855        else{
2856            distortion = 0;
2857            for(i=0; i<4; i++){
2858                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2859                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2860            }
2861        }
2862    }else{
2863        assert(block_w==8);
2864        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2865    }
2866
2867    if(plane_index==0){
2868        for(i=0; i<4; i++){
2869/* ..RRr
2870 * .RXx.
2871 * rxx..
2872 */
2873            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2874        }
2875        if(mb_x == b_stride-2)
2876            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2877    }
2878    return distortion + rate*penalty_factor;
2879}
2880
2881static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2882    int i, y2;
2883    Plane *p= &s->plane[plane_index];
2884    const int block_size = MB_SIZE >> s->block_max_depth;
2885    const int block_w    = plane_index ? block_size/2 : block_size;
2886    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2887    const int obmc_stride= plane_index ? block_size : 2*block_size;
2888    const int ref_stride= s->current_picture.linesize[plane_index];
2889    uint8_t *dst= s->current_picture.data[plane_index];
2890    uint8_t *src= s-> input_picture.data[plane_index];
2891    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2892    // const has only been removed from zero_dst to suppress a warning
2893    static IDWTELEM zero_dst[4096]; //FIXME
2894    const int b_stride = s->b_width << s->block_max_depth;
2895    const int w= p->width;
2896    const int h= p->height;
2897    int distortion= 0;
2898    int rate= 0;
2899    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2900
2901    for(i=0; i<9; i++){
2902        int mb_x2= mb_x + (i%3) - 1;
2903        int mb_y2= mb_y + (i/3) - 1;
2904        int x= block_w*mb_x2 + block_w/2;
2905        int y= block_w*mb_y2 + block_w/2;
2906
2907        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2908                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2909
2910        //FIXME find a cleaner/simpler way to skip the outside stuff
2911        for(y2= y; y2<0; y2++)
2912            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2913        for(y2= h; y2<y+block_w; y2++)
2914            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2915        if(x<0){
2916            for(y2= y; y2<y+block_w; y2++)
2917                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2918        }
2919        if(x+block_w > w){
2920            for(y2= y; y2<y+block_w; y2++)
2921                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2922        }
2923
2924        assert(block_w== 8 || block_w==16);
2925        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2926    }
2927
2928    if(plane_index==0){
2929        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2930        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2931
2932/* ..RRRr
2933 * .RXXx.
2934 * .RXXx.
2935 * rxxx.
2936 */
2937        if(merged)
2938            rate = get_block_bits(s, mb_x, mb_y, 2);
2939        for(i=merged?4:0; i<9; i++){
2940            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2941            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2942        }
2943    }
2944    return distortion + rate*penalty_factor;
2945}
2946
2947static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2948    const int b_stride= s->b_width << s->block_max_depth;
2949    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2950    BlockNode backup= *block;
2951    int rd, index, value;
2952
2953    assert(mb_x>=0 && mb_y>=0);
2954    assert(mb_x<b_stride);
2955
2956    if(intra){
2957        block->color[0] = p[0];
2958        block->color[1] = p[1];
2959        block->color[2] = p[2];
2960        block->type |= BLOCK_INTRA;
2961    }else{
2962        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2963        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2964        if(s->me_cache[index] == value)
2965            return 0;
2966        s->me_cache[index]= value;
2967
2968        block->mx= p[0];
2969        block->my= p[1];
2970        block->type &= ~BLOCK_INTRA;
2971    }
2972
2973    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2974
2975//FIXME chroma
2976    if(rd < *best_rd){
2977        *best_rd= rd;
2978        return 1;
2979    }else{
2980        *block= backup;
2981        return 0;
2982    }
2983}
2984
2985/* special case for int[2] args we discard afterwards,
2986 * fixes compilation problem with gcc 2.95 */
2987static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2988    int p[2] = {p0, p1};
2989    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2990}
2991
2992static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2993    const int b_stride= s->b_width << s->block_max_depth;
2994    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2995    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2996    int rd, index, value;
2997
2998    assert(mb_x>=0 && mb_y>=0);
2999    assert(mb_x<b_stride);
3000    assert(((mb_x|mb_y)&1) == 0);
3001
3002    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3003    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3004    if(s->me_cache[index] == value)
3005        return 0;
3006    s->me_cache[index]= value;
3007
3008    block->mx= p0;
3009    block->my= p1;
3010    block->ref= ref;
3011    block->type &= ~BLOCK_INTRA;
3012    block[1]= block[b_stride]= block[b_stride+1]= *block;
3013
3014    rd= get_4block_rd(s, mb_x, mb_y, 0);
3015
3016//FIXME chroma
3017    if(rd < *best_rd){
3018        *best_rd= rd;
3019        return 1;
3020    }else{
3021        block[0]= backup[0];
3022        block[1]= backup[1];
3023        block[b_stride]= backup[2];
3024        block[b_stride+1]= backup[3];
3025        return 0;
3026    }
3027}
3028
3029static void iterative_me(SnowContext *s){
3030    int pass, mb_x, mb_y;
3031    const int b_width = s->b_width  << s->block_max_depth;
3032    const int b_height= s->b_height << s->block_max_depth;
3033    const int b_stride= b_width;
3034    int color[3];
3035
3036    {
3037        RangeCoder r = s->c;
3038        uint8_t state[sizeof(s->block_state)];
3039        memcpy(state, s->block_state, sizeof(s->block_state));
3040        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3041            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3042                encode_q_branch(s, 0, mb_x, mb_y);
3043        s->c = r;
3044        memcpy(s->block_state, state, sizeof(s->block_state));
3045    }
3046
3047    for(pass=0; pass<25; pass++){
3048        int change= 0;
3049
3050        for(mb_y= 0; mb_y<b_height; mb_y++){
3051            for(mb_x= 0; mb_x<b_width; mb_x++){
3052                int dia_change, i, j, ref;
3053                int best_rd= INT_MAX, ref_rd;
3054                BlockNode backup, ref_b;
3055                const int index= mb_x + mb_y * b_stride;
3056                BlockNode *block= &s->block[index];
3057                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3058                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3059                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3060                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3061                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3062                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3063                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3064                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3065                const int b_w= (MB_SIZE >> s->block_max_depth);
3066                uint8_t obmc_edged[b_w*2][b_w*2];
3067
3068                if(pass && (block->type & BLOCK_OPT))
3069                    continue;
3070                block->type |= BLOCK_OPT;
3071
3072                backup= *block;
3073
3074                if(!s->me_cache_generation)
3075                    memset(s->me_cache, 0, sizeof(s->me_cache));
3076                s->me_cache_generation += 1<<22;
3077
3078                //FIXME precalculate
3079                {
3080                    int x, y;
3081                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3082                    if(mb_x==0)
3083                        for(y=0; y<b_w*2; y++)
3084                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3085                    if(mb_x==b_stride-1)
3086                        for(y=0; y<b_w*2; y++)
3087                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3088                    if(mb_y==0){
3089                        for(x=0; x<b_w*2; x++)
3090                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3091                        for(y=1; y<b_w; y++)
3092                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3093                    }
3094                    if(mb_y==b_height-1){
3095                        for(x=0; x<b_w*2; x++)
3096                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3097                        for(y=b_w; y<b_w*2-1; y++)
3098                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3099                    }
3100                }
3101
3102                //skip stuff outside the picture
3103                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3104                    uint8_t *src= s->  input_picture.data[0];
3105                    uint8_t *dst= s->current_picture.data[0];
3106                    const int stride= s->current_picture.linesize[0];
3107                    const int block_w= MB_SIZE >> s->block_max_depth;
3108                    const int sx= block_w*mb_x - block_w/2;
3109                    const int sy= block_w*mb_y - block_w/2;
3110                    const int w= s->plane[0].width;
3111                    const int h= s->plane[0].height;
3112                    int y;
3113
3114                    for(y=sy; y<0; y++)
3115                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3116                    for(y=h; y<sy+block_w*2; y++)
3117                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3118                    if(sx<0){
3119                        for(y=sy; y<sy+block_w*2; y++)
3120                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3121                    }
3122                    if(sx+block_w*2 > w){
3123                        for(y=sy; y<sy+block_w*2; y++)
3124                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3125                    }
3126                }
3127
3128                // intra(black) = neighbors' contribution to the current block
3129                for(i=0; i<3; i++)
3130                    color[i]= get_dc(s, mb_x, mb_y, i);
3131
3132                // get previous score (cannot be cached due to OBMC)
3133                if(pass > 0 && (block->type&BLOCK_INTRA)){
3134                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3135                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3136                }else
3137                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3138
3139                ref_b= *block;
3140                ref_rd= best_rd;
3141                for(ref=0; ref < s->ref_frames; ref++){
3142                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3143                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3144                        continue;
3145                    block->ref= ref;
3146                    best_rd= INT_MAX;
3147
3148                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3149                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3150                    if(tb)
3151                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3152                    if(lb)
3153                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3154                    if(rb)
3155                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3156                    if(bb)
3157                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3158
3159                    /* fullpel ME */
3160                    //FIXME avoid subpel interpolation / round to nearest integer
3161                    do{
3162                        dia_change=0;
3163                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3164                            for(j=0; j<i; j++){
3165                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3166                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3167                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3168                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3169                            }
3170                        }
3171                    }while(dia_change);
3172                    /* subpel ME */
3173                    do{
3174                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3175                        dia_change=0;
3176                        for(i=0; i<8; i++)
3177                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3178                    }while(dia_change);
3179                    //FIXME or try the standard 2 pass qpel or similar
3180
3181                    mvr[0][0]= block->mx;
3182                    mvr[0][1]= block->my;
3183                    if(ref_rd > best_rd){
3184                        ref_rd= best_rd;
3185                        ref_b= *block;
3186                    }
3187                }
3188                best_rd= ref_rd;
3189                *block= ref_b;
3190#if 1
3191                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3192                //FIXME RD style color selection
3193#endif
3194                if(!same_block(block, &backup)){
3195                    if(tb ) tb ->type &= ~BLOCK_OPT;
3196                    if(lb ) lb ->type &= ~BLOCK_OPT;
3197                    if(rb ) rb ->type &= ~BLOCK_OPT;
3198                    if(bb ) bb ->type &= ~BLOCK_OPT;
3199                    if(tlb) tlb->type &= ~BLOCK_OPT;
3200                    if(trb) trb->type &= ~BLOCK_OPT;
3201                    if(blb) blb->type &= ~BLOCK_OPT;
3202                    if(brb) brb->type &= ~BLOCK_OPT;
3203                    change ++;
3204                }
3205            }
3206        }
3207        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3208        if(!change)
3209            break;
3210    }
3211
3212    if(s->block_max_depth == 1){
3213        int change= 0;
3214        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3215            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3216                int i;
3217                int best_rd, init_rd;
3218                const int index= mb_x + mb_y * b_stride;
3219                BlockNode *b[4];
3220
3221                b[0]= &s->block[index];
3222                b[1]= b[0]+1;
3223                b[2]= b[0]+b_stride;
3224                b[3]= b[2]+1;
3225                if(same_block(b[0], b[1]) &&
3226                   same_block(b[0], b[2]) &&
3227                   same_block(b[0], b[3]))
3228                    continue;
3229
3230                if(!s->me_cache_generation)
3231                    memset(s->me_cache, 0, sizeof(s->me_cache));
3232                s->me_cache_generation += 1<<22;
3233
3234                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3235
3236                //FIXME more multiref search?
3237                check_4block_inter(s, mb_x, mb_y,
3238                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3239                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3240
3241                for(i=0; i<4; i++)
3242                    if(!(b[i]->type&BLOCK_INTRA))
3243                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3244
3245                if(init_rd != best_rd)
3246                    change++;
3247            }
3248        }
3249        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3250    }
3251}
3252
3253static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3254    const int w= b->width;
3255    const int h= b->height;
3256    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3257    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3258    int x,y, thres1, thres2;
3259
3260    if(s->qlog == LOSSLESS_QLOG){
3261        for(y=0; y<h; y++)
3262            for(x=0; x<w; x++)
3263                dst[x + y*stride]= src[x + y*stride];
3264        return;
3265    }
3266
3267    bias= bias ? 0 : (3*qmul)>>3;
3268    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3269    thres2= 2*thres1;
3270
3271    if(!bias){
3272        for(y=0; y<h; y++){
3273            for(x=0; x<w; x++){
3274                int i= src[x + y*stride];
3275
3276                if((unsigned)(i+thres1) > thres2){
3277                    if(i>=0){
3278                        i<<= QEXPSHIFT;
3279                        i/= qmul; //FIXME optimize
3280                        dst[x + y*stride]=  i;
3281                    }else{
3282                        i= -i;
3283                        i<<= QEXPSHIFT;
3284                        i/= qmul; //FIXME optimize
3285                        dst[x + y*stride]= -i;
3286                    }
3287                }else
3288                    dst[x + y*stride]= 0;
3289            }
3290        }
3291    }else{
3292        for(y=0; y<h; y++){
3293            for(x=0; x<w; x++){
3294                int i= src[x + y*stride];
3295
3296                if((unsigned)(i+thres1) > thres2){
3297                    if(i>=0){
3298                        i<<= QEXPSHIFT;
3299                        i= (i + bias) / qmul; //FIXME optimize
3300                        dst[x + y*stride]=  i;
3301                    }else{
3302                        i= -i;
3303                        i<<= QEXPSHIFT;
3304                        i= (i + bias) / qmul; //FIXME optimize
3305                        dst[x + y*stride]= -i;
3306                    }
3307                }else
3308                    dst[x + y*stride]= 0;
3309            }
3310        }
3311    }
3312}
3313
3314static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3315    const int w= b->width;
3316    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3317    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3318    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3319    int x,y;
3320
3321    if(s->qlog == LOSSLESS_QLOG) return;
3322
3323    for(y=start_y; y<end_y; y++){
3324//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3325        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3326        for(x=0; x<w; x++){
3327            int i= line[x];
3328            if(i<0){
3329                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3330            }else if(i>0){
3331                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3332            }
3333        }
3334    }
3335}
3336
3337static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3338    const int w= b->width;
3339    const int h= b->height;
3340    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3341    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3342    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3343    int x,y;
3344
3345    if(s->qlog == LOSSLESS_QLOG) return;
3346
3347    for(y=0; y<h; y++){
3348        for(x=0; x<w; x++){
3349            int i= src[x + y*stride];
3350            if(i<0){
3351                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3352            }else if(i>0){
3353                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3354            }
3355        }
3356    }
3357}
3358
3359static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3360    const int w= b->width;
3361    const int h= b->height;
3362    int x,y;
3363
3364    for(y=h-1; y>=0; y--){
3365        for(x=w-1; x>=0; x--){
3366            int i= x + y*stride;
3367
3368            if(x){
3369                if(use_median){
3370                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3371                    else  src[i] -= src[i - 1];
3372                }else{
3373                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3374                    else  src[i] -= src[i - 1];
3375                }
3376            }else{
3377                if(y) src[i] -= src[i - stride];
3378            }
3379        }
3380    }
3381}
3382
3383static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3384    const int w= b->width;
3385    int x,y;
3386
3387    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3388    IDWTELEM * prev;
3389
3390    if (start_y != 0)
3391        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3392
3393    for(y=start_y; y<end_y; y++){
3394        prev = line;
3395//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3396        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3397        for(x=0; x<w; x++){
3398            if(x){
3399                if(use_median){
3400                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3401                    else  line[x] += line[x - 1];
3402                }else{
3403                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3404                    else  line[x] += line[x - 1];
3405                }
3406            }else{
3407                if(y) line[x] += prev[x];
3408            }
3409        }
3410    }
3411}
3412
3413static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3414    const int w= b->width;
3415    const int h= b->height;
3416    int x,y;
3417
3418    for(y=0; y<h; y++){
3419        for(x=0; x<w; x++){
3420            int i= x + y*stride;
3421
3422            if(x){
3423                if(use_median){
3424                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3425                    else  src[i] += src[i - 1];
3426                }else{
3427                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3428                    else  src[i] += src[i - 1];
3429                }
3430            }else{
3431                if(y) src[i] += src[i - stride];
3432            }
3433        }
3434    }
3435}
3436
3437static void encode_qlogs(SnowContext *s){
3438    int plane_index, level, orientation;
3439
3440    for(plane_index=0; plane_index<2; plane_index++){
3441        for(level=0; level<s->spatial_decomposition_count; level++){
3442            for(orientation=level ? 1:0; orientation<4; orientation++){
3443                if(orientation==2) continue;
3444                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3445            }
3446        }
3447    }
3448}
3449
3450static void encode_header(SnowContext *s){
3451    int plane_index, i;
3452    uint8_t kstate[32];
3453
3454    memset(kstate, MID_STATE, sizeof(kstate));
3455
3456    put_rac(&s->c, kstate, s->keyframe);
3457    if(s->keyframe || s->always_reset){
3458        reset_contexts(s);
3459        s->last_spatial_decomposition_type=
3460        s->last_qlog=
3461        s->last_qbias=
3462        s->last_mv_scale=
3463        s->last_block_max_depth= 0;
3464        for(plane_index=0; plane_index<2; plane_index++){
3465            Plane *p= &s->plane[plane_index];
3466            p->last_htaps=0;
3467            p->last_diag_mc=0;
3468            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3469        }
3470    }
3471    if(s->keyframe){
3472        put_symbol(&s->c, s->header_state, s->version, 0);
3473        put_rac(&s->c, s->header_state, s->always_reset);
3474        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3475        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3476        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3477        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3478        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3479        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3480        put_rac(&s->c, s->header_state, s->spatial_scalability);
3481//        put_rac(&s->c, s->header_state, s->rate_scalability);
3482        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3483
3484        encode_qlogs(s);
3485    }
3486
3487    if(!s->keyframe){
3488        int update_mc=0;
3489        for(plane_index=0; plane_index<2; plane_index++){
3490            Plane *p= &s->plane[plane_index];
3491            update_mc |= p->last_htaps   != p->htaps;
3492            update_mc |= p->last_diag_mc != p->diag_mc;
3493            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3494        }
3495        put_rac(&s->c, s->header_state, update_mc);
3496        if(update_mc){
3497            for(plane_index=0; plane_index<2; plane_index++){
3498                Plane *p= &s->plane[plane_index];
3499                put_rac(&s->c, s->header_state, p->diag_mc);
3500                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3501                for(i= p->htaps/2; i; i--)
3502                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3503            }
3504        }
3505        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3506            put_rac(&s->c, s->header_state, 1);
3507            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3508            encode_qlogs(s);
3509        }else
3510            put_rac(&s->c, s->header_state, 0);
3511    }
3512
3513    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3514    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3515    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3516    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3517    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3518
3519}
3520
3521static void update_last_header_values(SnowContext *s){
3522    int plane_index;
3523
3524    if(!s->keyframe){
3525        for(plane_index=0; plane_index<2; plane_index++){
3526            Plane *p= &s->plane[plane_index];
3527            p->last_diag_mc= p->diag_mc;
3528            p->last_htaps  = p->htaps;
3529            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3530        }
3531    }
3532
3533    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3534    s->last_qlog                        = s->qlog;
3535    s->last_qbias                       = s->qbias;
3536    s->last_mv_scale                    = s->mv_scale;
3537    s->last_block_max_depth             = s->block_max_depth;
3538    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3539}
3540
3541static void decode_qlogs(SnowContext *s){
3542    int plane_index, level, orientation;
3543
3544    for(plane_index=0; plane_index<3; plane_index++){
3545        for(level=0; level<s->spatial_decomposition_count; level++){
3546            for(orientation=level ? 1:0; orientation<4; orientation++){
3547                int q;
3548                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3549                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3550                else                    q= get_symbol(&s->c, s->header_state, 1);
3551                s->plane[plane_index].band[level][orientation].qlog= q;
3552            }
3553        }
3554    }
3555}
3556
3557static int decode_header(SnowContext *s){
3558    int plane_index, tmp;
3559    uint8_t kstate[32];
3560
3561    memset(kstate, MID_STATE, sizeof(kstate));
3562
3563    s->keyframe= get_rac(&s->c, kstate);
3564    if(s->keyframe || s->always_reset){
3565        reset_contexts(s);
3566        s->spatial_decomposition_type=
3567        s->qlog=
3568        s->qbias=
3569        s->mv_scale=
3570        s->block_max_depth= 0;
3571    }
3572    if(s->keyframe){
3573        s->version= get_symbol(&s->c, s->header_state, 0);
3574        if(s->version>0){
3575            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3576            return -1;
3577        }
3578        s->always_reset= get_rac(&s->c, s->header_state);
3579        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3580        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3581        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3582        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3583        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3584        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3585        s->spatial_scalability= get_rac(&s->c, s->header_state);
3586//        s->rate_scalability= get_rac(&s->c, s->header_state);
3587        tmp= get_symbol(&s->c, s->header_state, 0)+1;
3588        if(tmp < 1 || tmp > MAX_REF_FRAMES){
3589            av_log(s->avctx, AV_LOG_ERROR, "reference frame count is %d\n", tmp);
3590            return -1;
3591        }
3592        s->max_ref_frames= tmp;
3593
3594        decode_qlogs(s);
3595    }
3596
3597    if(!s->keyframe){
3598        if(get_rac(&s->c, s->header_state)){
3599            for(plane_index=0; plane_index<2; plane_index++){
3600                int htaps, i, sum=0;
3601                Plane *p= &s->plane[plane_index];
3602                p->diag_mc= get_rac(&s->c, s->header_state);
3603                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3604                if((unsigned)htaps > HTAPS_MAX || htaps==0)
3605                    return -1;
3606                p->htaps= htaps;
3607                for(i= htaps/2; i; i--){
3608                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3609                    sum += p->hcoeff[i];
3610                }
3611                p->hcoeff[0]= 32-sum;
3612            }
3613            s->plane[2].diag_mc= s->plane[1].diag_mc;
3614            s->plane[2].htaps  = s->plane[1].htaps;
3615            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3616        }
3617        if(get_rac(&s->c, s->header_state)){
3618            s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3619            decode_qlogs(s);
3620        }
3621    }
3622
3623    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3624    if(s->spatial_decomposition_type > 1){
3625        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3626        return -1;
3627    }
3628
3629    s->qlog           += get_symbol(&s->c, s->header_state, 1);
3630    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3631    s->qbias          += get_symbol(&s->c, s->header_state, 1);
3632    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3633    if(s->block_max_depth > 1 || s->block_max_depth < 0){
3634        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3635        s->block_max_depth= 0;
3636        return -1;
3637    }
3638
3639    return 0;
3640}
3641
3642static void init_qexp(void){
3643    int i;
3644    double v=128;
3645
3646    for(i=0; i<QROOT; i++){
3647        qexp[i]= lrintf(v);
3648        v *= pow(2, 1.0 / QROOT);
3649    }
3650}
3651
3652static av_cold int common_init(AVCodecContext *avctx){
3653    SnowContext *s = avctx->priv_data;
3654    int width, height;
3655    int i, j;
3656
3657    s->avctx= avctx;
3658    s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
3659
3660    dsputil_init(&s->dsp, avctx);
3661
3662#define mcf(dx,dy)\
3663    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3664    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3665        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3666    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3667    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3668        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3669
3670    mcf( 0, 0)
3671    mcf( 4, 0)
3672    mcf( 8, 0)
3673    mcf(12, 0)
3674    mcf( 0, 4)
3675    mcf( 4, 4)
3676    mcf( 8, 4)
3677    mcf(12, 4)
3678    mcf( 0, 8)
3679    mcf( 4, 8)
3680    mcf( 8, 8)
3681    mcf(12, 8)
3682    mcf( 0,12)
3683    mcf( 4,12)
3684    mcf( 8,12)
3685    mcf(12,12)
3686
3687#define mcfh(dx,dy)\
3688    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3689    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3690        mc_block_hpel ## dx ## dy ## 16;\
3691    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3692    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3693        mc_block_hpel ## dx ## dy ## 8;
3694
3695    mcfh(0, 0)
3696    mcfh(8, 0)
3697    mcfh(0, 8)
3698    mcfh(8, 8)
3699
3700    if(!qexp[0])
3701        init_qexp();
3702
3703//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3704
3705    width= s->avctx->width;
3706    height= s->avctx->height;
3707
3708    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3709    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3710
3711    for(i=0; i<MAX_REF_FRAMES; i++)
3712        for(j=0; j<MAX_REF_FRAMES; j++)
3713            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3714
3715    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3716    s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3717
3718    return 0;
3719}
3720
3721static int common_init_after_header(AVCodecContext *avctx){
3722    SnowContext *s = avctx->priv_data;
3723    int plane_index, level, orientation;
3724
3725    for(plane_index=0; plane_index<3; plane_index++){
3726        int w= s->avctx->width;
3727        int h= s->avctx->height;
3728
3729        if(plane_index){
3730            w>>= s->chroma_h_shift;
3731            h>>= s->chroma_v_shift;
3732        }
3733        s->plane[plane_index].width = w;
3734        s->plane[plane_index].height= h;
3735
3736        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3737            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3738                SubBand *b= &s->plane[plane_index].band[level][orientation];
3739
3740                b->buf= s->spatial_dwt_buffer;
3741                b->level= level;
3742                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3743                b->width = (w + !(orientation&1))>>1;
3744                b->height= (h + !(orientation>1))>>1;
3745
3746                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3747                b->buf_x_offset = 0;
3748                b->buf_y_offset = 0;
3749
3750                if(orientation&1){
3751                    b->buf += (w+1)>>1;
3752                    b->buf_x_offset = (w+1)>>1;
3753                }
3754                if(orientation>1){
3755                    b->buf += b->stride>>1;
3756                    b->buf_y_offset = b->stride_line >> 1;
3757                }
3758                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3759
3760                if(level)
3761                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3762                //FIXME avoid this realloc
3763                av_freep(&b->x_coeff);
3764                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3765            }
3766            w= (w+1)>>1;
3767            h= (h+1)>>1;
3768        }
3769    }
3770
3771    return 0;
3772}
3773
3774static int qscale2qlog(int qscale){
3775    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3776           + 61*QROOT/8; //<64 >60
3777}
3778
3779static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3780{
3781    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3782     * FIXME we know exact mv bits at this point,
3783     * but ratecontrol isn't set up to include them. */
3784    uint32_t coef_sum= 0;
3785    int level, orientation, delta_qlog;
3786
3787    for(level=0; level<s->spatial_decomposition_count; level++){
3788        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3789            SubBand *b= &s->plane[0].band[level][orientation];
3790            IDWTELEM *buf= b->ibuf;
3791            const int w= b->width;
3792            const int h= b->height;
3793            const int stride= b->stride;
3794            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3795            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3796            const int qdiv= (1<<16)/qmul;
3797            int x, y;
3798            //FIXME this is ugly
3799            for(y=0; y<h; y++)
3800                for(x=0; x<w; x++)
3801                    buf[x+y*stride]= b->buf[x+y*stride];
3802            if(orientation==0)
3803                decorrelate(s, b, buf, stride, 1, 0);
3804            for(y=0; y<h; y++)
3805                for(x=0; x<w; x++)
3806                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3807        }
3808    }
3809
3810    /* ugly, ratecontrol just takes a sqrt again */
3811    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3812    assert(coef_sum < INT_MAX);
3813
3814    if(pict->pict_type == FF_I_TYPE){
3815        s->m.current_picture.mb_var_sum= coef_sum;
3816        s->m.current_picture.mc_mb_var_sum= 0;
3817    }else{
3818        s->m.current_picture.mc_mb_var_sum= coef_sum;
3819        s->m.current_picture.mb_var_sum= 0;
3820    }
3821
3822    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3823    if (pict->quality < 0)
3824        return INT_MIN;
3825    s->lambda= pict->quality * 3/2;
3826    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3827    s->qlog+= delta_qlog;
3828    return delta_qlog;
3829}
3830
3831static void calculate_visual_weight(SnowContext *s, Plane *p){
3832    int width = p->width;
3833    int height= p->height;
3834    int level, orientation, x, y;
3835
3836    for(level=0; level<s->spatial_decomposition_count; level++){
3837        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3838            SubBand *b= &p->band[level][orientation];
3839            IDWTELEM *ibuf= b->ibuf;
3840            int64_t error=0;
3841
3842            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3843            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3844            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3845            for(y=0; y<height; y++){
3846                for(x=0; x<width; x++){
3847                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3848                    error += d*d;
3849                }
3850            }
3851
3852            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3853        }
3854    }
3855}
3856
3857#define QUANTIZE2 0
3858
3859#if QUANTIZE2==1
3860#define Q2_STEP 8
3861
3862static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3863    SubBand *b= &p->band[level][orientation];
3864    int x, y;
3865    int xo=0;
3866    int yo=0;
3867    int step= 1 << (s->spatial_decomposition_count - level);
3868
3869    if(orientation&1)
3870        xo= step>>1;
3871    if(orientation&2)
3872        yo= step>>1;
3873
3874    //FIXME bias for nonzero ?
3875    //FIXME optimize
3876    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3877    for(y=0; y<p->height; y++){
3878        for(x=0; x<p->width; x++){
3879            int sx= (x-xo + step/2) / step / Q2_STEP;
3880            int sy= (y-yo + step/2) / step / Q2_STEP;
3881            int v= r0[x + y*p->width] - r1[x + y*p->width];
3882            assert(sx>=0 && sy>=0 && sx < score_stride);
3883            v= ((v+8)>>4)<<4;
3884            score[sx + sy*score_stride] += v*v;
3885            assert(score[sx + sy*score_stride] >= 0);
3886        }
3887    }
3888}
3889
3890static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3891    int level, orientation;
3892
3893    for(level=0; level<s->spatial_decomposition_count; level++){
3894        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3895            SubBand *b= &p->band[level][orientation];
3896            IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3897
3898            dequantize(s, b, dst, b->stride);
3899        }
3900    }
3901}
3902
3903static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3904    int level, orientation, ys, xs, x, y, pass;
3905    IDWTELEM best_dequant[height * stride];
3906    IDWTELEM idwt2_buffer[height * stride];
3907    const int score_stride= (width + 10)/Q2_STEP;
3908    int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3909    int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3910    int threshold= (s->m.lambda * s->m.lambda) >> 6;
3911
3912    //FIXME pass the copy cleanly ?
3913
3914//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3915    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3916
3917    for(level=0; level<s->spatial_decomposition_count; level++){
3918        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3919            SubBand *b= &p->band[level][orientation];
3920            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3921             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
3922            assert(src == b->buf); // code does not depend on this but it is true currently
3923
3924            quantize(s, b, dst, src, b->stride, s->qbias);
3925        }
3926    }
3927    for(pass=0; pass<1; pass++){
3928        if(s->qbias == 0) //keyframe
3929            continue;
3930        for(level=0; level<s->spatial_decomposition_count; level++){
3931            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3932                SubBand *b= &p->band[level][orientation];
3933                IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3934                IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3935
3936                for(ys= 0; ys<Q2_STEP; ys++){
3937                    for(xs= 0; xs<Q2_STEP; xs++){
3938                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3939                        dequantize_all(s, p, idwt2_buffer, width, height);
3940                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3941                        find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3942                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3943                        for(y=ys; y<b->height; y+= Q2_STEP){
3944                            for(x=xs; x<b->width; x+= Q2_STEP){
3945                                if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3946                                if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3947                                //FIXME try more than just --
3948                            }
3949                        }
3950                        dequantize_all(s, p, idwt2_buffer, width, height);
3951                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3952                        find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3953                        for(y=ys; y<b->height; y+= Q2_STEP){
3954                            for(x=xs; x<b->width; x+= Q2_STEP){
3955                                int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3956                                if(score[score_idx] <= best_score[score_idx] + threshold){
3957                                    best_score[score_idx]= score[score_idx];
3958                                    if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3959                                    if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3960                                    //FIXME copy instead
3961                                }
3962                            }
3963                        }
3964                    }
3965                }
3966            }
3967        }
3968    }
3969    memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3970}
3971
3972#endif /* QUANTIZE2==1 */
3973
3974static av_cold int encode_init(AVCodecContext *avctx)
3975{
3976    SnowContext *s = avctx->priv_data;
3977    int plane_index;
3978
3979    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3980        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3981               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3982        return -1;
3983    }
3984
3985    if(avctx->prediction_method == DWT_97
3986       && (avctx->flags & CODEC_FLAG_QSCALE)
3987       && avctx->global_quality == 0){
3988        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3989        return -1;
3990    }
3991
3992    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3993
3994    s->chroma_h_shift= 1; //FIXME XXX
3995    s->chroma_v_shift= 1;
3996
3997    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3998    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3999
4000    for(plane_index=0; plane_index<3; plane_index++){
4001        s->plane[plane_index].diag_mc= 1;
4002        s->plane[plane_index].htaps= 6;
4003        s->plane[plane_index].hcoeff[0]=  40;
4004        s->plane[plane_index].hcoeff[1]= -10;
4005        s->plane[plane_index].hcoeff[2]=   2;
4006        s->plane[plane_index].fast_mc= 1;
4007    }
4008
4009    common_init(avctx);
4010    alloc_blocks(s);
4011
4012    s->version=0;
4013
4014    s->m.avctx   = avctx;
4015    s->m.flags   = avctx->flags;
4016    s->m.bit_rate= avctx->bit_rate;
4017
4018    s->m.me.temp      =
4019    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4020    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4021    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4022    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4023    h263_encode_init(&s->m); //mv_penalty
4024
4025    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4026
4027    if(avctx->flags&CODEC_FLAG_PASS1){
4028        if(!avctx->stats_out)
4029            avctx->stats_out = av_mallocz(256);
4030    }
4031    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4032        if(ff_rate_control_init(&s->m) < 0)
4033            return -1;
4034    }
4035    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4036
4037    avctx->coded_frame= &s->current_picture;
4038    switch(avctx->pix_fmt){
4039//    case PIX_FMT_YUV444P:
4040//    case PIX_FMT_YUV422P:
4041    case PIX_FMT_YUV420P:
4042    case PIX_FMT_GRAY8:
4043//    case PIX_FMT_YUV411P:
4044//    case PIX_FMT_YUV410P:
4045        s->colorspace_type= 0;
4046        break;
4047/*    case PIX_FMT_RGB32:
4048        s->colorspace= 1;
4049        break;*/
4050    default:
4051        av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4052        return -1;
4053    }
4054//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4055    s->chroma_h_shift= 1;
4056    s->chroma_v_shift= 1;
4057
4058    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4059    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4060
4061    s->avctx->get_buffer(s->avctx, &s->input_picture);
4062
4063    if(s->avctx->me_method == ME_ITER){
4064        int i;
4065        int size= s->b_width * s->b_height << 2*s->block_max_depth;
4066        for(i=0; i<s->max_ref_frames; i++){
4067            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4068            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4069        }
4070    }
4071
4072    return 0;
4073}
4074
4075#define USE_HALFPEL_PLANE 0
4076
4077static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4078    int p,x,y;
4079
4080    assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4081
4082    for(p=0; p<3; p++){
4083        int is_chroma= !!p;
4084        int w= s->avctx->width  >>is_chroma;
4085        int h= s->avctx->height >>is_chroma;
4086        int ls= frame->linesize[p];
4087        uint8_t *src= frame->data[p];
4088
4089        halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4090        halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4091        halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4092
4093        halfpel[0][p]= src;
4094        for(y=0; y<h; y++){
4095            for(x=0; x<w; x++){
4096                int i= y*ls + x;
4097
4098                halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4099            }
4100        }
4101        for(y=0; y<h; y++){
4102            for(x=0; x<w; x++){
4103                int i= y*ls + x;
4104
4105                halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4106            }
4107        }
4108        src= halfpel[1][p];
4109        for(y=0; y<h; y++){
4110            for(x=0; x<w; x++){
4111                int i= y*ls + x;
4112
4113                halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4114            }
4115        }
4116
4117//FIXME border!
4118    }
4119}
4120
4121static int frame_start(SnowContext *s){
4122   AVFrame tmp;
4123   int w= s->avctx->width; //FIXME round up to x16 ?
4124   int h= s->avctx->height;
4125
4126    if(s->current_picture.data[0]){
4127        s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
4128        s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4129        s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4130    }
4131
4132    tmp= s->last_picture[s->max_ref_frames-1];
4133    memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4134    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4135    if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4136        halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4137    s->last_picture[0]= s->current_picture;
4138    s->current_picture= tmp;
4139
4140    if(s->keyframe){
4141        s->ref_frames= 0;
4142    }else{
4143        int i;
4144        for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4145            if(i && s->last_picture[i-1].key_frame)
4146                break;
4147        s->ref_frames= i;
4148    }
4149
4150    s->current_picture.reference= 1;
4151    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4152        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4153        return -1;
4154    }
4155
4156    s->current_picture.key_frame= s->keyframe;
4157
4158    return 0;
4159}
4160
4161static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4162    SnowContext *s = avctx->priv_data;
4163    RangeCoder * const c= &s->c;
4164    AVFrame *pict = data;
4165    const int width= s->avctx->width;
4166    const int height= s->avctx->height;
4167    int level, orientation, plane_index, i, y;
4168    uint8_t rc_header_bak[sizeof(s->header_state)];
4169    uint8_t rc_block_bak[sizeof(s->block_state)];
4170
4171    ff_init_range_encoder(c, buf, buf_size);
4172    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4173
4174    for(i=0; i<3; i++){
4175        int shift= !!i;
4176        for(y=0; y<(height>>shift); y++)
4177            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4178                   &pict->data[i][y * pict->linesize[i]],
4179                   width>>shift);
4180    }
4181    s->new_picture = *pict;
4182
4183    s->m.picture_number= avctx->frame_number;
4184    if(avctx->flags&CODEC_FLAG_PASS2){
4185        s->m.pict_type =
4186        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4187        s->keyframe= pict->pict_type==FF_I_TYPE;
4188        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4189            pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4190            if (pict->quality < 0)
4191                return -1;
4192        }
4193    }else{
4194        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4195        s->m.pict_type=
4196        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4197    }
4198
4199    if(s->pass1_rc && avctx->frame_number == 0)
4200        pict->quality= 2*FF_QP2LAMBDA;
4201    if(pict->quality){
4202        s->qlog= qscale2qlog(pict->quality);
4203        s->lambda = pict->quality * 3/2;
4204    }
4205    if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4206        s->qlog= LOSSLESS_QLOG;
4207        s->lambda = 0;
4208    }//else keep previous frame's qlog until after motion estimation
4209
4210    frame_start(s);
4211
4212    s->m.current_picture_ptr= &s->m.current_picture;
4213    if(pict->pict_type == FF_P_TYPE){
4214        int block_width = (width +15)>>4;
4215        int block_height= (height+15)>>4;
4216        int stride= s->current_picture.linesize[0];
4217
4218        assert(s->current_picture.data[0]);
4219        assert(s->last_picture[0].data[0]);
4220
4221        s->m.avctx= s->avctx;
4222        s->m.current_picture.data[0]= s->current_picture.data[0];
4223        s->m.   last_picture.data[0]= s->last_picture[0].data[0];
4224        s->m.    new_picture.data[0]= s->  input_picture.data[0];
4225        s->m.   last_picture_ptr= &s->m.   last_picture;
4226        s->m.linesize=
4227        s->m.   last_picture.linesize[0]=
4228        s->m.    new_picture.linesize[0]=
4229        s->m.current_picture.linesize[0]= stride;
4230        s->m.uvlinesize= s->current_picture.linesize[1];
4231        s->m.width = width;
4232        s->m.height= height;
4233        s->m.mb_width = block_width;
4234        s->m.mb_height= block_height;
4235        s->m.mb_stride=   s->m.mb_width+1;
4236        s->m.b8_stride= 2*s->m.mb_width+1;
4237        s->m.f_code=1;
4238        s->m.pict_type= pict->pict_type;
4239        s->m.me_method= s->avctx->me_method;
4240        s->m.me.scene_change_score=0;
4241        s->m.flags= s->avctx->flags;
4242        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4243        s->m.out_format= FMT_H263;
4244        s->m.unrestricted_mv= 1;
4245
4246        s->m.lambda = s->lambda;
4247        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4248        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4249
4250        s->m.dsp= s->dsp; //move
4251        ff_init_me(&s->m);
4252        s->dsp= s->m.dsp;
4253    }
4254
4255    if(s->pass1_rc){
4256        memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4257        memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4258    }
4259
4260redo_frame:
4261
4262    if(pict->pict_type == FF_I_TYPE)
4263        s->spatial_decomposition_count= 5;
4264    else
4265        s->spatial_decomposition_count= 5;
4266
4267    s->m.pict_type = pict->pict_type;
4268    s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4269
4270    common_init_after_header(avctx);
4271
4272    if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4273        for(plane_index=0; plane_index<3; plane_index++){
4274            calculate_visual_weight(s, &s->plane[plane_index]);
4275        }
4276    }
4277
4278    encode_header(s);
4279    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4280    encode_blocks(s, 1);
4281    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4282
4283    for(plane_index=0; plane_index<3; plane_index++){
4284        Plane *p= &s->plane[plane_index];
4285        int w= p->width;
4286        int h= p->height;
4287        int x, y;
4288//        int bits= put_bits_count(&s->c.pb);
4289
4290        if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4291            //FIXME optimize
4292            if(pict->data[plane_index]) //FIXME gray hack
4293                for(y=0; y<h; y++){
4294                    for(x=0; x<w; x++){
4295                        s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4296                    }
4297                }
4298            predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4299
4300            if(   plane_index==0
4301               && pict->pict_type == FF_P_TYPE
4302               && !(avctx->flags&CODEC_FLAG_PASS2)
4303               && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4304                ff_init_range_encoder(c, buf, buf_size);
4305                ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4306                pict->pict_type= FF_I_TYPE;
4307                s->keyframe=1;
4308                s->current_picture.key_frame=1;
4309                goto redo_frame;
4310            }
4311
4312            if(s->qlog == LOSSLESS_QLOG){
4313                for(y=0; y<h; y++){
4314                    for(x=0; x<w; x++){
4315                        s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4316                    }
4317                }
4318            }else{
4319                for(y=0; y<h; y++){
4320                    for(x=0; x<w; x++){
4321                        s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4322                    }
4323                }
4324            }
4325
4326            /*  if(QUANTIZE2)
4327                dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4328            else*/
4329                ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4330
4331            if(s->pass1_rc && plane_index==0){
4332                int delta_qlog = ratecontrol_1pass(s, pict);
4333                if (delta_qlog <= INT_MIN)
4334                    return -1;
4335                if(delta_qlog){
4336                    //reordering qlog in the bitstream would eliminate this reset
4337                    ff_init_range_encoder(c, buf, buf_size);
4338                    memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4339                    memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4340                    encode_header(s);
4341                    encode_blocks(s, 0);
4342                }
4343            }
4344
4345            for(level=0; level<s->spatial_decomposition_count; level++){
4346                for(orientation=level ? 1 : 0; orientation<4; orientation++){
4347                    SubBand *b= &p->band[level][orientation];
4348
4349                    if(!QUANTIZE2)
4350                        quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4351                    if(orientation==0)
4352                        decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4353                    encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4354                    assert(b->parent==NULL || b->parent->stride == b->stride*2);
4355                    if(orientation==0)
4356                        correlate(s, b, b->ibuf, b->stride, 1, 0);
4357                }
4358            }
4359
4360            for(level=0; level<s->spatial_decomposition_count; level++){
4361                for(orientation=level ? 1 : 0; orientation<4; orientation++){
4362                    SubBand *b= &p->band[level][orientation];
4363
4364                    dequantize(s, b, b->ibuf, b->stride);
4365                }
4366            }
4367
4368            ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4369            if(s->qlog == LOSSLESS_QLOG){
4370                for(y=0; y<h; y++){
4371                    for(x=0; x<w; x++){
4372                        s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4373                    }
4374                }
4375            }
4376            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4377        }else{
4378            //ME/MC only
4379            if(pict->pict_type == FF_I_TYPE){
4380                for(y=0; y<h; y++){
4381                    for(x=0; x<w; x++){
4382                        s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4383                            pict->data[plane_index][y*pict->linesize[plane_index] + x];
4384                    }
4385                }
4386            }else{
4387                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4388                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4389            }
4390        }
4391        if(s->avctx->flags&CODEC_FLAG_PSNR){
4392            int64_t error= 0;
4393
4394            if(pict->data[plane_index]) //FIXME gray hack
4395                for(y=0; y<h; y++){
4396                    for(x=0; x<w; x++){
4397                        int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4398                        error += d*d;
4399                    }
4400                }
4401            s->avctx->error[plane_index] += error;
4402            s->current_picture.error[plane_index] = error;
4403        }
4404
4405    }
4406
4407    update_last_header_values(s);
4408
4409    if(s->last_picture[s->max_ref_frames-1].data[0]){
4410        avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4411        for(i=0; i<9; i++)
4412            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4413                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4414    }
4415
4416    s->current_picture.coded_picture_number = avctx->frame_number;
4417    s->current_picture.pict_type = pict->pict_type;
4418    s->current_picture.quality = pict->quality;
4419    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4420    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4421    s->m.current_picture.display_picture_number =
4422    s->m.current_picture.coded_picture_number = avctx->frame_number;
4423    s->m.current_picture.quality = pict->quality;
4424    s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4425    if(s->pass1_rc)
4426        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4427            return -1;
4428    if(avctx->flags&CODEC_FLAG_PASS1)
4429        ff_write_pass1_stats(&s->m);
4430    s->m.last_pict_type = s->m.pict_type;
4431    avctx->frame_bits = s->m.frame_bits;
4432    avctx->mv_bits = s->m.mv_bits;
4433    avctx->misc_bits = s->m.misc_bits;
4434    avctx->p_tex_bits = s->m.p_tex_bits;
4435
4436    emms_c();
4437
4438    return ff_rac_terminate(c);
4439}
4440
4441static av_cold void common_end(SnowContext *s){
4442    int plane_index, level, orientation, i;
4443
4444    av_freep(&s->spatial_dwt_buffer);
4445    av_freep(&s->spatial_idwt_buffer);
4446
4447    s->m.me.temp= NULL;
4448    av_freep(&s->m.me.scratchpad);
4449    av_freep(&s->m.me.map);
4450    av_freep(&s->m.me.score_map);
4451    av_freep(&s->m.obmc_scratchpad);
4452
4453    av_freep(&s->block);
4454    av_freep(&s->scratchbuf);
4455
4456    for(i=0; i<MAX_REF_FRAMES; i++){
4457        av_freep(&s->ref_mvs[i]);
4458        av_freep(&s->ref_scores[i]);
4459        if(s->last_picture[i].data[0])
4460            s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4461    }
4462
4463    for(plane_index=0; plane_index<3; plane_index++){
4464        for(level=s->spatial_decomposition_count-1; level>=0; level--){
4465            for(orientation=level ? 1 : 0; orientation<4; orientation++){
4466                SubBand *b= &s->plane[plane_index].band[level][orientation];
4467
4468                av_freep(&b->x_coeff);
4469            }
4470        }
4471    }
4472}
4473
4474static av_cold int encode_end(AVCodecContext *avctx)
4475{
4476    SnowContext *s = avctx->priv_data;
4477
4478    common_end(s);
4479    av_free(avctx->stats_out);
4480
4481    return 0;
4482}
4483
4484static av_cold int decode_init(AVCodecContext *avctx)
4485{
4486    avctx->pix_fmt= PIX_FMT_YUV420P;
4487
4488    common_init(avctx);
4489
4490    return 0;
4491}
4492
4493static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
4494    SnowContext *s = avctx->priv_data;
4495    RangeCoder * const c= &s->c;
4496    int bytes_read;
4497    AVFrame *picture = data;
4498    int level, orientation, plane_index, i;
4499
4500    ff_init_range_decoder(c, buf, buf_size);
4501    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4502
4503    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4504    if(decode_header(s)<0)
4505        return -1;
4506    common_init_after_header(avctx);
4507
4508    // realloc slice buffer for the case that spatial_decomposition_count changed
4509    slice_buffer_destroy(&s->sb);
4510    slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4511
4512    for(plane_index=0; plane_index<3; plane_index++){
4513        Plane *p= &s->plane[plane_index];
4514        p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4515                                              && p->hcoeff[1]==-10
4516                                              && p->hcoeff[2]==2;
4517    }
4518
4519    alloc_blocks(s);
4520
4521    frame_start(s);
4522    //keyframe flag duplication mess FIXME
4523    if(avctx->debug&FF_DEBUG_PICT_INFO)
4524        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4525
4526    decode_blocks(s);
4527
4528    for(plane_index=0; plane_index<3; plane_index++){
4529        Plane *p= &s->plane[plane_index];
4530        int w= p->width;
4531        int h= p->height;
4532        int x, y;
4533        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4534
4535        if(s->avctx->debug&2048){
4536            memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4537            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4538
4539            for(y=0; y<h; y++){
4540                for(x=0; x<w; x++){
4541                    int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4542                    s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4543                }
4544            }
4545        }
4546
4547        {
4548        for(level=0; level<s->spatial_decomposition_count; level++){
4549            for(orientation=level ? 1 : 0; orientation<4; orientation++){
4550                SubBand *b= &p->band[level][orientation];
4551                unpack_coeffs(s, b, b->parent, orientation);
4552            }
4553        }
4554        }
4555
4556        {
4557        const int mb_h= s->b_height << s->block_max_depth;
4558        const int block_size = MB_SIZE >> s->block_max_depth;
4559        const int block_w    = plane_index ? block_size/2 : block_size;
4560        int mb_y;
4561        DWTCompose cs[MAX_DECOMPOSITIONS];
4562        int yd=0, yq=0;
4563        int y;
4564        int end_y;
4565
4566        ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4567        for(mb_y=0; mb_y<=mb_h; mb_y++){
4568
4569            int slice_starty = block_w*mb_y;
4570            int slice_h = block_w*(mb_y+1);
4571            if (!(s->keyframe || s->avctx->debug&512)){
4572                slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4573                slice_h -= (block_w >> 1);
4574            }
4575
4576            for(level=0; level<s->spatial_decomposition_count; level++){
4577                for(orientation=level ? 1 : 0; orientation<4; orientation++){
4578                    SubBand *b= &p->band[level][orientation];
4579                    int start_y;
4580                    int end_y;
4581                    int our_mb_start = mb_y;
4582                    int our_mb_end = (mb_y + 1);
4583                    const int extra= 3;
4584                    start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4585                    end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4586                    if (!(s->keyframe || s->avctx->debug&512)){
4587                        start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4588                        end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4589                    }
4590                    start_y = FFMIN(b->height, start_y);
4591                    end_y = FFMIN(b->height, end_y);
4592
4593                    if (start_y != end_y){
4594                        if (orientation == 0){
4595                            SubBand * correlate_band = &p->band[0][0];
4596                            int correlate_end_y = FFMIN(b->height, end_y + 1);
4597                            int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4598                            decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4599                            correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4600                            dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4601                        }
4602                        else
4603                            decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4604                    }
4605                }
4606            }
4607
4608            for(; yd<slice_h; yd+=4){
4609                ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4610            }
4611
4612            if(s->qlog == LOSSLESS_QLOG){
4613                for(; yq<slice_h && yq<h; yq++){
4614                    IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4615                    for(x=0; x<w; x++){
4616                        line[x] <<= FRAC_BITS;
4617                    }
4618                }
4619            }
4620
4621            predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4622
4623            y = FFMIN(p->height, slice_starty);
4624            end_y = FFMIN(p->height, slice_h);
4625            while(y < end_y)
4626                slice_buffer_release(&s->sb, y++);
4627        }
4628
4629        slice_buffer_flush(&s->sb);
4630        }
4631
4632    }
4633
4634    emms_c();
4635
4636    if(s->last_picture[s->max_ref_frames-1].data[0]){
4637        avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4638        for(i=0; i<9; i++)
4639            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4640                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4641    }
4642
4643    if(!(s->avctx->debug&2048))
4644        *picture= s->current_picture;
4645    else
4646        *picture= s->mconly_picture;
4647
4648    *data_size = sizeof(AVFrame);
4649
4650    bytes_read= c->bytestream - c->bytestream_start;
4651    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4652
4653    return bytes_read;
4654}
4655
4656static av_cold int decode_end(AVCodecContext *avctx)
4657{
4658    SnowContext *s = avctx->priv_data;
4659
4660    slice_buffer_destroy(&s->sb);
4661
4662    common_end(s);
4663
4664    return 0;
4665}
4666
4667AVCodec snow_decoder = {
4668    "snow",
4669    CODEC_TYPE_VIDEO,
4670    CODEC_ID_SNOW,
4671    sizeof(SnowContext),
4672    decode_init,
4673    NULL,
4674    decode_end,
4675    decode_frame,
4676    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4677    NULL,
4678    .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4679};
4680
4681#if CONFIG_SNOW_ENCODER
4682AVCodec snow_encoder = {
4683    "snow",
4684    CODEC_TYPE_VIDEO,
4685    CODEC_ID_SNOW,
4686    sizeof(SnowContext),
4687    encode_init,
4688    encode_frame,
4689    encode_end,
4690    .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4691};
4692#endif
4693
4694
4695#ifdef TEST
4696#undef malloc
4697#undef free
4698#undef printf
4699#undef random
4700
4701int main(void){
4702    int width=256;
4703    int height=256;
4704    int buffer[2][width*height];
4705    SnowContext s;
4706    int i;
4707    s.spatial_decomposition_count=6;
4708    s.spatial_decomposition_type=1;
4709
4710    printf("testing 5/3 DWT\n");
4711    for(i=0; i<width*height; i++)
4712        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4713
4714    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4715    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4716
4717    for(i=0; i<width*height; i++)
4718        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4719
4720    printf("testing 9/7 DWT\n");
4721    s.spatial_decomposition_type=0;
4722    for(i=0; i<width*height; i++)
4723        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4724
4725    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4726    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4727
4728    for(i=0; i<width*height; i++)
4729        if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4730
4731#if 0
4732    printf("testing AC coder\n");
4733    memset(s.header_state, 0, sizeof(s.header_state));
4734    ff_init_range_encoder(&s.c, buffer[0], 256*256);
4735    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4736
4737    for(i=-256; i<256; i++){
4738        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4739    }
4740    ff_rac_terminate(&s.c);
4741
4742    memset(s.header_state, 0, sizeof(s.header_state));
4743    ff_init_range_decoder(&s.c, buffer[0], 256*256);
4744    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4745
4746    for(i=-256; i<256; i++){
4747        int j;
4748        j= get_symbol(&s.c, s.header_state, 1);
4749        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4750    }
4751#endif
4752    {
4753    int level, orientation, x, y;
4754    int64_t errors[8][4];
4755    int64_t g=0;
4756
4757        memset(errors, 0, sizeof(errors));
4758        s.spatial_decomposition_count=3;
4759        s.spatial_decomposition_type=0;
4760        for(level=0; level<s.spatial_decomposition_count; level++){
4761            for(orientation=level ? 1 : 0; orientation<4; orientation++){
4762                int w= width  >> (s.spatial_decomposition_count-level);
4763                int h= height >> (s.spatial_decomposition_count-level);
4764                int stride= width  << (s.spatial_decomposition_count-level);
4765                DWTELEM *buf= buffer[0];
4766                int64_t error=0;
4767
4768                if(orientation&1) buf+=w;
4769                if(orientation>1) buf+=stride>>1;
4770
4771                memset(buffer[0], 0, sizeof(int)*width*height);
4772                buf[w/2 + h/2*stride]= 256*256;
4773                ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4774                for(y=0; y<height; y++){
4775                    for(x=0; x<width; x++){
4776                        int64_t d= buffer[0][x + y*width];
4777                        error += d*d;
4778                        if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4779                    }
4780                    if(FFABS(height/2-y)<9 && level==2) printf("\n");
4781                }
4782                error= (int)(sqrt(error)+0.5);
4783                errors[level][orientation]= error;
4784                if(g) g=av_gcd(g, error);
4785                else g= error;
4786            }
4787        }
4788        printf("static int const visual_weight[][4]={\n");
4789        for(level=0; level<s.spatial_decomposition_count; level++){
4790            printf("  {");
4791            for(orientation=0; orientation<4; orientation++){
4792                printf("%8"PRId64",", errors[level][orientation]/g);
4793            }
4794            printf("},\n");
4795        }
4796        printf("};\n");
4797        {
4798            int level=2;
4799            int w= width  >> (s.spatial_decomposition_count-level);
4800            //int h= height >> (s.spatial_decomposition_count-level);
4801            int stride= width  << (s.spatial_decomposition_count-level);
4802            DWTELEM *buf= buffer[0];
4803            int64_t error=0;
4804
4805            buf+=w;
4806            buf+=stride>>1;
4807
4808            memset(buffer[0], 0, sizeof(int)*width*height);
4809#if 1
4810            for(y=0; y<height; y++){
4811                for(x=0; x<width; x++){
4812                    int tab[4]={0,2,3,1};
4813                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4814                }
4815            }
4816            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4817#else
4818            for(y=0; y<h; y++){
4819                for(x=0; x<w; x++){
4820                    buf[x + y*stride  ]=169;
4821                    buf[x + y*stride-w]=64;
4822                }
4823            }
4824            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4825#endif
4826            for(y=0; y<height; y++){
4827                for(x=0; x<width; x++){
4828                    int64_t d= buffer[0][x + y*width];
4829                    error += d*d;
4830                    if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4831                }
4832                if(FFABS(height/2-y)<9) printf("\n");
4833            }
4834        }
4835
4836    }
4837    return 0;
4838}
4839#endif /* TEST */
4840