1/*
 * Copyright (c) 2014 Clément Bœsch
3 *
4 * This file is part of FFmpeg.
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19/**
20 * @file
21 * hqx magnification filters (hq2x, hq3x, hq4x)
22 *
 * Originally designed by Maxim Stepin.
24 *
25 * @see http://en.wikipedia.org/wiki/Hqx
26 * @see http://web.archive.org/web/20131114143602/http://www.hiend3d.com/hq3x.html
27 * @see http://blog.pkh.me/p/19-butchering-hqx-scaling-filters.html
28 */
29
30#include "libavutil/opt.h"
31#include "libavutil/avassert.h"
32#include "libavutil/pixdesc.h"
33#include "internal.h"
34
35typedef int (*hqxfunc_t)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
36
37typedef struct {
38    const AVClass *class;
39    int n;
40    hqxfunc_t func;
41    uint32_t rgbtoyuv[1<<24];
42} HQXContext;
43
44typedef struct ThreadData {
45    AVFrame *in, *out;
46    const uint32_t *rgbtoyuv;
47} ThreadData;
48
49#define OFFSET(x) offsetof(HQXContext, x)
50#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
51static const AVOption hqx_options[] = {
52    { "n", "set scale factor", OFFSET(n), AV_OPT_TYPE_INT, {.i64 = 3}, 2, 4, .flags = FLAGS },
53    { NULL }
54};
55
56AVFILTER_DEFINE_CLASS(hqx);
57
/**
 * Look a pixel up in the conversion table.
 *
 * @param r2y table to read from
 * @param c   pixel value; only its low 24 bits take part in the lookup
 * @return the table entry stored for those 24 bits
 */
static av_always_inline uint32_t rgb2yuv(const uint32_t *r2y, uint32_t c)
{
    const uint32_t rgb24 = c & 0xffffff;

    return r2y[rgb24];
}
62
/**
 * Tell whether two packed YUV values (Y in bits 16-23, U in bits 8-15, V in
 * bits 0-7) are further apart than the per-channel thresholds.
 *
 * The channel differences are taken in signed arithmetic: subtracting the
 * masked uint32_t values directly would wrap around for a negative difference
 * and only reach abs() through an implementation-defined conversion.
 *
 * @return 1 if |dY| > 48, |dU| > 7 or |dV| > 6 (each measured in its own bit
 *         position), 0 otherwise
 */
static av_always_inline int yuv_diff(uint32_t yuv1, uint32_t yuv2)
{
#define YMASK 0xff0000
#define UMASK 0x00ff00
#define VMASK 0x0000ff
    /* every masked value is at most 0xff0000, so it always fits in an int */
    const int dy = (int)(yuv1 & YMASK) - (int)(yuv2 & YMASK);
    const int du = (int)(yuv1 & UMASK) - (int)(yuv2 & UMASK);
    const int dv = (int)(yuv1 & VMASK) - (int)(yuv2 & VMASK);

    return abs(dy) > (48 << 16) ||
           abs(du) > ( 7 <<  8) ||
           abs(dv) > ( 6 <<  0);
}
72
/**
 * Weighted mix of two packed 4x8-bit pixels: (c1*w1 + c2*w2) >> s, evaluated
 * for each byte.
 *
 * The bytes are processed two at a time: bytes 1 and 3 are first moved down
 * by 8 bits, bytes 0 and 2 are used in place, so each product has 16 bits of
 * room before it would spill into the neighbouring byte.
 */
static av_always_inline uint32_t interp_2px(uint32_t c1, int w1, uint32_t c2, int w2, int s)
{
    const uint32_t odd  = ((c1 & 0xff00ff00) >> 8) * w1 +
                          ((c2 & 0xff00ff00) >> 8) * w2;
    const uint32_t even =  (c1 & 0x00ff00ff)       * w1 +
                           (c2 & 0x00ff00ff)       * w2;

    /* shifting left by (8 - s) both divides by 1 << s and moves the odd
     * bytes back to their original place */
    return ((odd  << (8 - s)) & 0xff00ff00) |
           ((even >>      s ) & 0x00ff00ff);
}
79
/**
 * Weighted mix of three packed 4x8-bit pixels:
 * (c1*w1 + c2*w2 + c3*w3) >> s, evaluated for each byte.
 *
 * Works like interp_2px(): bytes 1 and 3 are handled shifted down by 8 bits,
 * bytes 0 and 2 in place.
 */
static av_always_inline uint32_t interp_3px(uint32_t c1, int w1, uint32_t c2, int w2, uint32_t c3, int w3, int s)
{
    const uint32_t odd  = ((c1 & 0xff00ff00) >> 8) * w1 +
                          ((c2 & 0xff00ff00) >> 8) * w2 +
                          ((c3 & 0xff00ff00) >> 8) * w3;
    const uint32_t even =  (c1 & 0x00ff00ff)       * w1 +
                           (c2 & 0x00ff00ff)       * w2 +
                           (c3 & 0x00ff00ff)       * w3;

    return ((odd  << (8 - s)) & 0xff00ff00) |
           ((even >>      s ) & 0x00ff00ff);
}
86
/* Test the shuffled difference mask against one pattern: m selects the bits
 * that matter for the pattern and r is the value those bits must have (a bit
 * is 1 when the matching neighbour differs from the center, 0 otherwise). */
#define P(m, r) ((k_shuffled & (m)) == (r))

/* Turn a window position 0..8 into a mask bit index 0..7. The center
 * (position 4) never differs from itself and therefore has no bit, so the
 * positions after it are moved down by one. */
#define DROP4(z) ((z) - ((z) > 4))

/* Take the mask bit belonging to window position n (read from the mirrored
 * index 7 - DROP4(n) when rot is set) and move it to the bit that belongs to
 * the position held in p<n>. */
#define SHF(x, rot, n) \
    ((((x) >> ((rot) ? 7 - DROP4(n) : DROP4(n))) & 1) << DROP4(p##n))

/* Non-zero when the two pixels are too far apart once converted to YUV. */
#define WDIFF(c1, c2) yuv_diff(rgb2yuv(r2y, (c1)), rgb2yuv(r2y, (c2)))

/* Common prologue of the interpolation functions. It expects k, w and p0..p8
 * to be in scope and declares k_shuffled (the difference mask rearranged
 * according to p0..p8) plus the pixels w0, w1, w3, w4, w5 and w7 fetched
 * through that same mapping. rot is forwarded to SHF. */
#define INTERP_BOOTSTRAP(rot)                                   \
    const int k_shuffled = SHF(k, rot, 0) | SHF(k, rot, 1) |    \
                           SHF(k, rot, 2) | SHF(k, rot, 3) |    \
                           SHF(k, rot, 5) | SHF(k, rot, 6) |    \
                           SHF(k, rot, 7) | SHF(k, rot, 8);     \
                                                                \
    const uint32_t w0 = w[p0];                                  \
    const uint32_t w1 = w[p1];                                  \
    const uint32_t w3 = w[p3];                                  \
    const uint32_t w4 = w[p4];                                  \
    const uint32_t w5 = w[p5];                                  \
    const uint32_t w7 = w[p7]
113
114/* Assuming p0..p8 is mapped to pixels 0..8, this function interpolates the
115 * top-left pixel in the total of the 2x2 pixels to interpolates. The function
116 * is also used for the 3 other pixels */
117static av_always_inline uint32_t hq2x_interp_1x1(const uint32_t *r2y, int k,
118                                                 const uint32_t *w,
119                                                 int p0, int p1, int p2,
120                                                 int p3, int p4, int p5,
121                                                 int p6, int p7, int p8)
122{
123    INTERP_BOOTSTRAP(0);
124
125    if ((P(0xbf,0x37) || P(0xdb,0x13)) && WDIFF(w1, w5))
126        return interp_2px(w4, 3, w3, 1, 2);
127    if ((P(0xdb,0x49) || P(0xef,0x6d)) && WDIFF(w7, w3))
128        return interp_2px(w4, 3, w1, 1, 2);
129    if ((P(0x0b,0x0b) || P(0xfe,0x4a) || P(0xfe,0x1a)) && WDIFF(w3, w1))
130        return w4;
131    if ((P(0x6f,0x2a) || P(0x5b,0x0a) || P(0xbf,0x3a) || P(0xdf,0x5a) ||
132         P(0x9f,0x8a) || P(0xcf,0x8a) || P(0xef,0x4e) || P(0x3f,0x0e) ||
133         P(0xfb,0x5a) || P(0xbb,0x8a) || P(0x7f,0x5a) || P(0xaf,0x8a) ||
134         P(0xeb,0x8a)) && WDIFF(w3, w1))
135        return interp_2px(w4, 3, w0, 1, 2);
136    if (P(0x0b,0x08))
137        return interp_3px(w4, 2, w0, 1, w1, 1, 2);
138    if (P(0x0b,0x02))
139        return interp_3px(w4, 2, w0, 1, w3, 1, 2);
140    if (P(0x2f,0x2f))
141        return interp_3px(w4, 14, w3, 1, w1, 1, 4);
142    if (P(0xbf,0x37) || P(0xdb,0x13))
143        return interp_3px(w4, 5, w1, 2, w3, 1, 3);
144    if (P(0xdb,0x49) || P(0xef,0x6d))
145        return interp_3px(w4, 5, w3, 2, w1, 1, 3);
146    if (P(0x1b,0x03) || P(0x4f,0x43) || P(0x8b,0x83) || P(0x6b,0x43))
147        return interp_2px(w4, 3, w3, 1, 2);
148    if (P(0x4b,0x09) || P(0x8b,0x89) || P(0x1f,0x19) || P(0x3b,0x19))
149        return interp_2px(w4, 3, w1, 1, 2);
150    if (P(0x7e,0x2a) || P(0xef,0xab) || P(0xbf,0x8f) || P(0x7e,0x0e))
151        return interp_3px(w4, 2, w3, 3, w1, 3, 3);
152    if (P(0xfb,0x6a) || P(0x6f,0x6e) || P(0x3f,0x3e) || P(0xfb,0xfa) ||
153        P(0xdf,0xde) || P(0xdf,0x1e))
154        return interp_2px(w4, 3, w0, 1, 2);
155    if (P(0x0a,0x00) || P(0x4f,0x4b) || P(0x9f,0x1b) || P(0x2f,0x0b) ||
156        P(0xbe,0x0a) || P(0xee,0x0a) || P(0x7e,0x0a) || P(0xeb,0x4b) ||
157        P(0x3b,0x1b))
158        return interp_3px(w4, 2, w3, 1, w1, 1, 2);
159    return interp_3px(w4, 6, w3, 1, w1, 1, 3);
160}
161
162/* Assuming p0..p8 is mapped to pixels 0..8, this function interpolates the
163 * top-left and top-center pixel in the total of the 3x3 pixels to
164 * interpolates. The function is also used for the 3 other couples of pixels
165 * defining the outline. The center pixel is not defined through this function,
166 * since it's just the same as the original value. */
167static av_always_inline void hq3x_interp_2x1(uint32_t *dst, int dst_linesize,
168                                             const uint32_t *r2y, int k,
169                                             const uint32_t *w,
170                                             int pos00, int pos01,
171                                             int p0, int p1, int p2,
172                                             int p3, int p4, int p5,
173                                             int p6, int p7, int p8,
174                                             int rotate)
175{
176    INTERP_BOOTSTRAP(rotate);
177
178    uint32_t *dst00 = &dst[dst_linesize*(pos00>>1) + (pos00&1)];
179    uint32_t *dst01 = &dst[dst_linesize*(pos01>>1) + (pos01&1)];
180
181    if ((P(0xdb,0x49) || P(0xef,0x6d)) && WDIFF(w7, w3))
182        *dst00 = interp_2px(w4, 3, w1, 1, 2);
183    else if ((P(0xbf,0x37) || P(0xdb,0x13)) && WDIFF(w1, w5))
184        *dst00 = interp_2px(w4, 3, w3, 1, 2);
185    else if ((P(0x0b,0x0b) || P(0xfe,0x4a) || P(0xfe,0x1a)) && WDIFF(w3, w1))
186        *dst00 = w4;
187    else if ((P(0x6f,0x2a) || P(0x5b,0x0a) || P(0xbf,0x3a) || P(0xdf,0x5a) ||
188              P(0x9f,0x8a) || P(0xcf,0x8a) || P(0xef,0x4e) || P(0x3f,0x0e) ||
189              P(0xfb,0x5a) || P(0xbb,0x8a) || P(0x7f,0x5a) || P(0xaf,0x8a) ||
190              P(0xeb,0x8a)) && WDIFF(w3, w1))
191        *dst00 = interp_2px(w4, 3, w0, 1, 2);
192    else if (P(0x4b,0x09) || P(0x8b,0x89) || P(0x1f,0x19) || P(0x3b,0x19))
193        *dst00 = interp_2px(w4, 3, w1, 1, 2);
194    else if (P(0x1b,0x03) || P(0x4f,0x43) || P(0x8b,0x83) || P(0x6b,0x43))
195        *dst00 = interp_2px(w4, 3, w3, 1, 2);
196    else if (P(0x7e,0x2a) || P(0xef,0xab) || P(0xbf,0x8f) || P(0x7e,0x0e))
197        *dst00 = interp_2px(w3, 1, w1, 1, 1);
198    else if (P(0x4f,0x4b) || P(0x9f,0x1b) || P(0x2f,0x0b) || P(0xbe,0x0a) ||
199             P(0xee,0x0a) || P(0x7e,0x0a) || P(0xeb,0x4b) || P(0x3b,0x1b))
200        *dst00 = interp_3px(w4, 2, w3, 7, w1, 7, 4);
201    else if (P(0x0b,0x08) || P(0xf9,0x68) || P(0xf3,0x62) || P(0x6d,0x6c) ||
202             P(0x67,0x66) || P(0x3d,0x3c) || P(0x37,0x36) || P(0xf9,0xf8) ||
203             P(0xdd,0xdc) || P(0xf3,0xf2) || P(0xd7,0xd6) || P(0xdd,0x1c) ||
204             P(0xd7,0x16) || P(0x0b,0x02))
205        *dst00 = interp_2px(w4, 3, w0, 1, 2);
206    else
207        *dst00 = interp_3px(w4, 2, w3, 1, w1, 1, 2);
208
209    if ((P(0xfe,0xde) || P(0x9e,0x16) || P(0xda,0x12) || P(0x17,0x16) ||
210         P(0x5b,0x12) || P(0xbb,0x12)) && WDIFF(w1, w5))
211        *dst01 = w4;
212    else if ((P(0x0f,0x0b) || P(0x5e,0x0a) || P(0xfb,0x7b) || P(0x3b,0x0b) ||
213              P(0xbe,0x0a) || P(0x7a,0x0a)) && WDIFF(w3, w1))
214        *dst01 = w4;
215    else if (P(0xbf,0x8f) || P(0x7e,0x0e) || P(0xbf,0x37) || P(0xdb,0x13))
216        *dst01 = interp_2px(w1, 3, w4, 1, 2);
217    else if (P(0x02,0x00) || P(0x7c,0x28) || P(0xed,0xa9) || P(0xf5,0xb4) ||
218             P(0xd9,0x90))
219        *dst01 = interp_2px(w4, 3, w1, 1, 2);
220    else if (P(0x4f,0x4b) || P(0xfb,0x7b) || P(0xfe,0x7e) || P(0x9f,0x1b) ||
221             P(0x2f,0x0b) || P(0xbe,0x0a) || P(0x7e,0x0a) || P(0xfb,0x4b) ||
222             P(0xfb,0xdb) || P(0xfe,0xde) || P(0xfe,0x56) || P(0x57,0x56) ||
223             P(0x97,0x16) || P(0x3f,0x1e) || P(0xdb,0x12) || P(0xbb,0x12))
224        *dst01 = interp_2px(w4, 7, w1, 1, 3);
225    else
226        *dst01 = w4;
227}
228
229/* Assuming p0..p8 is mapped to pixels 0..8, this function interpolates the
230 * top-left block of 2x2 pixels in the total of the 4x4 pixels (or 4 blocks) to
231 * interpolates. The function is also used for the 3 other blocks of 2x2
232 * pixels. */
233static av_always_inline void hq4x_interp_2x2(uint32_t *dst, int dst_linesize,
234                                             const uint32_t *r2y, int k,
235                                             const uint32_t *w,
236                                             int pos00, int pos01,
237                                             int pos10, int pos11,
238                                             int p0, int p1, int p2,
239                                             int p3, int p4, int p5,
240                                             int p6, int p7, int p8)
241{
242    INTERP_BOOTSTRAP(0);
243
244    uint32_t *dst00 = &dst[dst_linesize*(pos00>>1) + (pos00&1)];
245    uint32_t *dst01 = &dst[dst_linesize*(pos01>>1) + (pos01&1)];
246    uint32_t *dst10 = &dst[dst_linesize*(pos10>>1) + (pos10&1)];
247    uint32_t *dst11 = &dst[dst_linesize*(pos11>>1) + (pos11&1)];
248
249    const int cond00 = (P(0xbf,0x37) || P(0xdb,0x13)) && WDIFF(w1, w5);
250    const int cond01 = (P(0xdb,0x49) || P(0xef,0x6d)) && WDIFF(w7, w3);
251    const int cond02 = (P(0x6f,0x2a) || P(0x5b,0x0a) || P(0xbf,0x3a) ||
252                        P(0xdf,0x5a) || P(0x9f,0x8a) || P(0xcf,0x8a) ||
253                        P(0xef,0x4e) || P(0x3f,0x0e) || P(0xfb,0x5a) ||
254                        P(0xbb,0x8a) || P(0x7f,0x5a) || P(0xaf,0x8a) ||
255                        P(0xeb,0x8a)) && WDIFF(w3, w1);
256    const int cond03 = P(0xdb,0x49) || P(0xef,0x6d);
257    const int cond04 = P(0xbf,0x37) || P(0xdb,0x13);
258    const int cond05 = P(0x1b,0x03) || P(0x4f,0x43) || P(0x8b,0x83) ||
259                       P(0x6b,0x43);
260    const int cond06 = P(0x4b,0x09) || P(0x8b,0x89) || P(0x1f,0x19) ||
261                       P(0x3b,0x19);
262    const int cond07 = P(0x0b,0x08) || P(0xf9,0x68) || P(0xf3,0x62) ||
263                       P(0x6d,0x6c) || P(0x67,0x66) || P(0x3d,0x3c) ||
264                       P(0x37,0x36) || P(0xf9,0xf8) || P(0xdd,0xdc) ||
265                       P(0xf3,0xf2) || P(0xd7,0xd6) || P(0xdd,0x1c) ||
266                       P(0xd7,0x16) || P(0x0b,0x02);
267    const int cond08 = (P(0x0f,0x0b) || P(0x2b,0x0b) || P(0xfe,0x4a) ||
268                        P(0xfe,0x1a)) && WDIFF(w3, w1);
269    const int cond09 = P(0x2f,0x2f);
270    const int cond10 = P(0x0a,0x00);
271    const int cond11 = P(0x0b,0x09);
272    const int cond12 = P(0x7e,0x2a) || P(0xef,0xab);
273    const int cond13 = P(0xbf,0x8f) || P(0x7e,0x0e);
274    const int cond14 = P(0x4f,0x4b) || P(0x9f,0x1b) || P(0x2f,0x0b) ||
275                       P(0xbe,0x0a) || P(0xee,0x0a) || P(0x7e,0x0a) ||
276                       P(0xeb,0x4b) || P(0x3b,0x1b);
277    const int cond15 = P(0x0b,0x03);
278
279    if (cond00)
280        *dst00 = interp_2px(w4, 5, w3, 3, 3);
281    else if (cond01)
282        *dst00 = interp_2px(w4, 5, w1, 3, 3);
283    else if ((P(0x0b,0x0b) || P(0xfe,0x4a) || P(0xfe,0x1a)) && WDIFF(w3, w1))
284        *dst00 = w4;
285    else if (cond02)
286        *dst00 = interp_2px(w4, 5, w0, 3, 3);
287    else if (cond03)
288        *dst00 = interp_2px(w4, 3, w3, 1, 2);
289    else if (cond04)
290        *dst00 = interp_2px(w4, 3, w1, 1, 2);
291    else if (cond05)
292        *dst00 = interp_2px(w4, 5, w3, 3, 3);
293    else if (cond06)
294        *dst00 = interp_2px(w4, 5, w1, 3, 3);
295    else if (P(0x0f,0x0b) || P(0x5e,0x0a) || P(0x2b,0x0b) || P(0xbe,0x0a) ||
296             P(0x7a,0x0a) || P(0xee,0x0a))
297        *dst00 = interp_2px(w1, 1, w3, 1, 1);
298    else if (cond07)
299        *dst00 = interp_2px(w4, 5, w0, 3, 3);
300    else
301        *dst00 = interp_3px(w4, 2, w1, 1, w3, 1, 2);
302
303    if (cond00)
304        *dst01 = interp_2px(w4, 7, w3, 1, 3);
305    else if (cond08)
306        *dst01 = w4;
307    else if (cond02)
308        *dst01 = interp_2px(w4, 3, w0, 1, 2);
309    else if (cond09)
310        *dst01 = w4;
311    else if (cond10)
312        *dst01 = interp_3px(w4, 5, w1, 2, w3, 1, 3);
313    else if (P(0x0b,0x08))
314        *dst01 = interp_3px(w4, 5, w1, 2, w0, 1, 3);
315    else if (cond11)
316        *dst01 = interp_2px(w4, 5, w1, 3, 3);
317    else if (cond04)
318        *dst01 = interp_2px(w1, 3, w4, 1, 2);
319    else if (cond12)
320        *dst01 = interp_3px(w1, 2, w4, 1, w3, 1, 2);
321    else if (cond13)
322        *dst01 = interp_2px(w1, 5, w3, 3, 3);
323    else if (cond05)
324        *dst01 = interp_2px(w4, 7, w3, 1, 3);
325    else if (P(0xf3,0x62) || P(0x67,0x66) || P(0x37,0x36) || P(0xf3,0xf2) ||
326             P(0xd7,0xd6) || P(0xd7,0x16) || P(0x0b,0x02))
327        *dst01 = interp_2px(w4, 3, w0, 1, 2);
328    else if (cond14)
329        *dst01 = interp_2px(w1, 1, w4, 1, 1);
330    else
331        *dst01 = interp_2px(w4, 3, w1, 1, 2);
332
333    if (cond01)
334        *dst10 = interp_2px(w4, 7, w1, 1, 3);
335    else if (cond08)
336        *dst10 = w4;
337    else if (cond02)
338        *dst10 = interp_2px(w4, 3, w0, 1, 2);
339    else if (cond09)
340        *dst10 = w4;
341    else if (cond10)
342        *dst10 = interp_3px(w4, 5, w3, 2, w1, 1, 3);
343    else if (P(0x0b,0x02))
344        *dst10 = interp_3px(w4, 5, w3, 2, w0, 1, 3);
345    else if (cond15)
346        *dst10 = interp_2px(w4, 5, w3, 3, 3);
347    else if (cond03)
348        *dst10 = interp_2px(w3, 3, w4, 1, 2);
349    else if (cond13)
350        *dst10 = interp_3px(w3, 2, w4, 1, w1, 1, 2);
351    else if (cond12)
352        *dst10 = interp_2px(w3, 5, w1, 3, 3);
353    else if (cond06)
354        *dst10 = interp_2px(w4, 7, w1, 1, 3);
355    else if (P(0x0b,0x08) || P(0xf9,0x68) || P(0x6d,0x6c) || P(0x3d,0x3c) ||
356             P(0xf9,0xf8) || P(0xdd,0xdc) || P(0xdd,0x1c))
357        *dst10 = interp_2px(w4, 3, w0, 1, 2);
358    else if (cond14)
359        *dst10 = interp_2px(w3, 1, w4, 1, 1);
360    else
361        *dst10 = interp_2px(w4, 3, w3, 1, 2);
362
363    if ((P(0x7f,0x2b) || P(0xef,0xab) || P(0xbf,0x8f) || P(0x7f,0x0f)) &&
364         WDIFF(w3, w1))
365        *dst11 = w4;
366    else if (cond02)
367        *dst11 = interp_2px(w4, 7, w0, 1, 3);
368    else if (cond15)
369        *dst11 = interp_2px(w4, 7, w3, 1, 3);
370    else if (cond11)
371        *dst11 = interp_2px(w4, 7, w1, 1, 3);
372    else if (P(0x0a,0x00) || P(0x7e,0x2a) || P(0xef,0xab) || P(0xbf,0x8f) ||
373             P(0x7e,0x0e))
374        *dst11 = interp_3px(w4, 6, w3, 1, w1, 1, 3);
375    else if (cond07)
376        *dst11 = interp_2px(w4, 7, w0, 1, 3);
377    else
378        *dst11 = w4;
379}
380
381static av_always_inline void hqx_filter(const ThreadData *td, int jobnr, int nb_jobs, int n)
382{
383    int x, y;
384    AVFrame *in = td->in, *out = td->out;
385    const uint32_t *r2y = td->rgbtoyuv;
386    const int height = in->height;
387    const int width  = in->width;
388    const int slice_start = (height *  jobnr   ) / nb_jobs;
389    const int slice_end   = (height * (jobnr+1)) / nb_jobs;
390    const int dst_linesize = out->linesize[0];
391    const int src_linesize =  in->linesize[0];
392    uint8_t       *dst = out->data[0] + slice_start * dst_linesize * n;
393    const uint8_t *src =  in->data[0] + slice_start * src_linesize;
394
395    const int dst32_linesize = dst_linesize >> 2;
396    const int src32_linesize = src_linesize >> 2;
397
398    for (y = slice_start; y < slice_end; y++) {
399        const uint32_t *src32 = (const uint32_t *)src;
400        uint32_t       *dst32 = (uint32_t *)dst;
401        const int prevline = y > 0          ? -src32_linesize : 0;
402        const int nextline = y < height - 1 ?  src32_linesize : 0;
403
404        for (x = 0; x < width; x++) {
405            const int prevcol = x > 0        ? -1 : 0;
406            const int nextcol = x < width -1 ?  1 : 0;
407            const uint32_t w[3*3] = {
408                src32[prevcol + prevline], src32[prevline], src32[prevline + nextcol],
409                src32[prevcol           ], src32[       0], src32[           nextcol],
410                src32[prevcol + nextline], src32[nextline], src32[nextline + nextcol]
411            };
412            const uint32_t yuv1 = rgb2yuv(r2y, w[4]);
413            const int pattern = (w[4] != w[0] ? (yuv_diff(yuv1, rgb2yuv(r2y, w[0]))) : 0)
414                              | (w[4] != w[1] ? (yuv_diff(yuv1, rgb2yuv(r2y, w[1]))) : 0) << 1
415                              | (w[4] != w[2] ? (yuv_diff(yuv1, rgb2yuv(r2y, w[2]))) : 0) << 2
416                              | (w[4] != w[3] ? (yuv_diff(yuv1, rgb2yuv(r2y, w[3]))) : 0) << 3
417                              | (w[4] != w[5] ? (yuv_diff(yuv1, rgb2yuv(r2y, w[5]))) : 0) << 4
418                              | (w[4] != w[6] ? (yuv_diff(yuv1, rgb2yuv(r2y, w[6]))) : 0) << 5
419                              | (w[4] != w[7] ? (yuv_diff(yuv1, rgb2yuv(r2y, w[7]))) : 0) << 6
420                              | (w[4] != w[8] ? (yuv_diff(yuv1, rgb2yuv(r2y, w[8]))) : 0) << 7;
421
422            if (n == 2) {
423                dst32[dst32_linesize*0 + 0] = hq2x_interp_1x1(r2y, pattern, w, 0,1,2,3,4,5,6,7,8);  // 00
424                dst32[dst32_linesize*0 + 1] = hq2x_interp_1x1(r2y, pattern, w, 2,1,0,5,4,3,8,7,6);  // 01 (vert mirrored)
425                dst32[dst32_linesize*1 + 0] = hq2x_interp_1x1(r2y, pattern, w, 6,7,8,3,4,5,0,1,2);  // 10 (horiz mirrored)
426                dst32[dst32_linesize*1 + 1] = hq2x_interp_1x1(r2y, pattern, w, 8,7,6,5,4,3,2,1,0);  // 11 (center mirrored)
427            } else if (n == 3) {
428                hq3x_interp_2x1(dst32,                        dst32_linesize, r2y, pattern, w, 0,1, 0,1,2,3,4,5,6,7,8, 0);  // 00 01
429                hq3x_interp_2x1(dst32 + 1,                    dst32_linesize, r2y, pattern, w, 1,3, 2,5,8,1,4,7,0,3,6, 1);  // 02 12 (rotated to the right)
430                hq3x_interp_2x1(dst32 + 1*dst32_linesize,     dst32_linesize, r2y, pattern, w, 2,0, 6,3,0,7,4,1,8,5,2, 1);  // 20 10 (rotated to the left)
431                hq3x_interp_2x1(dst32 + 1*dst32_linesize + 1, dst32_linesize, r2y, pattern, w, 3,2, 8,7,6,5,4,3,2,1,0, 0);  // 22 21 (center mirrored)
432                dst32[dst32_linesize + 1] = w[4];                                                                           // 11
433            } else if (n == 4) {
434                hq4x_interp_2x2(dst32,                        dst32_linesize, r2y, pattern, w, 0,1,2,3, 0,1,2,3,4,5,6,7,8); // 00 01 10 11
435                hq4x_interp_2x2(dst32 + 2,                    dst32_linesize, r2y, pattern, w, 1,0,3,2, 2,1,0,5,4,3,8,7,6); // 02 03 12 13 (vert mirrored)
436                hq4x_interp_2x2(dst32 + 2*dst32_linesize,     dst32_linesize, r2y, pattern, w, 2,3,0,1, 6,7,8,3,4,5,0,1,2); // 20 21 30 31 (horiz mirrored)
437                hq4x_interp_2x2(dst32 + 2*dst32_linesize + 2, dst32_linesize, r2y, pattern, w, 3,2,1,0, 8,7,6,5,4,3,2,1,0); // 22 23 32 33 (center mirrored)
438            } else {
439                av_assert0(0);
440            }
441
442            src32 += 1;
443            dst32 += n;
444        }
445
446        src += src_linesize;
447        dst += dst_linesize * n;
448    }
449}
450
451#define HQX_FUNC(size) \
452static int hq##size##x(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
453{ \
454    hqx_filter(arg, jobnr, nb_jobs, size); \
455    return 0; \
456}
457
458HQX_FUNC(2)
459HQX_FUNC(3)
460HQX_FUNC(4)
461
462static int query_formats(AVFilterContext *ctx)
463{
464    static const enum AVPixelFormat pix_fmts[] = {AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE};
465    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
466    return 0;
467}
468
469static int config_output(AVFilterLink *outlink)
470{
471    AVFilterContext *ctx = outlink->src;
472    HQXContext *hqx = ctx->priv;
473    AVFilterLink *inlink = ctx->inputs[0];
474
475    outlink->w = inlink->w * hqx->n;
476    outlink->h = inlink->h * hqx->n;
477    av_log(inlink->dst, AV_LOG_VERBOSE, "fmt:%s size:%dx%d -> size:%dx%d\n",
478           av_get_pix_fmt_name(inlink->format),
479           inlink->w, inlink->h, outlink->w, outlink->h);
480    return 0;
481}
482
483static int filter_frame(AVFilterLink *inlink, AVFrame *in)
484{
485    AVFilterContext *ctx = inlink->dst;
486    AVFilterLink *outlink = ctx->outputs[0];
487    HQXContext *hqx = ctx->priv;
488    ThreadData td;
489    AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
490    if (!out) {
491        av_frame_free(&in);
492        return AVERROR(ENOMEM);
493    }
494    av_frame_copy_props(out, in);
495    out->width  = outlink->w;
496    out->height = outlink->h;
497
498    td.in = in;
499    td.out = out;
500    td.rgbtoyuv = hqx->rgbtoyuv;
501    ctx->internal->execute(ctx, hqx->func, &td, NULL, FFMIN(inlink->h, ctx->graph->nb_threads));
502
503    av_frame_free(&in);
504    return ff_filter_frame(outlink, out);
505}
506
507static av_cold int init(AVFilterContext *ctx)
508{
509    HQXContext *hqx = ctx->priv;
510    static const hqxfunc_t hqxfuncs[] = {hq2x, hq3x, hq4x};
511
512    uint32_t c;
513    int bg, rg, g;
514
515    for (bg=-255; bg<256; bg++) {
516        for (rg=-255; rg<256; rg++) {
517            const uint32_t u = (uint32_t)((-169*rg + 500*bg)/1000) + 128;
518            const uint32_t v = (uint32_t)(( 500*rg -  81*bg)/1000) + 128;
519            int startg = FFMAX3(-bg, -rg, 0);
520            int endg = FFMIN3(255-bg, 255-rg, 255);
521            uint32_t y = (uint32_t)(( 299*rg + 1000*startg + 114*bg)/1000);
522            c = bg + (rg<<16) + 0x010101 * startg;
523            for (g = startg; g <= endg; g++) {
524                hqx->rgbtoyuv[c] = ((y++) << 16) + (u << 8) + v;
525                c+= 0x010101;
526            }
527        }
528    }
529
530    hqx->func = hqxfuncs[hqx->n - 2];
531    return 0;
532}
533
534static const AVFilterPad hqx_inputs[] = {
535    {
536        .name         = "default",
537        .type         = AVMEDIA_TYPE_VIDEO,
538        .filter_frame = filter_frame,
539    },
540    { NULL }
541};
542
543static const AVFilterPad hqx_outputs[] = {
544    {
545        .name         = "default",
546        .type         = AVMEDIA_TYPE_VIDEO,
547        .config_props = config_output,
548    },
549    { NULL }
550};
551
552AVFilter ff_vf_hqx = {
553    .name          = "hqx",
554    .description   = NULL_IF_CONFIG_SMALL("Scale the input by 2, 3 or 4 using the hq*x magnification algorithm."),
555    .priv_size     = sizeof(HQXContext),
556    .init          = init,
557    .query_formats = query_formats,
558    .inputs        = hqx_inputs,
559    .outputs       = hqx_outputs,
560    .priv_class    = &hqx_class,
561    .flags         = AVFILTER_FLAG_SLICE_THREADS,
562};
563