1/*
2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of Libav.
5 *
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include <inttypes.h>
22#include <string.h>
23#include <math.h>
24#include <stdio.h>
25#include "config.h"
26#include <assert.h>
27#include "swscale.h"
28#include "swscale_internal.h"
29#include "rgb2rgb.h"
30#include "libavutil/intreadwrite.h"
31#include "libavutil/cpu.h"
32#include "libavutil/avutil.h"
33#include "libavutil/mathematics.h"
34#include "libavutil/bswap.h"
35#include "libavutil/pixdesc.h"
36
37#define DITHER1XBPP
38
39#define RGB2YUV_SHIFT 15
40#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
41#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
42#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
43#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
44#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
45#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
46#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
47#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
48#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
49
50/*
51NOTES
52Special versions: fast Y 1:1 scaling (no interpolation in y direction)
53
54TODO
55more intelligent misalignment avoidance for the horizontal scaler
56write special vertical cubic upscale version
57optimize C code (YV12 / minmax)
58add support for packed pixel YUV input & output
59add support for Y8 output
60optimize BGR24 & BGR32
61add BGR4 output support
62write special BGR->BGR scaler
63*/
64
65DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
66{  1,   3,   1,   3,   1,   3,   1,   3, },
67{  2,   0,   2,   0,   2,   0,   2,   0, },
68};
69
70DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
71{  6,   2,   6,   2,   6,   2,   6,   2, },
72{  0,   4,   0,   4,   0,   4,   0,   4, },
73};
74
75DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
76{  8,   4,  11,   7,   8,   4,  11,   7, },
77{  2,  14,   1,  13,   2,  14,   1,  13, },
78{ 10,   6,   9,   5,  10,   6,   9,   5, },
79{  0,  12,   3,  15,   0,  12,   3,  15, },
80};
81
82DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
83{ 17,   9,  23,  15,  16,   8,  22,  14, },
84{  5,  29,   3,  27,   4,  28,   2,  26, },
85{ 21,  13,  19,  11,  20,  12,  18,  10, },
86{  0,  24,   6,  30,   1,  25,   7,  31, },
87{ 16,   8,  22,  14,  17,   9,  23,  15, },
88{  4,  28,   2,  26,   5,  29,   3,  27, },
89{ 20,  12,  18,  10,  21,  13,  19,  11, },
90{  1,  25,   7,  31,   0,  24,   6,  30, },
91};
92
93DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
94{  0,  55,  14,  68,   3,  58,  17,  72, },
95{ 37,  18,  50,  32,  40,  22,  54,  35, },
96{  9,  64,   5,  59,  13,  67,   8,  63, },
97{ 46,  27,  41,  23,  49,  31,  44,  26, },
98{  2,  57,  16,  71,   1,  56,  15,  70, },
99{ 39,  21,  52,  34,  38,  19,  51,  33, },
100{ 11,  66,   7,  62,  10,  65,   6,  60, },
101{ 48,  30,  43,  25,  47,  29,  42,  24, },
102};
103
104#if 1
105DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
106{117,  62, 158, 103, 113,  58, 155, 100, },
107{ 34, 199,  21, 186,  31, 196,  17, 182, },
108{144,  89, 131,  76, 141,  86, 127,  72, },
109{  0, 165,  41, 206,  10, 175,  52, 217, },
110{110,  55, 151,  96, 120,  65, 162, 107, },
111{ 28, 193,  14, 179,  38, 203,  24, 189, },
112{138,  83, 124,  69, 148,  93, 134,  79, },
113{  7, 172,  48, 213,   3, 168,  45, 210, },
114};
115#elif 1
116// tries to correct a gamma of 1.5
117DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
118{  0, 143,  18, 200,   2, 156,  25, 215, },
119{ 78,  28, 125,  64,  89,  36, 138,  74, },
120{ 10, 180,   3, 161,  16, 195,   8, 175, },
121{109,  51,  93,  38, 121,  60, 105,  47, },
122{  1, 152,  23, 210,   0, 147,  20, 205, },
123{ 85,  33, 134,  71,  81,  30, 130,  67, },
124{ 14, 190,   6, 171,  12, 185,   5, 166, },
125{117,  57, 101,  44, 113,  54,  97,  41, },
126};
127#elif 1
128// tries to correct a gamma of 2.0
129DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
130{  0, 124,   8, 193,   0, 140,  12, 213, },
131{ 55,  14, 104,  42,  66,  19, 119,  52, },
132{  3, 168,   1, 145,   6, 187,   3, 162, },
133{ 86,  31,  70,  21,  99,  39,  82,  28, },
134{  0, 134,  11, 206,   0, 129,   9, 200, },
135{ 62,  17, 114,  48,  58,  16, 109,  45, },
136{  5, 181,   2, 157,   4, 175,   1, 151, },
137{ 95,  36,  78,  26,  90,  34,  74,  24, },
138};
139#else
140// tries to correct a gamma of 2.5
141DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
142{  0, 107,   3, 187,   0, 125,   6, 212, },
143{ 39,   7,  86,  28,  49,  11, 102,  36, },
144{  1, 158,   0, 131,   3, 180,   1, 151, },
145{ 68,  19,  52,  12,  81,  25,  64,  17, },
146{  0, 119,   5, 203,   0, 113,   4, 195, },
147{ 45,   9,  96,  33,  42,   8,  91,  30, },
148{  2, 172,   1, 144,   2, 165,   0, 137, },
149{ 77,  23,  60,  15,  72,  21,  56,  14, },
150};
151#endif
152DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
153{  36, 68, 60, 92, 34, 66, 58, 90,},
154{ 100,  4,124, 28, 98,  2,122, 26,},
155{  52, 84, 44, 76, 50, 82, 42, 74,},
156{ 116, 20,108, 12,114, 18,106, 10,},
157{  32, 64, 56, 88, 38, 70, 62, 94,},
158{  96,  0,120, 24,102,  6,126, 30,},
159{  48, 80, 40, 72, 54, 86, 46, 78,},
160{ 112, 16,104,  8,118, 22,110, 14,},
161};
162DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
163{  64, 64, 64, 64, 64, 64, 64, 64 };
164
165#define output_pixel(pos, val, bias, signedness) \
166    if (big_endian) { \
167        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
168    } else { \
169        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
170    }
171
172static av_always_inline void
173yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
174                         int big_endian, int output_bits)
175{
176    int i;
177    int shift = 19 - output_bits;
178
179    for (i = 0; i < dstW; i++) {
180        int val = src[i] + (1 << (shift - 1));
181        output_pixel(&dest[i], val, 0, uint);
182    }
183}
184
185static av_always_inline void
186yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
187                         const int32_t **src, uint16_t *dest, int dstW,
188                         int big_endian, int output_bits)
189{
190    int i;
191    int shift = 15 + 16 - output_bits;
192
193    for (i = 0; i < dstW; i++) {
194        int val = 1 << (30-output_bits);
195        int j;
196
197        /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
198         * filters (or anything with negative coeffs, the range can be slightly
199         * wider in both directions. To account for this overflow, we subtract
200         * a constant so it always fits in the signed range (assuming a
201         * reasonable filterSize), and re-add that at the end. */
202        val -= 0x40000000;
203        for (j = 0; j < filterSize; j++)
204            val += src[j][i] * filter[j];
205
206        output_pixel(&dest[i], val, 0x8000, int);
207    }
208}
209
210#undef output_pixel
211
212#define output_pixel(pos, val) \
213    if (big_endian) { \
214        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
215    } else { \
216        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
217    }
218
219static av_always_inline void
220yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
221                         int big_endian, int output_bits)
222{
223    int i;
224    int shift = 15 - output_bits;
225
226    for (i = 0; i < dstW; i++) {
227        int val = src[i] + (1 << (shift - 1));
228        output_pixel(&dest[i], val);
229    }
230}
231
232static av_always_inline void
233yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
234                         const int16_t **src, uint16_t *dest, int dstW,
235                         int big_endian, int output_bits)
236{
237    int i;
238    int shift = 11 + 16 - output_bits;
239
240    for (i = 0; i < dstW; i++) {
241        int val = 1 << (26-output_bits);
242        int j;
243
244        for (j = 0; j < filterSize; j++)
245            val += src[j][i] * filter[j];
246
247        output_pixel(&dest[i], val);
248    }
249}
250
251#undef output_pixel
252
253#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
254static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
255                              uint8_t *dest, int dstW, \
256                              const uint8_t *dither, int offset)\
257{ \
258    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
259                         (uint16_t *) dest, dstW, is_be, bits); \
260}\
261static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
262                              const int16_t **src, uint8_t *dest, int dstW, \
263                              const uint8_t *dither, int offset)\
264{ \
265    yuv2planeX_## template_size ## _c_template(filter, \
266                         filterSize, (const typeX_t **) src, \
267                         (uint16_t *) dest, dstW, is_be, bits); \
268}
269yuv2NBPS( 9, BE, 1, 10, int16_t)
270yuv2NBPS( 9, LE, 0, 10, int16_t)
271yuv2NBPS(10, BE, 1, 10, int16_t)
272yuv2NBPS(10, LE, 0, 10, int16_t)
273yuv2NBPS(16, BE, 1, 16, int32_t)
274yuv2NBPS(16, LE, 0, 16, int32_t)
275
276static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
277                           const int16_t **src, uint8_t *dest, int dstW,
278                           const uint8_t *dither, int offset)
279{
280    int i;
281    for (i=0; i<dstW; i++) {
282        int val = dither[(i + offset) & 7] << 12;
283        int j;
284        for (j=0; j<filterSize; j++)
285            val += src[j][i] * filter[j];
286
287        dest[i]= av_clip_uint8(val>>19);
288    }
289}
290
291static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
292                           const uint8_t *dither, int offset)
293{
294    int i;
295    for (i=0; i<dstW; i++) {
296        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
297        dest[i]= av_clip_uint8(val);
298    }
299}
300
301static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
302                        const int16_t **chrUSrc, const int16_t **chrVSrc,
303                        uint8_t *dest, int chrDstW)
304{
305    enum PixelFormat dstFormat = c->dstFormat;
306    const uint8_t *chrDither = c->chrDither8;
307    int i;
308
309    if (dstFormat == PIX_FMT_NV12)
310        for (i=0; i<chrDstW; i++) {
311            int u = chrDither[i & 7] << 12;
312            int v = chrDither[(i + 3) & 7] << 12;
313            int j;
314            for (j=0; j<chrFilterSize; j++) {
315                u += chrUSrc[j][i] * chrFilter[j];
316                v += chrVSrc[j][i] * chrFilter[j];
317            }
318
319            dest[2*i]= av_clip_uint8(u>>19);
320            dest[2*i+1]= av_clip_uint8(v>>19);
321        }
322    else
323        for (i=0; i<chrDstW; i++) {
324            int u = chrDither[i & 7] << 12;
325            int v = chrDither[(i + 3) & 7] << 12;
326            int j;
327            for (j=0; j<chrFilterSize; j++) {
328                u += chrUSrc[j][i] * chrFilter[j];
329                v += chrVSrc[j][i] * chrFilter[j];
330            }
331
332            dest[2*i]= av_clip_uint8(v>>19);
333            dest[2*i+1]= av_clip_uint8(u>>19);
334        }
335}
336
337#define output_pixel(pos, val) \
338        if (target == PIX_FMT_GRAY16BE) { \
339            AV_WB16(pos, val); \
340        } else { \
341            AV_WL16(pos, val); \
342        }
343
344static av_always_inline void
345yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
346                        const int32_t **lumSrc, int lumFilterSize,
347                        const int16_t *chrFilter, const int32_t **chrUSrc,
348                        const int32_t **chrVSrc, int chrFilterSize,
349                        const int32_t **alpSrc, uint16_t *dest, int dstW,
350                        int y, enum PixelFormat target)
351{
352    int i;
353
354    for (i = 0; i < (dstW >> 1); i++) {
355        int j;
356        int Y1 = (1 << 14) - 0x40000000;
357        int Y2 = (1 << 14) - 0x40000000;
358
359        for (j = 0; j < lumFilterSize; j++) {
360            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
361            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
362        }
363        Y1 >>= 15;
364        Y2 >>= 15;
365        Y1 = av_clip_int16(Y1);
366        Y2 = av_clip_int16(Y2);
367        output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
368        output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
369    }
370}
371
372static av_always_inline void
373yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
374                        const int32_t *ubuf[2], const int32_t *vbuf[2],
375                        const int32_t *abuf[2], uint16_t *dest, int dstW,
376                        int yalpha, int uvalpha, int y,
377                        enum PixelFormat target)
378{
379    int  yalpha1 = 4095 - yalpha;
380    int i;
381    const int32_t *buf0 = buf[0], *buf1 = buf[1];
382
383    for (i = 0; i < (dstW >> 1); i++) {
384        int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
385        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
386
387        output_pixel(&dest[i * 2 + 0], Y1);
388        output_pixel(&dest[i * 2 + 1], Y2);
389    }
390}
391
392static av_always_inline void
393yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
394                        const int32_t *ubuf[2], const int32_t *vbuf[2],
395                        const int32_t *abuf0, uint16_t *dest, int dstW,
396                        int uvalpha, int y, enum PixelFormat target)
397{
398    int i;
399
400    for (i = 0; i < (dstW >> 1); i++) {
401        int Y1 = buf0[i * 2    ] << 1;
402        int Y2 = buf0[i * 2 + 1] << 1;
403
404        output_pixel(&dest[i * 2 + 0], Y1);
405        output_pixel(&dest[i * 2 + 1], Y2);
406    }
407}
408
409#undef output_pixel
410
411#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
412static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
413                        const int16_t **_lumSrc, int lumFilterSize, \
414                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
415                        const int16_t **_chrVSrc, int chrFilterSize, \
416                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
417                        int y) \
418{ \
419    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
420                  **chrUSrc = (const int32_t **) _chrUSrc, \
421                  **chrVSrc = (const int32_t **) _chrVSrc, \
422                  **alpSrc  = (const int32_t **) _alpSrc; \
423    uint16_t *dest = (uint16_t *) _dest; \
424    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
425                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
426                          alpSrc, dest, dstW, y, fmt); \
427} \
428 \
429static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
430                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
431                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
432                        int yalpha, int uvalpha, int y) \
433{ \
434    const int32_t **buf  = (const int32_t **) _buf, \
435                  **ubuf = (const int32_t **) _ubuf, \
436                  **vbuf = (const int32_t **) _vbuf, \
437                  **abuf = (const int32_t **) _abuf; \
438    uint16_t *dest = (uint16_t *) _dest; \
439    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
440                          dest, dstW, yalpha, uvalpha, y, fmt); \
441} \
442 \
443static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
444                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
445                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
446                        int uvalpha, int y) \
447{ \
448    const int32_t *buf0  = (const int32_t *)  _buf0, \
449                 **ubuf  = (const int32_t **) _ubuf, \
450                 **vbuf  = (const int32_t **) _vbuf, \
451                  *abuf0 = (const int32_t *)  _abuf0; \
452    uint16_t *dest = (uint16_t *) _dest; \
453    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
454                                  dstW, uvalpha, y, fmt); \
455}
456
457YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
458YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
459
460#define output_pixel(pos, acc) \
461    if (target == PIX_FMT_MONOBLACK) { \
462        pos = acc; \
463    } else { \
464        pos = ~acc; \
465    }
466
467static av_always_inline void
468yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
469                      const int16_t **lumSrc, int lumFilterSize,
470                      const int16_t *chrFilter, const int16_t **chrUSrc,
471                      const int16_t **chrVSrc, int chrFilterSize,
472                      const int16_t **alpSrc, uint8_t *dest, int dstW,
473                      int y, enum PixelFormat target)
474{
475    const uint8_t * const d128=dither_8x8_220[y&7];
476    uint8_t *g = c->table_gU[128] + c->table_gV[128];
477    int i;
478    unsigned acc = 0;
479
480    for (i = 0; i < dstW - 1; i += 2) {
481        int j;
482        int Y1 = 1 << 18;
483        int Y2 = 1 << 18;
484
485        for (j = 0; j < lumFilterSize; j++) {
486            Y1 += lumSrc[j][i]   * lumFilter[j];
487            Y2 += lumSrc[j][i+1] * lumFilter[j];
488        }
489        Y1 >>= 19;
490        Y2 >>= 19;
491        if ((Y1 | Y2) & 0x100) {
492            Y1 = av_clip_uint8(Y1);
493            Y2 = av_clip_uint8(Y2);
494        }
495        acc += acc + g[Y1 + d128[(i + 0) & 7]];
496        acc += acc + g[Y2 + d128[(i + 1) & 7]];
497        if ((i & 7) == 6) {
498            output_pixel(*dest++, acc);
499        }
500    }
501}
502
503static av_always_inline void
504yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
505                      const int16_t *ubuf[2], const int16_t *vbuf[2],
506                      const int16_t *abuf[2], uint8_t *dest, int dstW,
507                      int yalpha, int uvalpha, int y,
508                      enum PixelFormat target)
509{
510    const int16_t *buf0  = buf[0],  *buf1  = buf[1];
511    const uint8_t * const d128 = dither_8x8_220[y & 7];
512    uint8_t *g = c->table_gU[128] + c->table_gV[128];
513    int  yalpha1 = 4095 - yalpha;
514    int i;
515
516    for (i = 0; i < dstW - 7; i += 8) {
517        int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
518        acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
519        acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
520        acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
521        acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
522        acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
523        acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
524        acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
525        output_pixel(*dest++, acc);
526    }
527}
528
529static av_always_inline void
530yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
531                      const int16_t *ubuf[2], const int16_t *vbuf[2],
532                      const int16_t *abuf0, uint8_t *dest, int dstW,
533                      int uvalpha, int y, enum PixelFormat target)
534{
535    const uint8_t * const d128 = dither_8x8_220[y & 7];
536    uint8_t *g = c->table_gU[128] + c->table_gV[128];
537    int i;
538
539    for (i = 0; i < dstW - 7; i += 8) {
540        int acc =    g[(buf0[i    ] >> 7) + d128[0]];
541        acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
542        acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
543        acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
544        acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
545        acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
546        acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
547        acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
548        output_pixel(*dest++, acc);
549    }
550}
551
552#undef output_pixel
553
554#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
555static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
556                                const int16_t **lumSrc, int lumFilterSize, \
557                                const int16_t *chrFilter, const int16_t **chrUSrc, \
558                                const int16_t **chrVSrc, int chrFilterSize, \
559                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
560                                int y) \
561{ \
562    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
563                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
564                                  alpSrc, dest, dstW, y, fmt); \
565} \
566 \
567static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
568                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
569                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
570                                int yalpha, int uvalpha, int y) \
571{ \
572    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
573                                  dest, dstW, yalpha, uvalpha, y, fmt); \
574} \
575 \
576static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
577                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
578                                const int16_t *abuf0, uint8_t *dest, int dstW, \
579                                int uvalpha, int y) \
580{ \
581    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
582                                  abuf0, dest, dstW, uvalpha, \
583                                  y, fmt); \
584}
585
586YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
587YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
588
589#define output_pixels(pos, Y1, U, Y2, V) \
590    if (target == PIX_FMT_YUYV422) { \
591        dest[pos + 0] = Y1; \
592        dest[pos + 1] = U;  \
593        dest[pos + 2] = Y2; \
594        dest[pos + 3] = V;  \
595    } else { \
596        dest[pos + 0] = U;  \
597        dest[pos + 1] = Y1; \
598        dest[pos + 2] = V;  \
599        dest[pos + 3] = Y2; \
600    }
601
602static av_always_inline void
603yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
604                     const int16_t **lumSrc, int lumFilterSize,
605                     const int16_t *chrFilter, const int16_t **chrUSrc,
606                     const int16_t **chrVSrc, int chrFilterSize,
607                     const int16_t **alpSrc, uint8_t *dest, int dstW,
608                     int y, enum PixelFormat target)
609{
610    int i;
611
612    for (i = 0; i < (dstW >> 1); i++) {
613        int j;
614        int Y1 = 1 << 18;
615        int Y2 = 1 << 18;
616        int U  = 1 << 18;
617        int V  = 1 << 18;
618
619        for (j = 0; j < lumFilterSize; j++) {
620            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
621            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
622        }
623        for (j = 0; j < chrFilterSize; j++) {
624            U += chrUSrc[j][i] * chrFilter[j];
625            V += chrVSrc[j][i] * chrFilter[j];
626        }
627        Y1 >>= 19;
628        Y2 >>= 19;
629        U  >>= 19;
630        V  >>= 19;
631        if ((Y1 | Y2 | U | V) & 0x100) {
632            Y1 = av_clip_uint8(Y1);
633            Y2 = av_clip_uint8(Y2);
634            U  = av_clip_uint8(U);
635            V  = av_clip_uint8(V);
636        }
637        output_pixels(4*i, Y1, U, Y2, V);
638    }
639}
640
641static av_always_inline void
642yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
643                     const int16_t *ubuf[2], const int16_t *vbuf[2],
644                     const int16_t *abuf[2], uint8_t *dest, int dstW,
645                     int yalpha, int uvalpha, int y,
646                     enum PixelFormat target)
647{
648    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
649                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
650                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
651    int  yalpha1 = 4095 - yalpha;
652    int uvalpha1 = 4095 - uvalpha;
653    int i;
654
655    for (i = 0; i < (dstW >> 1); i++) {
656        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
657        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
658        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
659        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
660
661        output_pixels(i * 4, Y1, U, Y2, V);
662    }
663}
664
665static av_always_inline void
666yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
667                     const int16_t *ubuf[2], const int16_t *vbuf[2],
668                     const int16_t *abuf0, uint8_t *dest, int dstW,
669                     int uvalpha, int y, enum PixelFormat target)
670{
671    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
672                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
673    int i;
674
675    if (uvalpha < 2048) {
676        for (i = 0; i < (dstW >> 1); i++) {
677            int Y1 = buf0[i * 2]     >> 7;
678            int Y2 = buf0[i * 2 + 1] >> 7;
679            int U  = ubuf1[i]        >> 7;
680            int V  = vbuf1[i]        >> 7;
681
682            output_pixels(i * 4, Y1, U, Y2, V);
683        }
684    } else {
685        for (i = 0; i < (dstW >> 1); i++) {
686            int Y1 =  buf0[i * 2]          >> 7;
687            int Y2 =  buf0[i * 2 + 1]      >> 7;
688            int U  = (ubuf0[i] + ubuf1[i]) >> 8;
689            int V  = (vbuf0[i] + vbuf1[i]) >> 8;
690
691            output_pixels(i * 4, Y1, U, Y2, V);
692        }
693    }
694}
695
696#undef output_pixels
697
698YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
699YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
700
701#define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
702#define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
703#define output_pixel(pos, val) \
704    if (isBE(target)) { \
705        AV_WB16(pos, val); \
706    } else { \
707        AV_WL16(pos, val); \
708    }
709
710static av_always_inline void
711yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
712                       const int32_t **lumSrc, int lumFilterSize,
713                       const int16_t *chrFilter, const int32_t **chrUSrc,
714                       const int32_t **chrVSrc, int chrFilterSize,
715                       const int32_t **alpSrc, uint16_t *dest, int dstW,
716                       int y, enum PixelFormat target)
717{
718    int i;
719
720    for (i = 0; i < (dstW >> 1); i++) {
721        int j;
722        int Y1 = -0x40000000;
723        int Y2 = -0x40000000;
724        int U  = -128 << 23; // 19
725        int V  = -128 << 23;
726        int R, G, B;
727
728        for (j = 0; j < lumFilterSize; j++) {
729            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
730            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
731        }
732        for (j = 0; j < chrFilterSize; j++) {
733            U += chrUSrc[j][i] * chrFilter[j];
734            V += chrVSrc[j][i] * chrFilter[j];
735        }
736
737        // 8bit: 12+15=27; 16-bit: 12+19=31
738        Y1 >>= 14; // 10
739        Y1 += 0x10000;
740        Y2 >>= 14;
741        Y2 += 0x10000;
742        U  >>= 14;
743        V  >>= 14;
744
745        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
746        Y1 -= c->yuv2rgb_y_offset;
747        Y2 -= c->yuv2rgb_y_offset;
748        Y1 *= c->yuv2rgb_y_coeff;
749        Y2 *= c->yuv2rgb_y_coeff;
750        Y1 += 1 << 13; // 21
751        Y2 += 1 << 13;
752        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
753
754        R = V * c->yuv2rgb_v2r_coeff;
755        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
756        B =                            U * c->yuv2rgb_u2b_coeff;
757
758        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
759        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
760        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
761        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
762        output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
763        output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
764        output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
765        dest += 6;
766    }
767}
768
769static av_always_inline void
770yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
771                       const int32_t *ubuf[2], const int32_t *vbuf[2],
772                       const int32_t *abuf[2], uint16_t *dest, int dstW,
773                       int yalpha, int uvalpha, int y,
774                       enum PixelFormat target)
775{
776    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
777                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
778                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
779    int  yalpha1 = 4095 - yalpha;
780    int uvalpha1 = 4095 - uvalpha;
781    int i;
782
783    for (i = 0; i < (dstW >> 1); i++) {
784        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
785        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
786        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
787        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
788        int R, G, B;
789
790        Y1 -= c->yuv2rgb_y_offset;
791        Y2 -= c->yuv2rgb_y_offset;
792        Y1 *= c->yuv2rgb_y_coeff;
793        Y2 *= c->yuv2rgb_y_coeff;
794        Y1 += 1 << 13;
795        Y2 += 1 << 13;
796
797        R = V * c->yuv2rgb_v2r_coeff;
798        G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
799        B =                            U * c->yuv2rgb_u2b_coeff;
800
801        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
802        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
803        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
804        output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
805        output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
806        output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
807        dest += 6;
808    }
809}
810
811static av_always_inline void
812yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
813                       const int32_t *ubuf[2], const int32_t *vbuf[2],
814                       const int32_t *abuf0, uint16_t *dest, int dstW,
815                       int uvalpha, int y, enum PixelFormat target)
816{
817    const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
818                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
819    int i;
820
821    if (uvalpha < 2048) {
822        for (i = 0; i < (dstW >> 1); i++) {
823            int Y1 = (buf0[i * 2]    ) >> 2;
824            int Y2 = (buf0[i * 2 + 1]) >> 2;
825            int U  = (ubuf0[i] + (-128 << 11)) >> 2;
826            int V  = (vbuf0[i] + (-128 << 11)) >> 2;
827            int R, G, B;
828
829            Y1 -= c->yuv2rgb_y_offset;
830            Y2 -= c->yuv2rgb_y_offset;
831            Y1 *= c->yuv2rgb_y_coeff;
832            Y2 *= c->yuv2rgb_y_coeff;
833            Y1 += 1 << 13;
834            Y2 += 1 << 13;
835
836            R = V * c->yuv2rgb_v2r_coeff;
837            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
838            B =                            U * c->yuv2rgb_u2b_coeff;
839
840            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
841            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
842            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
843            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
844            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
845            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
846            dest += 6;
847        }
848    } else {
849        for (i = 0; i < (dstW >> 1); i++) {
850            int Y1 = (buf0[i * 2]    ) >> 2;
851            int Y2 = (buf0[i * 2 + 1]) >> 2;
852            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
853            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
854            int R, G, B;
855
856            Y1 -= c->yuv2rgb_y_offset;
857            Y2 -= c->yuv2rgb_y_offset;
858            Y1 *= c->yuv2rgb_y_coeff;
859            Y2 *= c->yuv2rgb_y_coeff;
860            Y1 += 1 << 13;
861            Y2 += 1 << 13;
862
863            R = V * c->yuv2rgb_v2r_coeff;
864            G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
865            B =                            U * c->yuv2rgb_u2b_coeff;
866
867            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
868            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
869            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
870            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
871            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
872            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
873            dest += 6;
874        }
875    }
876}
877
878#undef output_pixel
879#undef r_b
880#undef b_r
881
882YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
883YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
884YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
885YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
886
887/*
888 * Write out 2 RGB pixels in the target pixel format. This function takes a
889 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
890 * things like endianness conversion and shifting. The caller takes care of
891 * setting the correct offset in these tables from the chroma (U/V) values.
892 * This function then uses the luminance (Y1/Y2) values to write out the
893 * correct RGB values into the destination buffer.
894 */
895static av_always_inline void
896yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
897              unsigned A1, unsigned A2,
898              const void *_r, const void *_g, const void *_b, int y,
899              enum PixelFormat target, int hasAlpha)
900{
901    if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
902        target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
903        uint32_t *dest = (uint32_t *) _dest;
904        const uint32_t *r = (const uint32_t *) _r;
905        const uint32_t *g = (const uint32_t *) _g;
906        const uint32_t *b = (const uint32_t *) _b;
907
908#if CONFIG_SMALL
909        int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;
910
911        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
912        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
913#else
914        if (hasAlpha) {
915            int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;
916
917            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
918            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
919        } else {
920            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
921            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
922        }
923#endif
924    } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
925        uint8_t *dest = (uint8_t *) _dest;
926        const uint8_t *r = (const uint8_t *) _r;
927        const uint8_t *g = (const uint8_t *) _g;
928        const uint8_t *b = (const uint8_t *) _b;
929
930#define r_b ((target == PIX_FMT_RGB24) ? r : b)
931#define b_r ((target == PIX_FMT_RGB24) ? b : r)
932        dest[i * 6 + 0] = r_b[Y1];
933        dest[i * 6 + 1] =   g[Y1];
934        dest[i * 6 + 2] = b_r[Y1];
935        dest[i * 6 + 3] = r_b[Y2];
936        dest[i * 6 + 4] =   g[Y2];
937        dest[i * 6 + 5] = b_r[Y2];
938#undef r_b
939#undef b_r
940    } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
941               target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
942               target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
943        uint16_t *dest = (uint16_t *) _dest;
944        const uint16_t *r = (const uint16_t *) _r;
945        const uint16_t *g = (const uint16_t *) _g;
946        const uint16_t *b = (const uint16_t *) _b;
947        int dr1, dg1, db1, dr2, dg2, db2;
948
949        if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
950            dr1 = dither_2x2_8[ y & 1     ][0];
951            dg1 = dither_2x2_4[ y & 1     ][0];
952            db1 = dither_2x2_8[(y & 1) ^ 1][0];
953            dr2 = dither_2x2_8[ y & 1     ][1];
954            dg2 = dither_2x2_4[ y & 1     ][1];
955            db2 = dither_2x2_8[(y & 1) ^ 1][1];
956        } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
957            dr1 = dither_2x2_8[ y & 1     ][0];
958            dg1 = dither_2x2_8[ y & 1     ][1];
959            db1 = dither_2x2_8[(y & 1) ^ 1][0];
960            dr2 = dither_2x2_8[ y & 1     ][1];
961            dg2 = dither_2x2_8[ y & 1     ][0];
962            db2 = dither_2x2_8[(y & 1) ^ 1][1];
963        } else {
964            dr1 = dither_4x4_16[ y & 3     ][0];
965            dg1 = dither_4x4_16[ y & 3     ][1];
966            db1 = dither_4x4_16[(y & 3) ^ 3][0];
967            dr2 = dither_4x4_16[ y & 3     ][1];
968            dg2 = dither_4x4_16[ y & 3     ][0];
969            db2 = dither_4x4_16[(y & 3) ^ 3][1];
970        }
971
972        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
973        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
974    } else /* 8/4-bit */ {
975        uint8_t *dest = (uint8_t *) _dest;
976        const uint8_t *r = (const uint8_t *) _r;
977        const uint8_t *g = (const uint8_t *) _g;
978        const uint8_t *b = (const uint8_t *) _b;
979        int dr1, dg1, db1, dr2, dg2, db2;
980
981        if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
982            const uint8_t * const d64 = dither_8x8_73[y & 7];
983            const uint8_t * const d32 = dither_8x8_32[y & 7];
984            dr1 = dg1 = d32[(i * 2 + 0) & 7];
985            db1 =       d64[(i * 2 + 0) & 7];
986            dr2 = dg2 = d32[(i * 2 + 1) & 7];
987            db2 =       d64[(i * 2 + 1) & 7];
988        } else {
989            const uint8_t * const d64  = dither_8x8_73 [y & 7];
990            const uint8_t * const d128 = dither_8x8_220[y & 7];
991            dr1 = db1 = d128[(i * 2 + 0) & 7];
992            dg1 =        d64[(i * 2 + 0) & 7];
993            dr2 = db2 = d128[(i * 2 + 1) & 7];
994            dg2 =        d64[(i * 2 + 1) & 7];
995        }
996
997        if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
998            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
999                    ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1000        } else {
1001            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1002            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1003        }
1004    }
1005}
1006
1007static av_always_inline void
1008yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1009                     const int16_t **lumSrc, int lumFilterSize,
1010                     const int16_t *chrFilter, const int16_t **chrUSrc,
1011                     const int16_t **chrVSrc, int chrFilterSize,
1012                     const int16_t **alpSrc, uint8_t *dest, int dstW,
1013                     int y, enum PixelFormat target, int hasAlpha)
1014{
1015    int i;
1016
1017    for (i = 0; i < (dstW >> 1); i++) {
1018        int j;
1019        int Y1 = 1 << 18;
1020        int Y2 = 1 << 18;
1021        int U  = 1 << 18;
1022        int V  = 1 << 18;
1023        int av_unused A1, A2;
1024        const void *r, *g, *b;
1025
1026        for (j = 0; j < lumFilterSize; j++) {
1027            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
1028            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1029        }
1030        for (j = 0; j < chrFilterSize; j++) {
1031            U += chrUSrc[j][i] * chrFilter[j];
1032            V += chrVSrc[j][i] * chrFilter[j];
1033        }
1034        Y1 >>= 19;
1035        Y2 >>= 19;
1036        U  >>= 19;
1037        V  >>= 19;
1038        if ((Y1 | Y2 | U | V) & 0x100) {
1039            Y1 = av_clip_uint8(Y1);
1040            Y2 = av_clip_uint8(Y2);
1041            U  = av_clip_uint8(U);
1042            V  = av_clip_uint8(V);
1043        }
1044        if (hasAlpha) {
1045            A1 = 1 << 18;
1046            A2 = 1 << 18;
1047            for (j = 0; j < lumFilterSize; j++) {
1048                A1 += alpSrc[j][i * 2    ] * lumFilter[j];
1049                A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1050            }
1051            A1 >>= 19;
1052            A2 >>= 19;
1053            if ((A1 | A2) & 0x100) {
1054                A1 = av_clip_uint8(A1);
1055                A2 = av_clip_uint8(A2);
1056            }
1057        }
1058
1059        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
1060        r =  c->table_rV[V];
1061        g = (c->table_gU[U] + c->table_gV[V]);
1062        b =  c->table_bU[U];
1063
1064        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1065                      r, g, b, y, target, hasAlpha);
1066    }
1067}
1068
1069static av_always_inline void
1070yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1071                     const int16_t *ubuf[2], const int16_t *vbuf[2],
1072                     const int16_t *abuf[2], uint8_t *dest, int dstW,
1073                     int yalpha, int uvalpha, int y,
1074                     enum PixelFormat target, int hasAlpha)
1075{
1076    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
1077                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1078                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1079                  *abuf0 = hasAlpha ? abuf[0] : NULL,
1080                  *abuf1 = hasAlpha ? abuf[1] : NULL;
1081    int  yalpha1 = 4095 - yalpha;
1082    int uvalpha1 = 4095 - uvalpha;
1083    int i;
1084
1085    for (i = 0; i < (dstW >> 1); i++) {
1086        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
1087        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
1088        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
1089        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
1090        int A1, A2;
1091        const void *r =  c->table_rV[V],
1092                   *g = (c->table_gU[U] + c->table_gV[V]),
1093                   *b =  c->table_bU[U];
1094
1095        if (hasAlpha) {
1096            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
1097            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1098        }
1099
1100        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1101                      r, g, b, y, target, hasAlpha);
1102    }
1103}
1104
1105static av_always_inline void
1106yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1107                     const int16_t *ubuf[2], const int16_t *vbuf[2],
1108                     const int16_t *abuf0, uint8_t *dest, int dstW,
1109                     int uvalpha, int y, enum PixelFormat target,
1110                     int hasAlpha)
1111{
1112    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1113                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
1114    int i;
1115
1116    if (uvalpha < 2048) {
1117        for (i = 0; i < (dstW >> 1); i++) {
1118            int Y1 = buf0[i * 2]     >> 7;
1119            int Y2 = buf0[i * 2 + 1] >> 7;
1120            int U  = ubuf1[i]        >> 7;
1121            int V  = vbuf1[i]        >> 7;
1122            int A1, A2;
1123            const void *r =  c->table_rV[V],
1124                       *g = (c->table_gU[U] + c->table_gV[V]),
1125                       *b =  c->table_bU[U];
1126
1127            if (hasAlpha) {
1128                A1 = abuf0[i * 2    ] >> 7;
1129                A2 = abuf0[i * 2 + 1] >> 7;
1130            }
1131
1132            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1133                          r, g, b, y, target, hasAlpha);
1134        }
1135    } else {
1136        for (i = 0; i < (dstW >> 1); i++) {
1137            int Y1 =  buf0[i * 2]          >> 7;
1138            int Y2 =  buf0[i * 2 + 1]      >> 7;
1139            int U  = (ubuf0[i] + ubuf1[i]) >> 8;
1140            int V  = (vbuf0[i] + vbuf1[i]) >> 8;
1141            int A1, A2;
1142            const void *r =  c->table_rV[V],
1143                       *g = (c->table_gU[U] + c->table_gV[V]),
1144                       *b =  c->table_bU[U];
1145
1146            if (hasAlpha) {
1147                A1 = abuf0[i * 2    ] >> 7;
1148                A2 = abuf0[i * 2 + 1] >> 7;
1149            }
1150
1151            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1152                          r, g, b, y, target, hasAlpha);
1153        }
1154    }
1155}
1156
1157#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1158static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
1159                                const int16_t **lumSrc, int lumFilterSize, \
1160                                const int16_t *chrFilter, const int16_t **chrUSrc, \
1161                                const int16_t **chrVSrc, int chrFilterSize, \
1162                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
1163                                int y) \
1164{ \
1165    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
1166                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
1167                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
1168}
1169#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
1170YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
1171static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
1172                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
1173                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
1174                                int yalpha, int uvalpha, int y) \
1175{ \
1176    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
1177                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
1178} \
1179 \
1180static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
1181                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
1182                                const int16_t *abuf0, uint8_t *dest, int dstW, \
1183                                int uvalpha, int y) \
1184{ \
1185    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
1186                                  dstW, uvalpha, y, fmt, hasAlpha); \
1187}
1188
1189#if CONFIG_SMALL
1190YUV2RGBWRAPPER(yuv2rgb,,  32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1191YUV2RGBWRAPPER(yuv2rgb,,  32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1192#else
1193#if CONFIG_SWSCALE_ALPHA
1194YUV2RGBWRAPPER(yuv2rgb,, a32_1,  PIX_FMT_RGB32_1,   1)
1195YUV2RGBWRAPPER(yuv2rgb,, a32,    PIX_FMT_RGB32,     1)
1196#endif
1197YUV2RGBWRAPPER(yuv2rgb,, x32_1,  PIX_FMT_RGB32_1,   0)
1198YUV2RGBWRAPPER(yuv2rgb,, x32,    PIX_FMT_RGB32,     0)
1199#endif
1200YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0)
1201YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0)
1202YUV2RGBWRAPPER(yuv2rgb,,  16,    PIX_FMT_RGB565,    0)
1203YUV2RGBWRAPPER(yuv2rgb,,  15,    PIX_FMT_RGB555,    0)
1204YUV2RGBWRAPPER(yuv2rgb,,  12,    PIX_FMT_RGB444,    0)
1205YUV2RGBWRAPPER(yuv2rgb,,   8,    PIX_FMT_RGB8,      0)
1206YUV2RGBWRAPPER(yuv2rgb,,   4,    PIX_FMT_RGB4,      0)
1207YUV2RGBWRAPPER(yuv2rgb,,   4b,   PIX_FMT_RGB4_BYTE, 0)
1208
1209static av_always_inline void
1210yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1211                          const int16_t **lumSrc, int lumFilterSize,
1212                          const int16_t *chrFilter, const int16_t **chrUSrc,
1213                          const int16_t **chrVSrc, int chrFilterSize,
1214                          const int16_t **alpSrc, uint8_t *dest,
1215                          int dstW, int y, enum PixelFormat target, int hasAlpha)
1216{
1217    int i;
1218    int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1219
1220    for (i = 0; i < dstW; i++) {
1221        int j;
1222        int Y = 0;
1223        int U = -128 << 19;
1224        int V = -128 << 19;
1225        int av_unused A;
1226        int R, G, B;
1227
1228        for (j = 0; j < lumFilterSize; j++) {
1229            Y += lumSrc[j][i] * lumFilter[j];
1230        }
1231        for (j = 0; j < chrFilterSize; j++) {
1232            U += chrUSrc[j][i] * chrFilter[j];
1233            V += chrVSrc[j][i] * chrFilter[j];
1234        }
1235        Y >>= 10;
1236        U >>= 10;
1237        V >>= 10;
1238        if (hasAlpha) {
1239            A = 1 << 21;
1240            for (j = 0; j < lumFilterSize; j++) {
1241                A += alpSrc[j][i] * lumFilter[j];
1242            }
1243            A >>= 19;
1244            if (A & 0x100)
1245                A = av_clip_uint8(A);
1246        }
1247        Y -= c->yuv2rgb_y_offset;
1248        Y *= c->yuv2rgb_y_coeff;
1249        Y += 1 << 21;
1250        R = Y + V*c->yuv2rgb_v2r_coeff;
1251        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1252        B = Y +                          U*c->yuv2rgb_u2b_coeff;
1253        if ((R | G | B) & 0xC0000000) {
1254            R = av_clip_uintp2(R, 30);
1255            G = av_clip_uintp2(G, 30);
1256            B = av_clip_uintp2(B, 30);
1257        }
1258
1259        switch(target) {
1260        case PIX_FMT_ARGB:
1261            dest[0] = hasAlpha ? A : 255;
1262            dest[1] = R >> 22;
1263            dest[2] = G >> 22;
1264            dest[3] = B >> 22;
1265            break;
1266        case PIX_FMT_RGB24:
1267            dest[0] = R >> 22;
1268            dest[1] = G >> 22;
1269            dest[2] = B >> 22;
1270            break;
1271        case PIX_FMT_RGBA:
1272            dest[0] = R >> 22;
1273            dest[1] = G >> 22;
1274            dest[2] = B >> 22;
1275            dest[3] = hasAlpha ? A : 255;
1276            break;
1277        case PIX_FMT_ABGR:
1278            dest[0] = hasAlpha ? A : 255;
1279            dest[1] = B >> 22;
1280            dest[2] = G >> 22;
1281            dest[3] = R >> 22;
1282            dest += 4;
1283            break;
1284        case PIX_FMT_BGR24:
1285            dest[0] = B >> 22;
1286            dest[1] = G >> 22;
1287            dest[2] = R >> 22;
1288            break;
1289        case PIX_FMT_BGRA:
1290            dest[0] = B >> 22;
1291            dest[1] = G >> 22;
1292            dest[2] = R >> 22;
1293            dest[3] = hasAlpha ? A : 255;
1294            break;
1295        }
1296        dest += step;
1297    }
1298}
1299
1300#if CONFIG_SMALL
1301YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1302YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1303YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1304YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1305#else
1306#if CONFIG_SWSCALE_ALPHA
1307YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1)
1308YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1)
1309YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1)
1310YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1)
1311#endif
1312YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0)
1313YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0)
1314YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0)
1315YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0)
1316#endif
1317YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0)
1318YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0)
1319
1320static av_always_inline void fillPlane(uint8_t* plane, int stride,
1321                                       int width, int height,
1322                                       int y, uint8_t val)
1323{
1324    int i;
1325    uint8_t *ptr = plane + stride*y;
1326    for (i=0; i<height; i++) {
1327        memset(ptr, val, width);
1328        ptr += stride;
1329    }
1330}
1331
1332#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1333
1334#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
1335#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1336
1337static av_always_inline void
1338rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
1339                    enum PixelFormat origin)
1340{
1341    int i;
1342    for (i = 0; i < width; i++) {
1343        unsigned int r_b = input_pixel(&src[i*3+0]);
1344        unsigned int   g = input_pixel(&src[i*3+1]);
1345        unsigned int b_r = input_pixel(&src[i*3+2]);
1346
1347        dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1348    }
1349}
1350
1351static av_always_inline void
1352rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
1353                    const uint16_t *src1, const uint16_t *src2,
1354                    int width, enum PixelFormat origin)
1355{
1356    int i;
1357    assert(src1==src2);
1358    for (i = 0; i < width; i++) {
1359        int r_b = input_pixel(&src1[i*3+0]);
1360        int   g = input_pixel(&src1[i*3+1]);
1361        int b_r = input_pixel(&src1[i*3+2]);
1362
1363        dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1364        dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1365    }
1366}
1367
1368static av_always_inline void
1369rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
1370                          const uint16_t *src1, const uint16_t *src2,
1371                          int width, enum PixelFormat origin)
1372{
1373    int i;
1374    assert(src1==src2);
1375    for (i = 0; i < width; i++) {
1376        int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
1377        int   g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
1378        int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
1379
1380        dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1381        dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1382    }
1383}
1384
1385#undef r
1386#undef b
1387#undef input_pixel
1388
1389#define rgb48funcs(pattern, BE_LE, origin) \
1390static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
1391                                    int width, uint32_t *unused) \
1392{ \
1393    const uint16_t *src = (const uint16_t *) _src; \
1394    uint16_t *dst = (uint16_t *) _dst; \
1395    rgb48ToY_c_template(dst, src, width, origin); \
1396} \
1397 \
1398static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
1399                                    const uint8_t *_src1, const uint8_t *_src2, \
1400                                    int width, uint32_t *unused) \
1401{ \
1402    const uint16_t *src1 = (const uint16_t *) _src1, \
1403                   *src2 = (const uint16_t *) _src2; \
1404    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1405    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
1406} \
1407 \
1408static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
1409                                    const uint8_t *_src1, const uint8_t *_src2, \
1410                                    int width, uint32_t *unused) \
1411{ \
1412    const uint16_t *src1 = (const uint16_t *) _src1, \
1413                   *src2 = (const uint16_t *) _src2; \
1414    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
1415    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
1416}
1417
1418rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
1419rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1420rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1421rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
1422
1423#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
1424                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
1425                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1426
1427static av_always_inline void
1428rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
1429                       int width, enum PixelFormat origin,
1430                       int shr,   int shg,   int shb, int shp,
1431                       int maskr, int maskg, int maskb,
1432                       int rsh,   int gsh,   int bsh, int S)
1433{
1434    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
1435    const unsigned rnd = 33u << (S - 1);
1436    int i;
1437
1438    for (i = 0; i < width; i++) {
1439        int px = input_pixel(i) >> shp;
1440        int b = (px & maskb) >> shb;
1441        int g = (px & maskg) >> shg;
1442        int r = (px & maskr) >> shr;
1443
1444        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
1445    }
1446}
1447
1448static av_always_inline void
1449rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1450                        const uint8_t *src, int width,
1451                        enum PixelFormat origin,
1452                        int shr,   int shg,   int shb, int shp,
1453                        int maskr, int maskg, int maskb,
1454                        int rsh,   int gsh,   int bsh, int S)
1455{
1456    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1457              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
1458    const unsigned rnd = 257u << (S - 1);
1459    int i;
1460
1461    for (i = 0; i < width; i++) {
1462        int px = input_pixel(i) >> shp;
1463        int b = (px & maskb) >> shb;
1464        int g = (px & maskg) >> shg;
1465        int r = (px & maskr) >> shr;
1466
1467        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
1468        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
1469    }
1470}
1471
1472static av_always_inline void
1473rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
1474                             const uint8_t *src, int width,
1475                             enum PixelFormat origin,
1476                             int shr,   int shg,   int shb, int shp,
1477                             int maskr, int maskg, int maskb,
1478                             int rsh,   int gsh,   int bsh, int S)
1479{
1480    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
1481              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
1482              maskgx = ~(maskr | maskb);
1483    const unsigned rnd = 257u << S;
1484    int i;
1485
1486    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
1487    for (i = 0; i < width; i++) {
1488        int px0 = input_pixel(2 * i + 0) >> shp;
1489        int px1 = input_pixel(2 * i + 1) >> shp;
1490        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
1491        int rb = px0 + px1 - g;
1492
1493        b = (rb & maskb) >> shb;
1494        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
1495            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
1496            g >>= shg;
1497        } else {
1498            g = (g  & maskg) >> shg;
1499        }
1500        r = (rb & maskr) >> shr;
1501
1502        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
1503        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
1504    }
1505}
1506
1507#undef input_pixel
1508
1509#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
1510                         maskg, maskb, rsh, gsh, bsh, S) \
1511static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
1512                          int width, uint32_t *unused) \
1513{ \
1514    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
1515                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
1516} \
1517 \
1518static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1519                           const uint8_t *src, const uint8_t *dummy, \
1520                           int width, uint32_t *unused) \
1521{ \
1522    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1523                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
1524} \
1525 \
1526static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
1527                                const uint8_t *src, const uint8_t *dummy, \
1528                                int width, uint32_t *unused) \
1529{ \
1530    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
1531                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
1532}
1533
1534rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
1535rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
1536rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
1537rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
1538rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
1539rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
1540rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
1541rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
1542rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
1543rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
1544rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
1545rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
1546rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
1547rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
1548rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
1549rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
1550
1551static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1552{
1553    int i;
1554    for (i=0; i<width; i++) {
1555        dst[i]= src[4*i];
1556    }
1557}
1558
1559static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
1560{
1561    int i;
1562    for (i=0; i<width; i++) {
1563        dst[i]= src[4*i+3];
1564    }
1565}
1566
1567static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
1568{
1569    int i;
1570    for (i=0; i<width; i++) {
1571        int d= src[i];
1572
1573        dst[i]= pal[d] & 0xFF;
1574    }
1575}
1576
1577static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
1578                      const uint8_t *src1, const uint8_t *src2,
1579                      int width, uint32_t *pal)
1580{
1581    int i;
1582    assert(src1 == src2);
1583    for (i=0; i<width; i++) {
1584        int p= pal[src1[i]];
1585
1586        dstU[i]= p>>8;
1587        dstV[i]= p>>16;
1588    }
1589}
1590
1591static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
1592                          int width, uint32_t *unused)
1593{
1594    int i, j;
1595    for (i=0; i<width/8; i++) {
1596        int d= ~src[i];
1597        for(j=0; j<8; j++)
1598            dst[8*i+j]= ((d>>(7-j))&1)*255;
1599    }
1600}
1601
1602static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
1603                          int width, uint32_t *unused)
1604{
1605    int i, j;
1606    for (i=0; i<width/8; i++) {
1607        int d= src[i];
1608        for(j=0; j<8; j++)
1609            dst[8*i+j]= ((d>>(7-j))&1)*255;
1610    }
1611}
1612
1613//FIXME yuy2* can read up to 7 samples too much
1614
1615static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
1616                      uint32_t *unused)
1617{
1618    int i;
1619    for (i=0; i<width; i++)
1620        dst[i]= src[2*i];
1621}
1622
1623static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1624                       const uint8_t *src2, int width, uint32_t *unused)
1625{
1626    int i;
1627    for (i=0; i<width; i++) {
1628        dstU[i]= src1[4*i + 1];
1629        dstV[i]= src1[4*i + 3];
1630    }
1631    assert(src1 == src2);
1632}
1633
1634static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
1635{
1636    int i;
1637    const uint16_t *src = (const uint16_t *) _src;
1638    uint16_t *dst = (uint16_t *) _dst;
1639    for (i=0; i<width; i++) {
1640        dst[i] = av_bswap16(src[i]);
1641    }
1642}
1643
1644static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
1645                        const uint8_t *_src2, int width, uint32_t *unused)
1646{
1647    int i;
1648    const uint16_t *src1 = (const uint16_t *) _src1,
1649                   *src2 = (const uint16_t *) _src2;
1650    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
1651    for (i=0; i<width; i++) {
1652        dstU[i] = av_bswap16(src1[i]);
1653        dstV[i] = av_bswap16(src2[i]);
1654    }
1655}
1656
1657/* This is almost identical to the previous, end exists only because
1658 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1659static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
1660                      uint32_t *unused)
1661{
1662    int i;
1663    for (i=0; i<width; i++)
1664        dst[i]= src[2*i+1];
1665}
1666
1667static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1668                       const uint8_t *src2, int width, uint32_t *unused)
1669{
1670    int i;
1671    for (i=0; i<width; i++) {
1672        dstU[i]= src1[4*i + 0];
1673        dstV[i]= src1[4*i + 2];
1674    }
1675    assert(src1 == src2);
1676}
1677
1678static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
1679                                        const uint8_t *src, int width)
1680{
1681    int i;
1682    for (i = 0; i < width; i++) {
1683        dst1[i] = src[2*i+0];
1684        dst2[i] = src[2*i+1];
1685    }
1686}
1687
1688static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
1689                       const uint8_t *src1, const uint8_t *src2,
1690                       int width, uint32_t *unused)
1691{
1692    nvXXtoUV_c(dstU, dstV, src1, width);
1693}
1694
1695static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1696                       const uint8_t *src1, const uint8_t *src2,
1697                       int width, uint32_t *unused)
1698{
1699    nvXXtoUV_c(dstV, dstU, src1, width);
1700}
1701
1702#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1703
1704static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1705                       int width, uint32_t *unused)
1706{
1707    int i;
1708    for (i=0; i<width; i++) {
1709        int b= src[i*3+0];
1710        int g= src[i*3+1];
1711        int r= src[i*3+2];
1712
1713        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1714    }
1715}
1716
1717static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1718                        const uint8_t *src2, int width, uint32_t *unused)
1719{
1720    int i;
1721    for (i=0; i<width; i++) {
1722        int b= src1[3*i + 0];
1723        int g= src1[3*i + 1];
1724        int r= src1[3*i + 2];
1725
1726        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1727        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1728    }
1729    assert(src1 == src2);
1730}
1731
1732static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1733                             const uint8_t *src2, int width, uint32_t *unused)
1734{
1735    int i;
1736    for (i=0; i<width; i++) {
1737        int b= src1[6*i + 0] + src1[6*i + 3];
1738        int g= src1[6*i + 1] + src1[6*i + 4];
1739        int r= src1[6*i + 2] + src1[6*i + 5];
1740
1741        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1742        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1743    }
1744    assert(src1 == src2);
1745}
1746
1747static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1748                       uint32_t *unused)
1749{
1750    int i;
1751    for (i=0; i<width; i++) {
1752        int r= src[i*3+0];
1753        int g= src[i*3+1];
1754        int b= src[i*3+2];
1755
1756        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1757    }
1758}
1759
1760static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1761                        const uint8_t *src2, int width, uint32_t *unused)
1762{
1763    int i;
1764    assert(src1==src2);
1765    for (i=0; i<width; i++) {
1766        int r= src1[3*i + 0];
1767        int g= src1[3*i + 1];
1768        int b= src1[3*i + 2];
1769
1770        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1771        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1772    }
1773}
1774
1775static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1776                             const uint8_t *src2, int width, uint32_t *unused)
1777{
1778    int i;
1779    assert(src1==src2);
1780    for (i=0; i<width; i++) {
1781        int r= src1[6*i + 0] + src1[6*i + 3];
1782        int g= src1[6*i + 1] + src1[6*i + 4];
1783        int b= src1[6*i + 2] + src1[6*i + 5];
1784
1785        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1786        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1787    }
1788}
1789
1790static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1791{
1792    int i;
1793    for (i = 0; i < width; i++) {
1794        int g = src[0][i];
1795        int b = src[1][i];
1796        int r = src[2][i];
1797
1798        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1799    }
1800}
1801
1802static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1803{
1804    int i;
1805    const uint16_t **src = (const uint16_t **) _src;
1806    uint16_t *dst = (uint16_t *) _dst;
1807    for (i = 0; i < width; i++) {
1808        int g = AV_RL16(src[0] + i);
1809        int b = AV_RL16(src[1] + i);
1810        int r = AV_RL16(src[2] + i);
1811
1812        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1813    }
1814}
1815
1816static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
1817{
1818    int i;
1819    const uint16_t **src = (const uint16_t **) _src;
1820    uint16_t *dst = (uint16_t *) _dst;
1821    for (i = 0; i < width; i++) {
1822        int g = AV_RB16(src[0] + i);
1823        int b = AV_RB16(src[1] + i);
1824        int r = AV_RB16(src[2] + i);
1825
1826        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1827    }
1828}
1829
1830static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1831{
1832    int i;
1833    for (i = 0; i < width; i++) {
1834        int g = src[0][i];
1835        int b = src[1][i];
1836        int r = src[2][i];
1837
1838        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1839        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1840    }
1841}
1842
1843static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1844{
1845    int i;
1846    const uint16_t **src = (const uint16_t **) _src;
1847    uint16_t *dstU = (uint16_t *) _dstU;
1848    uint16_t *dstV = (uint16_t *) _dstV;
1849    for (i = 0; i < width; i++) {
1850        int g = AV_RL16(src[0] + i);
1851        int b = AV_RL16(src[1] + i);
1852        int r = AV_RL16(src[2] + i);
1853
1854        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1855        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1856    }
1857}
1858
1859static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
1860{
1861    int i;
1862    const uint16_t **src = (const uint16_t **) _src;
1863    uint16_t *dstU = (uint16_t *) _dstU;
1864    uint16_t *dstV = (uint16_t *) _dstV;
1865    for (i = 0; i < width; i++) {
1866        int g = AV_RB16(src[0] + i);
1867        int b = AV_RB16(src[1] + i);
1868        int r = AV_RB16(src[2] + i);
1869
1870        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1871        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1872    }
1873}
1874
1875static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
1876                           const int16_t *filter,
1877                           const int32_t *filterPos, int filterSize)
1878{
1879    int i;
1880    int32_t *dst = (int32_t *) _dst;
1881    const uint16_t *src = (const uint16_t *) _src;
1882    int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1883    int sh = bits - 4;
1884
1885    for (i = 0; i < dstW; i++) {
1886        int j;
1887        int srcPos = filterPos[i];
1888        int val = 0;
1889
1890        for (j = 0; j < filterSize; j++) {
1891            val += src[srcPos + j] * filter[filterSize * i + j];
1892        }
1893        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
1894        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
1895    }
1896}
1897
1898static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
1899                           const int16_t *filter,
1900                           const int32_t *filterPos, int filterSize)
1901{
1902    int i;
1903    const uint16_t *src = (const uint16_t *) _src;
1904    int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
1905
1906    for (i = 0; i < dstW; i++) {
1907        int j;
1908        int srcPos = filterPos[i];
1909        int val = 0;
1910
1911        for (j = 0; j < filterSize; j++) {
1912            val += src[srcPos + j] * filter[filterSize * i + j];
1913        }
1914        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
1915        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
1916    }
1917}
1918
1919// bilinear / bicubic scaling
1920static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1921                          const int16_t *filter, const int32_t *filterPos,
1922                          int filterSize)
1923{
1924    int i;
1925    for (i=0; i<dstW; i++) {
1926        int j;
1927        int srcPos= filterPos[i];
1928        int val=0;
1929        for (j=0; j<filterSize; j++) {
1930            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1931        }
1932        //filter += hFilterSize;
1933        dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1934        //dst[i] = val>>7;
1935    }
1936}
1937
1938static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1939                          const int16_t *filter, const int32_t *filterPos,
1940                          int filterSize)
1941{
1942    int i;
1943    int32_t *dst = (int32_t *) _dst;
1944    for (i=0; i<dstW; i++) {
1945        int j;
1946        int srcPos= filterPos[i];
1947        int val=0;
1948        for (j=0; j<filterSize; j++) {
1949            val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1950        }
1951        //filter += hFilterSize;
1952        dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1953        //dst[i] = val>>7;
1954    }
1955}
1956
1957//FIXME all pal and rgb srcFormats could do this convertion as well
1958//FIXME all scalers more complex than bilinear could do half of this transform
1959static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1960{
1961    int i;
1962    for (i = 0; i < width; i++) {
1963        dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
1964        dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
1965    }
1966}
1967static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
1968{
1969    int i;
1970    for (i = 0; i < width; i++) {
1971        dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
1972        dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
1973    }
1974}
1975static void lumRangeToJpeg_c(int16_t *dst, int width)
1976{
1977    int i;
1978    for (i = 0; i < width; i++)
1979        dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
1980}
1981static void lumRangeFromJpeg_c(int16_t *dst, int width)
1982{
1983    int i;
1984    for (i = 0; i < width; i++)
1985        dst[i] = (dst[i]*14071 + 33561947)>>14;
1986}
1987
1988static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1989{
1990    int i;
1991    int32_t *dstU = (int32_t *) _dstU;
1992    int32_t *dstV = (int32_t *) _dstV;
1993    for (i = 0; i < width; i++) {
1994        dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1995        dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
1996    }
1997}
1998static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
1999{
2000    int i;
2001    int32_t *dstU = (int32_t *) _dstU;
2002    int32_t *dstV = (int32_t *) _dstV;
2003    for (i = 0; i < width; i++) {
2004        dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
2005        dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
2006    }
2007}
2008static void lumRangeToJpeg16_c(int16_t *_dst, int width)
2009{
2010    int i;
2011    int32_t *dst = (int32_t *) _dst;
2012    for (i = 0; i < width; i++)
2013        dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
2014}
2015static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
2016{
2017    int i;
2018    int32_t *dst = (int32_t *) _dst;
2019    for (i = 0; i < width; i++)
2020        dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
2021}
2022
2023static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2024                           const uint8_t *src, int srcW, int xInc)
2025{
2026    int i;
2027    unsigned int xpos=0;
2028    for (i=0;i<dstWidth;i++) {
2029        register unsigned int xx=xpos>>16;
2030        register unsigned int xalpha=(xpos&0xFFFF)>>9;
2031        dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2032        xpos+=xInc;
2033    }
2034}
2035
2036// *** horizontal scale Y line to temp buffer
2037static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
2038                                     const uint8_t *src_in[4], int srcW, int xInc,
2039                                     const int16_t *hLumFilter,
2040                                     const int32_t *hLumFilterPos, int hLumFilterSize,
2041                                     uint8_t *formatConvBuffer,
2042                                     uint32_t *pal, int isAlpha)
2043{
2044    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
2045    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
2046    const uint8_t *src = src_in[isAlpha ? 3 : 0];
2047
2048    if (toYV12) {
2049        toYV12(formatConvBuffer, src, srcW, pal);
2050        src= formatConvBuffer;
2051    } else if (c->readLumPlanar && !isAlpha) {
2052        c->readLumPlanar(formatConvBuffer, src_in, srcW);
2053        src = formatConvBuffer;
2054    }
2055
2056    if (!c->hyscale_fast) {
2057        c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
2058    } else { // fast bilinear upscale / crap downscale
2059        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
2060    }
2061
2062    if (convertRange)
2063        convertRange(dst, dstWidth);
2064}
2065
2066static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2067                           int dstWidth, const uint8_t *src1,
2068                           const uint8_t *src2, int srcW, int xInc)
2069{
2070    int i;
2071    unsigned int xpos=0;
2072    for (i=0;i<dstWidth;i++) {
2073        register unsigned int xx=xpos>>16;
2074        register unsigned int xalpha=(xpos&0xFFFF)>>9;
2075        dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2076        dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2077        xpos+=xInc;
2078    }
2079}
2080
2081static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
2082                                     const uint8_t *src_in[4],
2083                                     int srcW, int xInc, const int16_t *hChrFilter,
2084                                     const int32_t *hChrFilterPos, int hChrFilterSize,
2085                                     uint8_t *formatConvBuffer, uint32_t *pal)
2086{
2087    const uint8_t *src1 = src_in[1], *src2 = src_in[2];
2088    if (c->chrToYV12) {
2089        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2090        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
2091        src1= formatConvBuffer;
2092        src2= buf2;
2093    } else if (c->readChrPlanar) {
2094        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
2095        c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
2096        src1= formatConvBuffer;
2097        src2= buf2;
2098    }
2099
2100    if (!c->hcscale_fast) {
2101        c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
2102        c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
2103    } else { // fast bilinear upscale / crap downscale
2104        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
2105    }
2106
2107    if (c->chrConvertRange)
2108        c->chrConvertRange(dst1, dst2, dstWidth);
2109}
2110
2111static av_always_inline void
2112find_c_packed_planar_out_funcs(SwsContext *c,
2113                               yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
2114                               yuv2interleavedX_fn *yuv2nv12cX,
2115                               yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
2116                               yuv2packedX_fn *yuv2packedX)
2117{
2118    enum PixelFormat dstFormat = c->dstFormat;
2119
2120    if (is16BPS(dstFormat)) {
2121        *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
2122        *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
2123    } else if (is9_OR_10BPS(dstFormat)) {
2124        if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2125            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
2126            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
2127        } else {
2128            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
2129            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
2130        }
2131    } else {
2132        *yuv2plane1 = yuv2plane1_8_c;
2133        *yuv2planeX = yuv2planeX_8_c;
2134        if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2135            *yuv2nv12cX = yuv2nv12cX_c;
2136    }
2137
2138    if(c->flags & SWS_FULL_CHR_H_INT) {
2139        switch (dstFormat) {
2140            case PIX_FMT_RGBA:
2141#if CONFIG_SMALL
2142                *yuv2packedX = yuv2rgba32_full_X_c;
2143#else
2144#if CONFIG_SWSCALE_ALPHA
2145                if (c->alpPixBuf) {
2146                    *yuv2packedX = yuv2rgba32_full_X_c;
2147                } else
2148#endif /* CONFIG_SWSCALE_ALPHA */
2149                {
2150                    *yuv2packedX = yuv2rgbx32_full_X_c;
2151                }
2152#endif /* !CONFIG_SMALL */
2153                break;
2154            case PIX_FMT_ARGB:
2155#if CONFIG_SMALL
2156                *yuv2packedX = yuv2argb32_full_X_c;
2157#else
2158#if CONFIG_SWSCALE_ALPHA
2159                if (c->alpPixBuf) {
2160                    *yuv2packedX = yuv2argb32_full_X_c;
2161                } else
2162#endif /* CONFIG_SWSCALE_ALPHA */
2163                {
2164                    *yuv2packedX = yuv2xrgb32_full_X_c;
2165                }
2166#endif /* !CONFIG_SMALL */
2167                break;
2168            case PIX_FMT_BGRA:
2169#if CONFIG_SMALL
2170                *yuv2packedX = yuv2bgra32_full_X_c;
2171#else
2172#if CONFIG_SWSCALE_ALPHA
2173                if (c->alpPixBuf) {
2174                    *yuv2packedX = yuv2bgra32_full_X_c;
2175                } else
2176#endif /* CONFIG_SWSCALE_ALPHA */
2177                {
2178                    *yuv2packedX = yuv2bgrx32_full_X_c;
2179                }
2180#endif /* !CONFIG_SMALL */
2181                break;
2182            case PIX_FMT_ABGR:
2183#if CONFIG_SMALL
2184                *yuv2packedX = yuv2abgr32_full_X_c;
2185#else
2186#if CONFIG_SWSCALE_ALPHA
2187                if (c->alpPixBuf) {
2188                    *yuv2packedX = yuv2abgr32_full_X_c;
2189                } else
2190#endif /* CONFIG_SWSCALE_ALPHA */
2191                {
2192                    *yuv2packedX = yuv2xbgr32_full_X_c;
2193                }
2194#endif /* !CONFIG_SMALL */
2195                break;
2196            case PIX_FMT_RGB24:
2197            *yuv2packedX = yuv2rgb24_full_X_c;
2198            break;
2199        case PIX_FMT_BGR24:
2200            *yuv2packedX = yuv2bgr24_full_X_c;
2201            break;
2202        }
2203    } else {
2204        switch (dstFormat) {
2205        case PIX_FMT_RGB48LE:
2206            *yuv2packed1 = yuv2rgb48le_1_c;
2207            *yuv2packed2 = yuv2rgb48le_2_c;
2208            *yuv2packedX = yuv2rgb48le_X_c;
2209            break;
2210        case PIX_FMT_RGB48BE:
2211            *yuv2packed1 = yuv2rgb48be_1_c;
2212            *yuv2packed2 = yuv2rgb48be_2_c;
2213            *yuv2packedX = yuv2rgb48be_X_c;
2214            break;
2215        case PIX_FMT_BGR48LE:
2216            *yuv2packed1 = yuv2bgr48le_1_c;
2217            *yuv2packed2 = yuv2bgr48le_2_c;
2218            *yuv2packedX = yuv2bgr48le_X_c;
2219            break;
2220        case PIX_FMT_BGR48BE:
2221            *yuv2packed1 = yuv2bgr48be_1_c;
2222            *yuv2packed2 = yuv2bgr48be_2_c;
2223            *yuv2packedX = yuv2bgr48be_X_c;
2224            break;
2225        case PIX_FMT_RGB32:
2226        case PIX_FMT_BGR32:
2227#if CONFIG_SMALL
2228            *yuv2packed1 = yuv2rgb32_1_c;
2229            *yuv2packed2 = yuv2rgb32_2_c;
2230            *yuv2packedX = yuv2rgb32_X_c;
2231#else
2232#if CONFIG_SWSCALE_ALPHA
2233                if (c->alpPixBuf) {
2234                    *yuv2packed1 = yuv2rgba32_1_c;
2235                    *yuv2packed2 = yuv2rgba32_2_c;
2236                    *yuv2packedX = yuv2rgba32_X_c;
2237                } else
2238#endif /* CONFIG_SWSCALE_ALPHA */
2239                {
2240                    *yuv2packed1 = yuv2rgbx32_1_c;
2241                    *yuv2packed2 = yuv2rgbx32_2_c;
2242                    *yuv2packedX = yuv2rgbx32_X_c;
2243                }
2244#endif /* !CONFIG_SMALL */
2245            break;
2246        case PIX_FMT_RGB32_1:
2247        case PIX_FMT_BGR32_1:
2248#if CONFIG_SMALL
2249                *yuv2packed1 = yuv2rgb32_1_1_c;
2250                *yuv2packed2 = yuv2rgb32_1_2_c;
2251                *yuv2packedX = yuv2rgb32_1_X_c;
2252#else
2253#if CONFIG_SWSCALE_ALPHA
2254                if (c->alpPixBuf) {
2255                    *yuv2packed1 = yuv2rgba32_1_1_c;
2256                    *yuv2packed2 = yuv2rgba32_1_2_c;
2257                    *yuv2packedX = yuv2rgba32_1_X_c;
2258                } else
2259#endif /* CONFIG_SWSCALE_ALPHA */
2260                {
2261                    *yuv2packed1 = yuv2rgbx32_1_1_c;
2262                    *yuv2packed2 = yuv2rgbx32_1_2_c;
2263                    *yuv2packedX = yuv2rgbx32_1_X_c;
2264                }
2265#endif /* !CONFIG_SMALL */
2266                break;
2267        case PIX_FMT_RGB24:
2268            *yuv2packed1 = yuv2rgb24_1_c;
2269            *yuv2packed2 = yuv2rgb24_2_c;
2270            *yuv2packedX = yuv2rgb24_X_c;
2271            break;
2272        case PIX_FMT_BGR24:
2273            *yuv2packed1 = yuv2bgr24_1_c;
2274            *yuv2packed2 = yuv2bgr24_2_c;
2275            *yuv2packedX = yuv2bgr24_X_c;
2276            break;
2277        case PIX_FMT_RGB565LE:
2278        case PIX_FMT_RGB565BE:
2279        case PIX_FMT_BGR565LE:
2280        case PIX_FMT_BGR565BE:
2281            *yuv2packed1 = yuv2rgb16_1_c;
2282            *yuv2packed2 = yuv2rgb16_2_c;
2283            *yuv2packedX = yuv2rgb16_X_c;
2284            break;
2285        case PIX_FMT_RGB555LE:
2286        case PIX_FMT_RGB555BE:
2287        case PIX_FMT_BGR555LE:
2288        case PIX_FMT_BGR555BE:
2289            *yuv2packed1 = yuv2rgb15_1_c;
2290            *yuv2packed2 = yuv2rgb15_2_c;
2291            *yuv2packedX = yuv2rgb15_X_c;
2292            break;
2293        case PIX_FMT_RGB444LE:
2294        case PIX_FMT_RGB444BE:
2295        case PIX_FMT_BGR444LE:
2296        case PIX_FMT_BGR444BE:
2297            *yuv2packed1 = yuv2rgb12_1_c;
2298            *yuv2packed2 = yuv2rgb12_2_c;
2299            *yuv2packedX = yuv2rgb12_X_c;
2300            break;
2301        case PIX_FMT_RGB8:
2302        case PIX_FMT_BGR8:
2303            *yuv2packed1 = yuv2rgb8_1_c;
2304            *yuv2packed2 = yuv2rgb8_2_c;
2305            *yuv2packedX = yuv2rgb8_X_c;
2306            break;
2307        case PIX_FMT_RGB4:
2308        case PIX_FMT_BGR4:
2309            *yuv2packed1 = yuv2rgb4_1_c;
2310            *yuv2packed2 = yuv2rgb4_2_c;
2311            *yuv2packedX = yuv2rgb4_X_c;
2312            break;
2313        case PIX_FMT_RGB4_BYTE:
2314        case PIX_FMT_BGR4_BYTE:
2315            *yuv2packed1 = yuv2rgb4b_1_c;
2316            *yuv2packed2 = yuv2rgb4b_2_c;
2317            *yuv2packedX = yuv2rgb4b_X_c;
2318            break;
2319        }
2320    }
2321    switch (dstFormat) {
2322    case PIX_FMT_GRAY16BE:
2323        *yuv2packed1 = yuv2gray16BE_1_c;
2324        *yuv2packed2 = yuv2gray16BE_2_c;
2325        *yuv2packedX = yuv2gray16BE_X_c;
2326        break;
2327    case PIX_FMT_GRAY16LE:
2328        *yuv2packed1 = yuv2gray16LE_1_c;
2329        *yuv2packed2 = yuv2gray16LE_2_c;
2330        *yuv2packedX = yuv2gray16LE_X_c;
2331        break;
2332    case PIX_FMT_MONOWHITE:
2333        *yuv2packed1 = yuv2monowhite_1_c;
2334        *yuv2packed2 = yuv2monowhite_2_c;
2335        *yuv2packedX = yuv2monowhite_X_c;
2336        break;
2337    case PIX_FMT_MONOBLACK:
2338        *yuv2packed1 = yuv2monoblack_1_c;
2339        *yuv2packed2 = yuv2monoblack_2_c;
2340        *yuv2packedX = yuv2monoblack_X_c;
2341        break;
2342    case PIX_FMT_YUYV422:
2343        *yuv2packed1 = yuv2yuyv422_1_c;
2344        *yuv2packed2 = yuv2yuyv422_2_c;
2345        *yuv2packedX = yuv2yuyv422_X_c;
2346        break;
2347    case PIX_FMT_UYVY422:
2348        *yuv2packed1 = yuv2uyvy422_1_c;
2349        *yuv2packed2 = yuv2uyvy422_2_c;
2350        *yuv2packedX = yuv2uyvy422_X_c;
2351        break;
2352    }
2353}
2354
2355#define DEBUG_SWSCALE_BUFFERS 0
2356#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
2357
2358static int swScale(SwsContext *c, const uint8_t* src[],
2359                   int srcStride[], int srcSliceY,
2360                   int srcSliceH, uint8_t* dst[], int dstStride[])
2361{
2362    /* load a few things into local vars to make the code more readable? and faster */
2363    const int srcW= c->srcW;
2364    const int dstW= c->dstW;
2365    const int dstH= c->dstH;
2366    const int chrDstW= c->chrDstW;
2367    const int chrSrcW= c->chrSrcW;
2368    const int lumXInc= c->lumXInc;
2369    const int chrXInc= c->chrXInc;
2370    const enum PixelFormat dstFormat= c->dstFormat;
2371    const int flags= c->flags;
2372    int32_t *vLumFilterPos= c->vLumFilterPos;
2373    int32_t *vChrFilterPos= c->vChrFilterPos;
2374    int32_t *hLumFilterPos= c->hLumFilterPos;
2375    int32_t *hChrFilterPos= c->hChrFilterPos;
2376    int16_t *vLumFilter= c->vLumFilter;
2377    int16_t *vChrFilter= c->vChrFilter;
2378    int16_t *hLumFilter= c->hLumFilter;
2379    int16_t *hChrFilter= c->hChrFilter;
2380    int32_t *lumMmxFilter= c->lumMmxFilter;
2381    int32_t *chrMmxFilter= c->chrMmxFilter;
2382    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2383    const int vLumFilterSize= c->vLumFilterSize;
2384    const int vChrFilterSize= c->vChrFilterSize;
2385    const int hLumFilterSize= c->hLumFilterSize;
2386    const int hChrFilterSize= c->hChrFilterSize;
2387    int16_t **lumPixBuf= c->lumPixBuf;
2388    int16_t **chrUPixBuf= c->chrUPixBuf;
2389    int16_t **chrVPixBuf= c->chrVPixBuf;
2390    int16_t **alpPixBuf= c->alpPixBuf;
2391    const int vLumBufSize= c->vLumBufSize;
2392    const int vChrBufSize= c->vChrBufSize;
2393    uint8_t *formatConvBuffer= c->formatConvBuffer;
2394    const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2395    const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2396    int lastDstY;
2397    uint32_t *pal=c->pal_yuv;
2398    yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
2399    yuv2planarX_fn yuv2planeX = c->yuv2planeX;
2400    yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
2401    yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
2402    yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
2403    yuv2packedX_fn yuv2packedX = c->yuv2packedX;
2404    int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2405
2406    /* vars which will change and which we need to store back in the context */
2407    int dstY= c->dstY;
2408    int lumBufIndex= c->lumBufIndex;
2409    int chrBufIndex= c->chrBufIndex;
2410    int lastInLumBuf= c->lastInLumBuf;
2411    int lastInChrBuf= c->lastInChrBuf;
2412
2413    if (isPacked(c->srcFormat)) {
2414        src[0]=
2415        src[1]=
2416        src[2]=
2417        src[3]= src[0];
2418        srcStride[0]=
2419        srcStride[1]=
2420        srcStride[2]=
2421        srcStride[3]= srcStride[0];
2422    }
2423    srcStride[1]<<= c->vChrDrop;
2424    srcStride[2]<<= c->vChrDrop;
2425
2426    DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2427                  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2428                  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2429    DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2430                   srcSliceY,    srcSliceH,    dstY,    dstH);
2431    DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2432                   vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
2433
2434    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2435        static int warnedAlready=0; //FIXME move this into the context perhaps
2436        if (flags & SWS_PRINT_INFO && !warnedAlready) {
2437            av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2438                   "         ->cannot do aligned memory accesses anymore\n");
2439            warnedAlready=1;
2440        }
2441    }
2442
2443    /* Note the user might start scaling the picture in the middle so this
2444       will not get executed. This is not really intended but works
2445       currently, so people might do it. */
2446    if (srcSliceY ==0) {
2447        lumBufIndex=-1;
2448        chrBufIndex=-1;
2449        dstY=0;
2450        lastInLumBuf= -1;
2451        lastInChrBuf= -1;
2452    }
2453
2454    if (!should_dither) {
2455        c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
2456    }
2457    lastDstY= dstY;
2458
2459    for (;dstY < dstH; dstY++) {
2460        const int chrDstY= dstY>>c->chrDstVSubSample;
2461        uint8_t *dest[4] = {
2462            dst[0] + dstStride[0] * dstY,
2463            dst[1] + dstStride[1] * chrDstY,
2464            dst[2] + dstStride[2] * chrDstY,
2465            (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2466        };
2467
2468        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2469        const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2470        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2471
2472        // Last line needed as input
2473        int lastLumSrcY  = FFMIN(c->srcH,    firstLumSrcY  + vLumFilterSize) - 1;
2474        int lastLumSrcY2 = FFMIN(c->srcH,    firstLumSrcY2 + vLumFilterSize) - 1;
2475        int lastChrSrcY  = FFMIN(c->chrSrcH, firstChrSrcY  + vChrFilterSize) - 1;
2476        int enough_lines;
2477
2478        //handle holes (FAST_BILINEAR & weird filters)
2479        if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2480        if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2481        assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2482        assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2483
2484        DEBUG_BUFFERS("dstY: %d\n", dstY);
2485        DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2486                         firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
2487        DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2488                         firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
2489
2490        // Do we have enough lines in this slice to output the dstY line
2491        enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2492
2493        if (!enough_lines) {
2494            lastLumSrcY = srcSliceY + srcSliceH - 1;
2495            lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2496            DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2497                                            lastLumSrcY, lastChrSrcY);
2498        }
2499
2500        //Do horizontal scaling
2501        while(lastInLumBuf < lastLumSrcY) {
2502            const uint8_t *src1[4] = {
2503                src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2504                src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2505                src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2506                src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2507            };
2508            lumBufIndex++;
2509            assert(lumBufIndex < 2*vLumBufSize);
2510            assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2511            assert(lastInLumBuf + 1 - srcSliceY >= 0);
2512            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2513                    hLumFilter, hLumFilterPos, hLumFilterSize,
2514                    formatConvBuffer,
2515                    pal, 0);
2516            if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2517                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2518                        lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2519                        formatConvBuffer,
2520                        pal, 1);
2521            lastInLumBuf++;
2522            DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2523                               lumBufIndex,    lastInLumBuf);
2524        }
2525        while(lastInChrBuf < lastChrSrcY) {
2526            const uint8_t *src1[4] = {
2527                src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2528                src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2529                src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2530                src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2531            };
2532            chrBufIndex++;
2533            assert(chrBufIndex < 2*vChrBufSize);
2534            assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2535            assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2536            //FIXME replace parameters through context struct (some at least)
2537
2538            if (c->needs_hcscale)
2539                hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2540                          chrDstW, src1, chrSrcW, chrXInc,
2541                          hChrFilter, hChrFilterPos, hChrFilterSize,
2542                          formatConvBuffer, pal);
2543            lastInChrBuf++;
2544            DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2545                               chrBufIndex,    lastInChrBuf);
2546        }
2547        //wrap buf index around to stay inside the ring buffer
2548        if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2549        if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2550        if (!enough_lines)
2551            break; //we can't output a dstY line so let's try with the next slice
2552
2553#if HAVE_MMX
2554        updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2555#endif
2556        if (should_dither) {
2557            c->chrDither8 = dither_8x8_128[chrDstY & 7];
2558            c->lumDither8 = dither_8x8_128[dstY & 7];
2559        }
2560        if (dstY >= dstH-2) {
2561            // hmm looks like we can't use MMX here without overwriting this array's tail
2562            find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX,  &yuv2nv12cX,
2563                                           &yuv2packed1, &yuv2packed2, &yuv2packedX);
2564        }
2565
2566        {
2567            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2568            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2569            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2570            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2571
2572            if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2573                const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2574                int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2575                for (i = 0; i < neg;            i++)
2576                    tmpY[i] = lumSrcPtr[neg];
2577                for (     ; i < end;            i++)
2578                    tmpY[i] = lumSrcPtr[i];
2579                for (     ; i < vLumFilterSize; i++)
2580                    tmpY[i] = tmpY[i-1];
2581                lumSrcPtr = tmpY;
2582
2583                if (alpSrcPtr) {
2584                    const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2585                    for (i = 0; i < neg;            i++)
2586                        tmpA[i] = alpSrcPtr[neg];
2587                    for (     ; i < end;            i++)
2588                        tmpA[i] = alpSrcPtr[i];
2589                    for (     ; i < vLumFilterSize; i++)
2590                        tmpA[i] = tmpA[i - 1];
2591                    alpSrcPtr = tmpA;
2592                }
2593            }
2594            if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2595                const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2596                              **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2597                int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2598                for (i = 0; i < neg;            i++) {
2599                    tmpU[i] = chrUSrcPtr[neg];
2600                    tmpV[i] = chrVSrcPtr[neg];
2601                }
2602                for (     ; i < end;            i++) {
2603                    tmpU[i] = chrUSrcPtr[i];
2604                    tmpV[i] = chrVSrcPtr[i];
2605                }
2606                for (     ; i < vChrFilterSize; i++) {
2607                    tmpU[i] = tmpU[i - 1];
2608                    tmpV[i] = tmpV[i - 1];
2609                }
2610                chrUSrcPtr = tmpU;
2611                chrVSrcPtr = tmpV;
2612            }
2613
2614            if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2615                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2616
2617                if (vLumFilterSize == 1) {
2618                    yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2619                } else {
2620                    yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2621                               lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2622                }
2623
2624                if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2625                    if (yuv2nv12cX) {
2626                        yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2627                    } else if (vChrFilterSize == 1) {
2628                        yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2629                        yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2630                    } else {
2631                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2632                                   chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2633                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2634                                   chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2635                    }
2636                }
2637
2638                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2639                    if (vLumFilterSize == 1) {
2640                        yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2641                    } else {
2642                        yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2643                                   alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
2644                    }
2645                }
2646            } else {
2647                assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
2648                assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
2649                if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2650                    int chrAlpha = vChrFilter[2 * dstY + 1];
2651                    yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2652                                alpPixBuf ? *alpSrcPtr : NULL,
2653                                dest[0], dstW, chrAlpha, dstY);
2654                } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2655                    int lumAlpha = vLumFilter[2 * dstY + 1];
2656                    int chrAlpha = vChrFilter[2 * dstY + 1];
2657                    lumMmxFilter[2] =
2658                    lumMmxFilter[3] = vLumFilter[2 * dstY   ] * 0x10001;
2659                    chrMmxFilter[2] =
2660                    chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2661                    yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2662                                alpPixBuf ? alpSrcPtr : NULL,
2663                                dest[0], dstW, lumAlpha, chrAlpha, dstY);
2664                } else { //general RGB
2665                    yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2666                                lumSrcPtr, vLumFilterSize,
2667                                vChrFilter + dstY * vChrFilterSize,
2668                                chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2669                                alpSrcPtr, dest[0], dstW, dstY);
2670                }
2671            }
2672        }
2673    }
2674
2675    if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2676        fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2677
2678#if HAVE_MMX2
2679    if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
2680        __asm__ volatile("sfence":::"memory");
2681#endif
2682    emms_c();
2683
2684    /* store changed local vars back in the context */
2685    c->dstY= dstY;
2686    c->lumBufIndex= lumBufIndex;
2687    c->chrBufIndex= chrBufIndex;
2688    c->lastInLumBuf= lastInLumBuf;
2689    c->lastInChrBuf= lastInChrBuf;
2690
2691    return dstY - lastDstY;
2692}
2693
2694static av_cold void sws_init_swScale_c(SwsContext *c)
2695{
2696    enum PixelFormat srcFormat = c->srcFormat;
2697
2698    find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
2699                                   &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
2700                                   &c->yuv2packedX);
2701
2702    c->chrToYV12 = NULL;
2703    switch(srcFormat) {
2704        case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
2705        case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
2706        case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
2707        case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
2708        case PIX_FMT_RGB8     :
2709        case PIX_FMT_BGR8     :
2710        case PIX_FMT_PAL8     :
2711        case PIX_FMT_BGR4_BYTE:
2712        case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
2713        case PIX_FMT_GBRP9LE:
2714        case PIX_FMT_GBRP10LE:
2715        case PIX_FMT_GBRP16LE:  c->readChrPlanar = planar_rgb16le_to_uv; break;
2716        case PIX_FMT_GBRP9BE:
2717        case PIX_FMT_GBRP10BE:
2718        case PIX_FMT_GBRP16BE:  c->readChrPlanar = planar_rgb16be_to_uv; break;
2719        case PIX_FMT_GBRP:      c->readChrPlanar = planar_rgb_to_uv; break;
2720#if HAVE_BIGENDIAN
2721        case PIX_FMT_YUV444P9LE:
2722        case PIX_FMT_YUV422P9LE:
2723        case PIX_FMT_YUV420P9LE:
2724        case PIX_FMT_YUV422P10LE:
2725        case PIX_FMT_YUV444P10LE:
2726        case PIX_FMT_YUV420P10LE:
2727        case PIX_FMT_YUV420P16LE:
2728        case PIX_FMT_YUV422P16LE:
2729        case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2730#else
2731        case PIX_FMT_YUV444P9BE:
2732        case PIX_FMT_YUV422P9BE:
2733        case PIX_FMT_YUV420P9BE:
2734        case PIX_FMT_YUV444P10BE:
2735        case PIX_FMT_YUV422P10BE:
2736        case PIX_FMT_YUV420P10BE:
2737        case PIX_FMT_YUV420P16BE:
2738        case PIX_FMT_YUV422P16BE:
2739        case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
2740#endif
2741    }
2742    if (c->chrSrcHSubSample) {
2743        switch(srcFormat) {
2744        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2745        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2746        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2747        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2748        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
2749        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
2750        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
2751        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2752        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2753        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2754        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2755        case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
2756        case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
2757        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
2758        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
2759        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
2760        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2761        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2762        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2763        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2764        case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
2765        case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
2766        }
2767    } else {
2768        switch(srcFormat) {
2769        case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2770        case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2771        case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2772        case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2773        case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
2774        case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
2775        case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
2776        case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2777        case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2778        case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2779        case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2780        case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
2781        case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
2782        case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
2783        case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
2784        case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
2785        case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2786        case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2787        case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2788        case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
2789        case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
2790        case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
2791        }
2792    }
2793
2794    c->lumToYV12 = NULL;
2795    c->alpToYV12 = NULL;
2796    switch (srcFormat) {
2797    case PIX_FMT_GBRP9LE:
2798    case PIX_FMT_GBRP10LE:
2799    case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
2800    case PIX_FMT_GBRP9BE:
2801    case PIX_FMT_GBRP10BE:
2802    case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
2803    case PIX_FMT_GBRP:     c->readLumPlanar = planar_rgb_to_y; break;
2804#if HAVE_BIGENDIAN
2805    case PIX_FMT_YUV444P9LE:
2806    case PIX_FMT_YUV422P9LE:
2807    case PIX_FMT_YUV420P9LE:
2808    case PIX_FMT_YUV444P10LE:
2809    case PIX_FMT_YUV422P10LE:
2810    case PIX_FMT_YUV420P10LE:
2811    case PIX_FMT_YUV420P16LE:
2812    case PIX_FMT_YUV422P16LE:
2813    case PIX_FMT_YUV444P16LE:
2814    case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2815#else
2816    case PIX_FMT_YUV444P9BE:
2817    case PIX_FMT_YUV422P9BE:
2818    case PIX_FMT_YUV420P9BE:
2819    case PIX_FMT_YUV444P10BE:
2820    case PIX_FMT_YUV422P10BE:
2821    case PIX_FMT_YUV420P10BE:
2822    case PIX_FMT_YUV420P16BE:
2823    case PIX_FMT_YUV422P16BE:
2824    case PIX_FMT_YUV444P16BE:
2825    case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2826#endif
2827    case PIX_FMT_YUYV422  :
2828    case PIX_FMT_Y400A    : c->lumToYV12 = yuy2ToY_c; break;
2829    case PIX_FMT_UYVY422  : c->lumToYV12 = uyvyToY_c;    break;
2830    case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
2831    case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2832    case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2833    case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2834    case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2835    case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
2836    case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
2837    case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
2838    case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2839    case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2840    case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2841    case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2842    case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
2843    case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
2844    case PIX_FMT_RGB8     :
2845    case PIX_FMT_BGR8     :
2846    case PIX_FMT_PAL8     :
2847    case PIX_FMT_BGR4_BYTE:
2848    case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2849    case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2850    case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2851    case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
2852    case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2853    case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
2854    case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2855    case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2856    case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2857    case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2858    case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2859    }
2860    if (c->alpPixBuf) {
2861        switch (srcFormat) {
2862        case PIX_FMT_BGRA:
2863        case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
2864        case PIX_FMT_ABGR:
2865        case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
2866        case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2867        }
2868    }
2869
2870    if (c->srcBpc == 8) {
2871        if (c->dstBpc <= 10) {
2872            c->hyScale = c->hcScale = hScale8To15_c;
2873            if (c->flags & SWS_FAST_BILINEAR) {
2874                c->hyscale_fast = hyscale_fast_c;
2875                c->hcscale_fast = hcscale_fast_c;
2876            }
2877        } else {
2878            c->hyScale = c->hcScale = hScale8To19_c;
2879        }
2880    } else {
2881        c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
2882    }
2883
2884    if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2885        if (c->dstBpc <= 10) {
2886            if (c->srcRange) {
2887                c->lumConvertRange = lumRangeFromJpeg_c;
2888                c->chrConvertRange = chrRangeFromJpeg_c;
2889            } else {
2890                c->lumConvertRange = lumRangeToJpeg_c;
2891                c->chrConvertRange = chrRangeToJpeg_c;
2892            }
2893        } else {
2894            if (c->srcRange) {
2895                c->lumConvertRange = lumRangeFromJpeg16_c;
2896                c->chrConvertRange = chrRangeFromJpeg16_c;
2897            } else {
2898                c->lumConvertRange = lumRangeToJpeg16_c;
2899                c->chrConvertRange = chrRangeToJpeg16_c;
2900            }
2901        }
2902    }
2903
2904    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2905          srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2906        c->needs_hcscale = 1;
2907}
2908
2909SwsFunc ff_getSwsFunc(SwsContext *c)
2910{
2911    sws_init_swScale_c(c);
2912
2913    if (HAVE_MMX)
2914        ff_sws_init_swScale_mmx(c);
2915    if (HAVE_ALTIVEC)
2916        ff_sws_init_swScale_altivec(c);
2917
2918    return swScale;
2919}
2920