1/*
2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * the C code (not assembly, mmx, ...) of this file can be used
21 * under the LGPL license too
22 */
23
24/*
25  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
26  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27  {BGR,RGB}{1,4,8,15,16} support dithering
28
29  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31  x -> x
32  YUV9 -> YV12
33  YUV9/YV12 -> Y800
34  Y800 -> YUV9/YV12
35  BGR24 -> BGR32 & RGB24 -> RGB32
36  BGR32 -> BGR24 & RGB32 -> RGB24
37  BGR15 -> BGR16
38*/
39
40/*
41tested special converters (most are tested actually, but I did not write it down ...)
42 YV12 -> BGR16
43 YV12 -> YV12
44 BGR15 -> BGR16
45 BGR16 -> BGR16
46 YVU9 -> YV12
47
48untested special converters
49  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
50  YV12/I420 -> YV12/I420
51  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52  BGR24 -> BGR32 & RGB24 -> RGB32
53  BGR32 -> BGR24 & RGB32 -> RGB24
54  BGR24 -> YV12
55*/
56
57#define _SVID_SOURCE //needed for MAP_ANONYMOUS
58#include <inttypes.h>
59#include <string.h>
60#include <math.h>
61#include <stdio.h>
62#include <unistd.h>
63#include "config.h"
64#include <assert.h>
65#if HAVE_SYS_MMAN_H
66#include <sys/mman.h>
67#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
68#define MAP_ANONYMOUS MAP_ANON
69#endif
70#endif
71#include "swscale.h"
72#include "swscale_internal.h"
73#include "rgb2rgb.h"
74#include "libavutil/x86_cpu.h"
75#include "libavutil/bswap.h"
76
77unsigned swscale_version(void)
78{
79    return LIBSWSCALE_VERSION_INT;
80}
81
82#undef MOVNTQ
83#undef PAVGB
84
85//#undef HAVE_MMX2
86//#define HAVE_AMD3DNOW
87//#undef HAVE_MMX
88//#undef ARCH_X86
89//#define WORDS_BIGENDIAN
90#define DITHER1XBPP
91
92#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
93
94#define RET 0xC3 //near return opcode for x86
95
96#ifdef M_PI
97#define PI M_PI
98#else
99#define PI 3.14159265358979323846
100#endif
101
102#define isSupportedIn(x)    (       \
103           (x)==PIX_FMT_YUV420P     \
104        || (x)==PIX_FMT_YUVA420P    \
105        || (x)==PIX_FMT_YUYV422     \
106        || (x)==PIX_FMT_UYVY422     \
107        || (x)==PIX_FMT_RGB32       \
108        || (x)==PIX_FMT_RGB32_1     \
109        || (x)==PIX_FMT_BGR24       \
110        || (x)==PIX_FMT_BGR565      \
111        || (x)==PIX_FMT_BGR555      \
112        || (x)==PIX_FMT_BGR32       \
113        || (x)==PIX_FMT_BGR32_1     \
114        || (x)==PIX_FMT_RGB24       \
115        || (x)==PIX_FMT_RGB565      \
116        || (x)==PIX_FMT_RGB555      \
117        || (x)==PIX_FMT_GRAY8       \
118        || (x)==PIX_FMT_YUV410P     \
119        || (x)==PIX_FMT_YUV440P     \
120        || (x)==PIX_FMT_GRAY16BE    \
121        || (x)==PIX_FMT_GRAY16LE    \
122        || (x)==PIX_FMT_YUV444P     \
123        || (x)==PIX_FMT_YUV422P     \
124        || (x)==PIX_FMT_YUV411P     \
125        || (x)==PIX_FMT_PAL8        \
126        || (x)==PIX_FMT_BGR8        \
127        || (x)==PIX_FMT_RGB8        \
128        || (x)==PIX_FMT_BGR4_BYTE   \
129        || (x)==PIX_FMT_RGB4_BYTE   \
130        || (x)==PIX_FMT_YUV440P     \
131        || (x)==PIX_FMT_MONOWHITE   \
132        || (x)==PIX_FMT_MONOBLACK   \
133    )
134#define isSupportedOut(x)   (       \
135           (x)==PIX_FMT_YUV420P     \
136        || (x)==PIX_FMT_YUYV422     \
137        || (x)==PIX_FMT_UYVY422     \
138        || (x)==PIX_FMT_YUV444P     \
139        || (x)==PIX_FMT_YUV422P     \
140        || (x)==PIX_FMT_YUV411P     \
141        || isRGB(x)                 \
142        || isBGR(x)                 \
143        || (x)==PIX_FMT_NV12        \
144        || (x)==PIX_FMT_NV21        \
145        || (x)==PIX_FMT_GRAY16BE    \
146        || (x)==PIX_FMT_GRAY16LE    \
147        || (x)==PIX_FMT_GRAY8       \
148        || (x)==PIX_FMT_YUV410P     \
149        || (x)==PIX_FMT_YUV440P     \
150    )
151#define isPacked(x)         (       \
152           (x)==PIX_FMT_PAL8        \
153        || (x)==PIX_FMT_YUYV422     \
154        || (x)==PIX_FMT_UYVY422     \
155        || isRGB(x)                 \
156        || isBGR(x)                 \
157    )
158#define usePal(x)           (       \
159           (x)==PIX_FMT_PAL8        \
160        || (x)==PIX_FMT_BGR4_BYTE   \
161        || (x)==PIX_FMT_RGB4_BYTE   \
162        || (x)==PIX_FMT_BGR8        \
163        || (x)==PIX_FMT_RGB8        \
164    )
165
166#define RGB2YUV_SHIFT 15
167#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
168#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
169#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
170#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
171#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
172#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
173#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
174#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
175#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
176
177extern const int32_t ff_yuv2rgb_coeffs[8][4];
178
179static const double rgb2yuv_table[8][9]={
180    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
181    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
182    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
183    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
184    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
185    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
186    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
187    {0.701 , 0.087 , 0.212 , -0.384, 0.5  -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
188};
189
190/*
191NOTES
192Special versions: fast Y 1:1 scaling (no interpolation in y direction)
193
194TODO
195more intelligent misalignment avoidance for the horizontal scaler
196write special vertical cubic upscale version
197optimize C code (YV12 / minmax)
198add support for packed pixel YUV input & output
199add support for Y8 output
200optimize BGR24 & BGR32
201add BGR4 output support
202write special BGR->BGR scaler
203*/
204
205#if ARCH_X86 && CONFIG_GPL
206DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
207DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
208DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
209DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
210DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
211DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
212DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
213DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
214
215const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
216        0x0103010301030103LL,
217        0x0200020002000200LL,};
218
219const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
220        0x0602060206020602LL,
221        0x0004000400040004LL,};
222
223DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
224DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
225DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
226DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
227DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
228DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
229
230DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
231DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
232DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
233
234#ifdef FAST_BGR2YV12
235DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
236DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
237DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
238#else
239DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
240DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
241DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
242#endif /* FAST_BGR2YV12 */
243DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
244DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
245DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
246
247DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
248DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
249DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
250DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
251DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
252
253DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
254    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
255    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
256};
257
258DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
259
260#endif /* ARCH_X86 && CONFIG_GPL */
261
262// clipping helper table for C implementations:
263static unsigned char clip_table[768];
264
265static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
266
267static const uint8_t  __attribute__((aligned(8))) dither_2x2_4[2][8]={
268{  1,   3,   1,   3,   1,   3,   1,   3, },
269{  2,   0,   2,   0,   2,   0,   2,   0, },
270};
271
272static const uint8_t  __attribute__((aligned(8))) dither_2x2_8[2][8]={
273{  6,   2,   6,   2,   6,   2,   6,   2, },
274{  0,   4,   0,   4,   0,   4,   0,   4, },
275};
276
277const uint8_t  __attribute__((aligned(8))) dither_8x8_32[8][8]={
278{ 17,   9,  23,  15,  16,   8,  22,  14, },
279{  5,  29,   3,  27,   4,  28,   2,  26, },
280{ 21,  13,  19,  11,  20,  12,  18,  10, },
281{  0,  24,   6,  30,   1,  25,   7,  31, },
282{ 16,   8,  22,  14,  17,   9,  23,  15, },
283{  4,  28,   2,  26,   5,  29,   3,  27, },
284{ 20,  12,  18,  10,  21,  13,  19,  11, },
285{  1,  25,   7,  31,   0,  24,   6,  30, },
286};
287
288#if 0
289const uint8_t  __attribute__((aligned(8))) dither_8x8_64[8][8]={
290{  0,  48,  12,  60,   3,  51,  15,  63, },
291{ 32,  16,  44,  28,  35,  19,  47,  31, },
292{  8,  56,   4,  52,  11,  59,   7,  55, },
293{ 40,  24,  36,  20,  43,  27,  39,  23, },
294{  2,  50,  14,  62,   1,  49,  13,  61, },
295{ 34,  18,  46,  30,  33,  17,  45,  29, },
296{ 10,  58,   6,  54,   9,  57,   5,  53, },
297{ 42,  26,  38,  22,  41,  25,  37,  21, },
298};
299#endif
300
301const uint8_t  __attribute__((aligned(8))) dither_8x8_73[8][8]={
302{  0,  55,  14,  68,   3,  58,  17,  72, },
303{ 37,  18,  50,  32,  40,  22,  54,  35, },
304{  9,  64,   5,  59,  13,  67,   8,  63, },
305{ 46,  27,  41,  23,  49,  31,  44,  26, },
306{  2,  57,  16,  71,   1,  56,  15,  70, },
307{ 39,  21,  52,  34,  38,  19,  51,  33, },
308{ 11,  66,   7,  62,  10,  65,   6,  60, },
309{ 48,  30,  43,  25,  47,  29,  42,  24, },
310};
311
312#if 0
313const uint8_t  __attribute__((aligned(8))) dither_8x8_128[8][8]={
314{ 68,  36,  92,  60,  66,  34,  90,  58, },
315{ 20, 116,  12, 108,  18, 114,  10, 106, },
316{ 84,  52,  76,  44,  82,  50,  74,  42, },
317{  0,  96,  24, 120,   6, 102,  30, 126, },
318{ 64,  32,  88,  56,  70,  38,  94,  62, },
319{ 16, 112,   8, 104,  22, 118,  14, 110, },
320{ 80,  48,  72,  40,  86,  54,  78,  46, },
321{  4, 100,  28, 124,   2,  98,  26, 122, },
322};
323#endif
324
325#if 1
326const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
327{117,  62, 158, 103, 113,  58, 155, 100, },
328{ 34, 199,  21, 186,  31, 196,  17, 182, },
329{144,  89, 131,  76, 141,  86, 127,  72, },
330{  0, 165,  41, 206,  10, 175,  52, 217, },
331{110,  55, 151,  96, 120,  65, 162, 107, },
332{ 28, 193,  14, 179,  38, 203,  24, 189, },
333{138,  83, 124,  69, 148,  93, 134,  79, },
334{  7, 172,  48, 213,   3, 168,  45, 210, },
335};
336#elif 1
337// tries to correct a gamma of 1.5
338const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
339{  0, 143,  18, 200,   2, 156,  25, 215, },
340{ 78,  28, 125,  64,  89,  36, 138,  74, },
341{ 10, 180,   3, 161,  16, 195,   8, 175, },
342{109,  51,  93,  38, 121,  60, 105,  47, },
343{  1, 152,  23, 210,   0, 147,  20, 205, },
344{ 85,  33, 134,  71,  81,  30, 130,  67, },
345{ 14, 190,   6, 171,  12, 185,   5, 166, },
346{117,  57, 101,  44, 113,  54,  97,  41, },
347};
348#elif 1
349// tries to correct a gamma of 2.0
350const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
351{  0, 124,   8, 193,   0, 140,  12, 213, },
352{ 55,  14, 104,  42,  66,  19, 119,  52, },
353{  3, 168,   1, 145,   6, 187,   3, 162, },
354{ 86,  31,  70,  21,  99,  39,  82,  28, },
355{  0, 134,  11, 206,   0, 129,   9, 200, },
356{ 62,  17, 114,  48,  58,  16, 109,  45, },
357{  5, 181,   2, 157,   4, 175,   1, 151, },
358{ 95,  36,  78,  26,  90,  34,  74,  24, },
359};
360#else
361// tries to correct a gamma of 2.5
362const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
363{  0, 107,   3, 187,   0, 125,   6, 212, },
364{ 39,   7,  86,  28,  49,  11, 102,  36, },
365{  1, 158,   0, 131,   3, 180,   1, 151, },
366{ 68,  19,  52,  12,  81,  25,  64,  17, },
367{  0, 119,   5, 203,   0, 113,   4, 195, },
368{ 45,   9,  96,  33,  42,   8,  91,  30, },
369{  2, 172,   1, 144,   2, 165,   0, 137, },
370{ 77,  23,  60,  15,  72,  21,  56,  14, },
371};
372#endif
373
374const char *sws_format_name(enum PixelFormat format)
375{
376    switch (format) {
377        case PIX_FMT_YUV420P:
378            return "yuv420p";
379        case PIX_FMT_YUVA420P:
380            return "yuva420p";
381        case PIX_FMT_YUYV422:
382            return "yuyv422";
383        case PIX_FMT_RGB24:
384            return "rgb24";
385        case PIX_FMT_BGR24:
386            return "bgr24";
387        case PIX_FMT_YUV422P:
388            return "yuv422p";
389        case PIX_FMT_YUV444P:
390            return "yuv444p";
391        case PIX_FMT_RGB32:
392            return "rgb32";
393        case PIX_FMT_YUV410P:
394            return "yuv410p";
395        case PIX_FMT_YUV411P:
396            return "yuv411p";
397        case PIX_FMT_RGB565:
398            return "rgb565";
399        case PIX_FMT_RGB555:
400            return "rgb555";
401        case PIX_FMT_GRAY16BE:
402            return "gray16be";
403        case PIX_FMT_GRAY16LE:
404            return "gray16le";
405        case PIX_FMT_GRAY8:
406            return "gray8";
407        case PIX_FMT_MONOWHITE:
408            return "mono white";
409        case PIX_FMT_MONOBLACK:
410            return "mono black";
411        case PIX_FMT_PAL8:
412            return "Palette";
413        case PIX_FMT_YUVJ420P:
414            return "yuvj420p";
415        case PIX_FMT_YUVJ422P:
416            return "yuvj422p";
417        case PIX_FMT_YUVJ444P:
418            return "yuvj444p";
419        case PIX_FMT_XVMC_MPEG2_MC:
420            return "xvmc_mpeg2_mc";
421        case PIX_FMT_XVMC_MPEG2_IDCT:
422            return "xvmc_mpeg2_idct";
423        case PIX_FMT_UYVY422:
424            return "uyvy422";
425        case PIX_FMT_UYYVYY411:
426            return "uyyvyy411";
427        case PIX_FMT_RGB32_1:
428            return "rgb32x";
429        case PIX_FMT_BGR32_1:
430            return "bgr32x";
431        case PIX_FMT_BGR32:
432            return "bgr32";
433        case PIX_FMT_BGR565:
434            return "bgr565";
435        case PIX_FMT_BGR555:
436            return "bgr555";
437        case PIX_FMT_BGR8:
438            return "bgr8";
439        case PIX_FMT_BGR4:
440            return "bgr4";
441        case PIX_FMT_BGR4_BYTE:
442            return "bgr4 byte";
443        case PIX_FMT_RGB8:
444            return "rgb8";
445        case PIX_FMT_RGB4:
446            return "rgb4";
447        case PIX_FMT_RGB4_BYTE:
448            return "rgb4 byte";
449        case PIX_FMT_NV12:
450            return "nv12";
451        case PIX_FMT_NV21:
452            return "nv21";
453        case PIX_FMT_YUV440P:
454            return "yuv440p";
455        case PIX_FMT_VDPAU_H264:
456            return "vdpau_h264";
457        case PIX_FMT_VDPAU_MPEG1:
458            return "vdpau_mpeg1";
459        case PIX_FMT_VDPAU_MPEG2:
460            return "vdpau_mpeg2";
461        case PIX_FMT_VDPAU_WMV3:
462            return "vdpau_wmv3";
463        case PIX_FMT_VDPAU_VC1:
464            return "vdpau_vc1";
465        default:
466            return "Unknown format";
467    }
468}
469
470static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
471                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
472                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
473{
474    //FIXME Optimize (just quickly written not optimized..)
475    int i;
476    for (i=0; i<dstW; i++)
477    {
478        int val=1<<18;
479        int j;
480        for (j=0; j<lumFilterSize; j++)
481            val += lumSrc[j][i] * lumFilter[j];
482
483        dest[i]= av_clip_uint8(val>>19);
484    }
485
486    if (uDest)
487        for (i=0; i<chrDstW; i++)
488        {
489            int u=1<<18;
490            int v=1<<18;
491            int j;
492            for (j=0; j<chrFilterSize; j++)
493            {
494                u += chrSrc[j][i] * chrFilter[j];
495                v += chrSrc[j][i + VOFW] * chrFilter[j];
496            }
497
498            uDest[i]= av_clip_uint8(u>>19);
499            vDest[i]= av_clip_uint8(v>>19);
500        }
501}
502
503static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
504                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
505                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
506{
507    //FIXME Optimize (just quickly written not optimized..)
508    int i;
509    for (i=0; i<dstW; i++)
510    {
511        int val=1<<18;
512        int j;
513        for (j=0; j<lumFilterSize; j++)
514            val += lumSrc[j][i] * lumFilter[j];
515
516        dest[i]= av_clip_uint8(val>>19);
517    }
518
519    if (!uDest)
520        return;
521
522    if (dstFormat == PIX_FMT_NV12)
523        for (i=0; i<chrDstW; i++)
524        {
525            int u=1<<18;
526            int v=1<<18;
527            int j;
528            for (j=0; j<chrFilterSize; j++)
529            {
530                u += chrSrc[j][i] * chrFilter[j];
531                v += chrSrc[j][i + VOFW] * chrFilter[j];
532            }
533
534            uDest[2*i]= av_clip_uint8(u>>19);
535            uDest[2*i+1]= av_clip_uint8(v>>19);
536        }
537    else
538        for (i=0; i<chrDstW; i++)
539        {
540            int u=1<<18;
541            int v=1<<18;
542            int j;
543            for (j=0; j<chrFilterSize; j++)
544            {
545                u += chrSrc[j][i] * chrFilter[j];
546                v += chrSrc[j][i + VOFW] * chrFilter[j];
547            }
548
549            uDest[2*i]= av_clip_uint8(v>>19);
550            uDest[2*i+1]= av_clip_uint8(u>>19);
551        }
552}
553
554#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
555    for (i=0; i<(dstW>>1); i++){\
556        int j;\
557        int Y1 = 1<<18;\
558        int Y2 = 1<<18;\
559        int U  = 1<<18;\
560        int V  = 1<<18;\
561        type av_unused *r, *b, *g;\
562        const int i2= 2*i;\
563        \
564        for (j=0; j<lumFilterSize; j++)\
565        {\
566            Y1 += lumSrc[j][i2] * lumFilter[j];\
567            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
568        }\
569        for (j=0; j<chrFilterSize; j++)\
570        {\
571            U += chrSrc[j][i] * chrFilter[j];\
572            V += chrSrc[j][i+VOFW] * chrFilter[j];\
573        }\
574        Y1>>=19;\
575        Y2>>=19;\
576        U >>=19;\
577        V >>=19;\
578
579#define YSCALE_YUV_2_PACKEDX_C(type) \
580        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
581        if ((Y1|Y2|U|V)&256)\
582        {\
583            if (Y1>255)   Y1=255; \
584            else if (Y1<0)Y1=0;   \
585            if (Y2>255)   Y2=255; \
586            else if (Y2<0)Y2=0;   \
587            if (U>255)    U=255;  \
588            else if (U<0) U=0;    \
589            if (V>255)    V=255;  \
590            else if (V<0) V=0;    \
591        }
592
593#define YSCALE_YUV_2_PACKEDX_FULL_C \
594    for (i=0; i<dstW; i++){\
595        int j;\
596        int Y = 0;\
597        int U = -128<<19;\
598        int V = -128<<19;\
599        int R,G,B;\
600        \
601        for (j=0; j<lumFilterSize; j++){\
602            Y += lumSrc[j][i     ] * lumFilter[j];\
603        }\
604        for (j=0; j<chrFilterSize; j++){\
605            U += chrSrc[j][i     ] * chrFilter[j];\
606            V += chrSrc[j][i+VOFW] * chrFilter[j];\
607        }\
608        Y >>=10;\
609        U >>=10;\
610        V >>=10;\
611
612#define YSCALE_YUV_2_RGBX_FULL_C(rnd) \
613    YSCALE_YUV_2_PACKEDX_FULL_C\
614        Y-= c->yuv2rgb_y_offset;\
615        Y*= c->yuv2rgb_y_coeff;\
616        Y+= rnd;\
617        R= Y + V*c->yuv2rgb_v2r_coeff;\
618        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
619        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
620        if ((R|G|B)&(0xC0000000)){\
621            if (R>=(256<<22))   R=(256<<22)-1; \
622            else if (R<0)R=0;   \
623            if (G>=(256<<22))   G=(256<<22)-1; \
624            else if (G<0)G=0;   \
625            if (B>=(256<<22))   B=(256<<22)-1; \
626            else if (B<0)B=0;   \
627        }\
628
629
630#define YSCALE_YUV_2_GRAY16_C \
631    for (i=0; i<(dstW>>1); i++){\
632        int j;\
633        int Y1 = 1<<18;\
634        int Y2 = 1<<18;\
635        int U  = 1<<18;\
636        int V  = 1<<18;\
637        \
638        const int i2= 2*i;\
639        \
640        for (j=0; j<lumFilterSize; j++)\
641        {\
642            Y1 += lumSrc[j][i2] * lumFilter[j];\
643            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
644        }\
645        Y1>>=11;\
646        Y2>>=11;\
647        if ((Y1|Y2|U|V)&65536)\
648        {\
649            if (Y1>65535)   Y1=65535; \
650            else if (Y1<0)Y1=0;   \
651            if (Y2>65535)   Y2=65535; \
652            else if (Y2<0)Y2=0;   \
653        }
654
655#define YSCALE_YUV_2_RGBX_C(type) \
656    YSCALE_YUV_2_PACKEDX_C(type)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
657    r = (type *)c->table_rV[V];   \
658    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
659    b = (type *)c->table_bU[U];   \
660
661#define YSCALE_YUV_2_PACKED2_C   \
662    for (i=0; i<(dstW>>1); i++){ \
663        const int i2= 2*i;       \
664        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
665        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
666        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
667        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
668
669#define YSCALE_YUV_2_GRAY16_2_C   \
670    for (i=0; i<(dstW>>1); i++){ \
671        const int i2= 2*i;       \
672        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
673        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
674
675#define YSCALE_YUV_2_RGB2_C(type) \
676    YSCALE_YUV_2_PACKED2_C\
677    type *r, *b, *g;\
678    r = (type *)c->table_rV[V];\
679    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
680    b = (type *)c->table_bU[U];\
681
682#define YSCALE_YUV_2_PACKED1_C \
683    for (i=0; i<(dstW>>1); i++){\
684        const int i2= 2*i;\
685        int Y1= buf0[i2  ]>>7;\
686        int Y2= buf0[i2+1]>>7;\
687        int U= (uvbuf1[i     ])>>7;\
688        int V= (uvbuf1[i+VOFW])>>7;\
689
690#define YSCALE_YUV_2_GRAY16_1_C \
691    for (i=0; i<(dstW>>1); i++){\
692        const int i2= 2*i;\
693        int Y1= buf0[i2  ]<<1;\
694        int Y2= buf0[i2+1]<<1;\
695
696#define YSCALE_YUV_2_RGB1_C(type) \
697    YSCALE_YUV_2_PACKED1_C\
698    type *r, *b, *g;\
699    r = (type *)c->table_rV[V];\
700    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
701    b = (type *)c->table_bU[U];\
702
703#define YSCALE_YUV_2_PACKED1B_C \
704    for (i=0; i<(dstW>>1); i++){\
705        const int i2= 2*i;\
706        int Y1= buf0[i2  ]>>7;\
707        int Y2= buf0[i2+1]>>7;\
708        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
709        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
710
711#define YSCALE_YUV_2_RGB1B_C(type) \
712    YSCALE_YUV_2_PACKED1B_C\
713    type *r, *b, *g;\
714    r = (type *)c->table_rV[V];\
715    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
716    b = (type *)c->table_bU[U];\
717
718#define YSCALE_YUV_2_MONO2_C \
719    const uint8_t * const d128=dither_8x8_220[y&7];\
720    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
721    for (i=0; i<dstW-7; i+=8){\
722        int acc;\
723        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
724        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
725        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
726        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
727        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
728        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
729        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
730        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
731        ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
732        dest++;\
733    }\
734
735
736#define YSCALE_YUV_2_MONOX_C \
737    const uint8_t * const d128=dither_8x8_220[y&7];\
738    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
739    int acc=0;\
740    for (i=0; i<dstW-1; i+=2){\
741        int j;\
742        int Y1=1<<18;\
743        int Y2=1<<18;\
744\
745        for (j=0; j<lumFilterSize; j++)\
746        {\
747            Y1 += lumSrc[j][i] * lumFilter[j];\
748            Y2 += lumSrc[j][i+1] * lumFilter[j];\
749        }\
750        Y1>>=19;\
751        Y2>>=19;\
752        if ((Y1|Y2)&256)\
753        {\
754            if (Y1>255)   Y1=255;\
755            else if (Y1<0)Y1=0;\
756            if (Y2>255)   Y2=255;\
757            else if (Y2<0)Y2=0;\
758        }\
759        acc+= acc + g[Y1+d128[(i+0)&7]];\
760        acc+= acc + g[Y2+d128[(i+1)&7]];\
761        if ((i&7)==6){\
762            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
763            dest++;\
764        }\
765    }
766
767
768#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
769    switch(c->dstFormat)\
770    {\
771    case PIX_FMT_RGB32:\
772    case PIX_FMT_BGR32:\
773    case PIX_FMT_RGB32_1:\
774    case PIX_FMT_BGR32_1:\
775        func(uint32_t)\
776            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
777            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
778        }                \
779        break;\
780    case PIX_FMT_RGB24:\
781        func(uint8_t)\
782            ((uint8_t*)dest)[0]= r[Y1];\
783            ((uint8_t*)dest)[1]= g[Y1];\
784            ((uint8_t*)dest)[2]= b[Y1];\
785            ((uint8_t*)dest)[3]= r[Y2];\
786            ((uint8_t*)dest)[4]= g[Y2];\
787            ((uint8_t*)dest)[5]= b[Y2];\
788            dest+=6;\
789        }\
790        break;\
791    case PIX_FMT_BGR24:\
792        func(uint8_t)\
793            ((uint8_t*)dest)[0]= b[Y1];\
794            ((uint8_t*)dest)[1]= g[Y1];\
795            ((uint8_t*)dest)[2]= r[Y1];\
796            ((uint8_t*)dest)[3]= b[Y2];\
797            ((uint8_t*)dest)[4]= g[Y2];\
798            ((uint8_t*)dest)[5]= r[Y2];\
799            dest+=6;\
800        }\
801        break;\
802    case PIX_FMT_RGB565:\
803    case PIX_FMT_BGR565:\
804        {\
805            const int dr1= dither_2x2_8[y&1    ][0];\
806            const int dg1= dither_2x2_4[y&1    ][0];\
807            const int db1= dither_2x2_8[(y&1)^1][0];\
808            const int dr2= dither_2x2_8[y&1    ][1];\
809            const int dg2= dither_2x2_4[y&1    ][1];\
810            const int db2= dither_2x2_8[(y&1)^1][1];\
811            func(uint16_t)\
812                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
813                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
814            }\
815        }\
816        break;\
817    case PIX_FMT_RGB555:\
818    case PIX_FMT_BGR555:\
819        {\
820            const int dr1= dither_2x2_8[y&1    ][0];\
821            const int dg1= dither_2x2_8[y&1    ][1];\
822            const int db1= dither_2x2_8[(y&1)^1][0];\
823            const int dr2= dither_2x2_8[y&1    ][1];\
824            const int dg2= dither_2x2_8[y&1    ][0];\
825            const int db2= dither_2x2_8[(y&1)^1][1];\
826            func(uint16_t)\
827                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
828                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
829            }\
830        }\
831        break;\
832    case PIX_FMT_RGB8:\
833    case PIX_FMT_BGR8:\
834        {\
835            const uint8_t * const d64= dither_8x8_73[y&7];\
836            const uint8_t * const d32= dither_8x8_32[y&7];\
837            func(uint8_t)\
838                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
839                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
840            }\
841        }\
842        break;\
843    case PIX_FMT_RGB4:\
844    case PIX_FMT_BGR4:\
845        {\
846            const uint8_t * const d64= dither_8x8_73 [y&7];\
847            const uint8_t * const d128=dither_8x8_220[y&7];\
848            func(uint8_t)\
849                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
850                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
851            }\
852        }\
853        break;\
854    case PIX_FMT_RGB4_BYTE:\
855    case PIX_FMT_BGR4_BYTE:\
856        {\
857            const uint8_t * const d64= dither_8x8_73 [y&7];\
858            const uint8_t * const d128=dither_8x8_220[y&7];\
859            func(uint8_t)\
860                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
861                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
862            }\
863        }\
864        break;\
865    case PIX_FMT_MONOBLACK:\
866    case PIX_FMT_MONOWHITE:\
867        {\
868            func_monoblack\
869        }\
870        break;\
871    case PIX_FMT_YUYV422:\
872        func2\
873            ((uint8_t*)dest)[2*i2+0]= Y1;\
874            ((uint8_t*)dest)[2*i2+1]= U;\
875            ((uint8_t*)dest)[2*i2+2]= Y2;\
876            ((uint8_t*)dest)[2*i2+3]= V;\
877        }                \
878        break;\
879    case PIX_FMT_UYVY422:\
880        func2\
881            ((uint8_t*)dest)[2*i2+0]= U;\
882            ((uint8_t*)dest)[2*i2+1]= Y1;\
883            ((uint8_t*)dest)[2*i2+2]= V;\
884            ((uint8_t*)dest)[2*i2+3]= Y2;\
885        }                \
886        break;\
887    case PIX_FMT_GRAY16BE:\
888        func_g16\
889            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
890            ((uint8_t*)dest)[2*i2+1]= Y1;\
891            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
892            ((uint8_t*)dest)[2*i2+3]= Y2;\
893        }                \
894        break;\
895    case PIX_FMT_GRAY16LE:\
896        func_g16\
897            ((uint8_t*)dest)[2*i2+0]= Y1;\
898            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
899            ((uint8_t*)dest)[2*i2+2]= Y2;\
900            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
901        }                \
902        break;\
903    }\
904
905
906static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
907                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
908                                  uint8_t *dest, int dstW, int y)
909{
910    int i;
911    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
912}
913
914static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
915                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
916                                    uint8_t *dest, int dstW, int y)
917{
918    int i;
919    int step= fmt_depth(c->dstFormat)/8;
920    int aidx= 3;
921
922    switch(c->dstFormat){
923    case PIX_FMT_ARGB:
924        dest++;
925        aidx= -1;
926    case PIX_FMT_RGB24:
927        aidx--;
928    case PIX_FMT_RGBA:
929        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
930            dest[aidx]= 255;
931            dest[0]= R>>22;
932            dest[1]= G>>22;
933            dest[2]= B>>22;
934            dest+= step;
935        }
936        break;
937    case PIX_FMT_ABGR:
938        dest++;
939        aidx= -1;
940    case PIX_FMT_BGR24:
941        aidx--;
942    case PIX_FMT_BGRA:
943        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
944            dest[aidx]= 255;
945            dest[0]= B>>22;
946            dest[1]= G>>22;
947            dest[2]= R>>22;
948            dest+= step;
949        }
950        break;
951    default:
952        assert(0);
953    }
954}
955
956//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
957//Plain C versions
958#if ((!HAVE_MMX || !CONFIG_GPL) && !HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
959#define COMPILE_C
960#endif
961
962#if ARCH_PPC
963#if HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT
964#define COMPILE_ALTIVEC
965#endif
966#endif //ARCH_PPC
967
968#if ARCH_X86
969
970#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
971#define COMPILE_MMX
972#endif
973
974#if (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
975#define COMPILE_MMX2
976#endif
977
978#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
979#define COMPILE_3DNOW
980#endif
981#endif //ARCH_X86
982
983#undef HAVE_MMX
984#undef HAVE_MMX2
985#undef HAVE_AMD3DNOW
986#undef HAVE_ALTIVEC
987#define HAVE_MMX 0
988#define HAVE_MMX2 0
989#define HAVE_AMD3DNOW 0
990#define HAVE_ALTIVEC 0
991
992#ifdef COMPILE_C
993#define RENAME(a) a ## _C
994#include "swscale_template.c"
995#endif
996
997#ifdef COMPILE_ALTIVEC
998#undef RENAME
999#undef HAVE_ALTIVEC
1000#define HAVE_ALTIVEC 1
1001#define RENAME(a) a ## _altivec
1002#include "swscale_template.c"
1003#endif
1004
1005#if ARCH_X86
1006
1007//x86 versions
1008/*
1009#undef RENAME
1010#undef HAVE_MMX
1011#undef HAVE_MMX2
1012#undef HAVE_AMD3DNOW
1013#define ARCH_X86
1014#define RENAME(a) a ## _X86
1015#include "swscale_template.c"
1016*/
1017//MMX versions
1018#ifdef COMPILE_MMX
1019#undef RENAME
1020#undef HAVE_MMX
1021#undef HAVE_MMX2
1022#undef HAVE_AMD3DNOW
1023#define HAVE_MMX 1
1024#define HAVE_MMX2 0
1025#define HAVE_AMD3DNOW 0
1026#define RENAME(a) a ## _MMX
1027#include "swscale_template.c"
1028#endif
1029
1030//MMX2 versions
1031#ifdef COMPILE_MMX2
1032#undef RENAME
1033#undef HAVE_MMX
1034#undef HAVE_MMX2
1035#undef HAVE_AMD3DNOW
1036#define HAVE_MMX 1
1037#define HAVE_MMX2 1
1038#define HAVE_AMD3DNOW 0
1039#define RENAME(a) a ## _MMX2
1040#include "swscale_template.c"
1041#endif
1042
1043//3DNOW versions
1044#ifdef COMPILE_3DNOW
1045#undef RENAME
1046#undef HAVE_MMX
1047#undef HAVE_MMX2
1048#undef HAVE_AMD3DNOW
1049#define HAVE_MMX 1
1050#define HAVE_MMX2 0
1051#define HAVE_AMD3DNOW 1
1052#define RENAME(a) a ## _3DNow
1053#include "swscale_template.c"
1054#endif
1055
1056#endif //ARCH_X86
1057
1058// minor note: the HAVE_xyz are messed up after this line so don't use them
1059
1060static double getSplineCoeff(double a, double b, double c, double d, double dist)
1061{
1062//    printf("%f %f %f %f %f\n", a,b,c,d,dist);
1063    if (dist<=1.0)      return ((d*dist + c)*dist + b)*dist +a;
1064    else                return getSplineCoeff(        0.0,
1065                                             b+ 2.0*c + 3.0*d,
1066                                                    c + 3.0*d,
1067                                            -b- 3.0*c - 6.0*d,
1068                                            dist-1.0);
1069}
1070
1071static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
1072                             int srcW, int dstW, int filterAlign, int one, int flags,
1073                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
1074{
1075    int i;
1076    int filterSize;
1077    int filter2Size;
1078    int minFilterSize;
1079    int64_t *filter=NULL;
1080    int64_t *filter2=NULL;
1081    const int64_t fone= 1LL<<54;
1082    int ret= -1;
1083#if ARCH_X86
1084    if (flags & SWS_CPU_CAPS_MMX)
1085        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
1086#endif
1087
1088    // NOTE: the +1 is for the MMX scaler which reads over the end
1089    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
1090
1091    if (FFABS(xInc - 0x10000) <10) // unscaled
1092    {
1093        int i;
1094        filterSize= 1;
1095        filter= av_mallocz(dstW*sizeof(*filter)*filterSize);
1096
1097        for (i=0; i<dstW; i++)
1098        {
1099            filter[i*filterSize]= fone;
1100            (*filterPos)[i]=i;
1101        }
1102
1103    }
1104    else if (flags&SWS_POINT) // lame looking point sampling mode
1105    {
1106        int i;
1107        int xDstInSrc;
1108        filterSize= 1;
1109        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1110
1111        xDstInSrc= xInc/2 - 0x8000;
1112        for (i=0; i<dstW; i++)
1113        {
1114            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
1115
1116            (*filterPos)[i]= xx;
1117            filter[i]= fone;
1118            xDstInSrc+= xInc;
1119        }
1120    }
1121    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
1122    {
1123        int i;
1124        int xDstInSrc;
1125        if      (flags&SWS_BICUBIC) filterSize= 4;
1126        else if (flags&SWS_X      ) filterSize= 4;
1127        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
1128        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1129
1130        xDstInSrc= xInc/2 - 0x8000;
1131        for (i=0; i<dstW; i++)
1132        {
1133            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
1134            int j;
1135
1136            (*filterPos)[i]= xx;
1137                //bilinear upscale / linear interpolate / area averaging
1138                for (j=0; j<filterSize; j++)
1139                {
1140                    int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
1141                    if (coeff<0) coeff=0;
1142                    filter[i*filterSize + j]= coeff;
1143                    xx++;
1144                }
1145            xDstInSrc+= xInc;
1146        }
1147    }
1148    else
1149    {
1150        int xDstInSrc;
1151        int sizeFactor;
1152
1153        if      (flags&SWS_BICUBIC)      sizeFactor=  4;
1154        else if (flags&SWS_X)            sizeFactor=  8;
1155        else if (flags&SWS_AREA)         sizeFactor=  1; //downscale only, for upscale it is bilinear
1156        else if (flags&SWS_GAUSS)        sizeFactor=  8;   // infinite ;)
1157        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
1158        else if (flags&SWS_SINC)         sizeFactor= 20; // infinite ;)
1159        else if (flags&SWS_SPLINE)       sizeFactor= 20;  // infinite ;)
1160        else if (flags&SWS_BILINEAR)     sizeFactor=  2;
1161        else {
1162            sizeFactor= 0; //GCC warning killer
1163            assert(0);
1164        }
1165
1166        if (xInc <= 1<<16)      filterSize= 1 + sizeFactor; // upscale
1167        else                    filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;
1168
1169        if (filterSize > srcW-2) filterSize=srcW-2;
1170
1171        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1172
1173        xDstInSrc= xInc - 0x10000;
1174        for (i=0; i<dstW; i++)
1175        {
1176            int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
1177            int j;
1178            (*filterPos)[i]= xx;
1179            for (j=0; j<filterSize; j++)
1180            {
1181                int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
1182                double floatd;
1183                int64_t coeff;
1184
1185                if (xInc > 1<<16)
1186                    d= d*dstW/srcW;
1187                floatd= d * (1.0/(1<<30));
1188
1189                if (flags & SWS_BICUBIC)
1190                {
1191                    int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1<<24);
1192                    int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
1193                    int64_t dd = ( d*d)>>30;
1194                    int64_t ddd= (dd*d)>>30;
1195
1196                    if      (d < 1LL<<30)
1197                        coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
1198                    else if (d < 1LL<<31)
1199                        coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
1200                    else
1201                        coeff=0.0;
1202                    coeff *= fone>>(30+24);
1203                }
1204/*                else if (flags & SWS_X)
1205                {
1206                    double p= param ? param*0.01 : 0.3;
1207                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
1208                    coeff*= pow(2.0, - p*d*d);
1209                }*/
1210                else if (flags & SWS_X)
1211                {
1212                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
1213                    double c;
1214
1215                    if (floatd<1.0)
1216                        c = cos(floatd*PI);
1217                    else
1218                        c=-1.0;
1219                    if (c<0.0)      c= -pow(-c, A);
1220                    else            c=  pow( c, A);
1221                    coeff= (c*0.5 + 0.5)*fone;
1222                }
1223                else if (flags & SWS_AREA)
1224                {
1225                    int64_t d2= d - (1<<29);
1226                    if      (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
1227                    else if (d2*xInc <  (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
1228                    else coeff=0.0;
1229                    coeff *= fone>>(30+16);
1230                }
1231                else if (flags & SWS_GAUSS)
1232                {
1233                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1234                    coeff = (pow(2.0, - p*floatd*floatd))*fone;
1235                }
1236                else if (flags & SWS_SINC)
1237                {
1238                    coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
1239                }
1240                else if (flags & SWS_LANCZOS)
1241                {
1242                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1243                    coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
1244                    if (floatd>p) coeff=0;
1245                }
1246                else if (flags & SWS_BILINEAR)
1247                {
1248                    coeff= (1<<30) - d;
1249                    if (coeff<0) coeff=0;
1250                    coeff *= fone >> 30;
1251                }
1252                else if (flags & SWS_SPLINE)
1253                {
1254                    double p=-2.196152422706632;
1255                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
1256                }
1257                else {
1258                    coeff= 0.0; //GCC warning killer
1259                    assert(0);
1260                }
1261
1262                filter[i*filterSize + j]= coeff;
1263                xx++;
1264            }
1265            xDstInSrc+= 2*xInc;
1266        }
1267    }
1268
1269    /* apply src & dst Filter to filter -> filter2
1270       av_free(filter);
1271    */
1272    assert(filterSize>0);
1273    filter2Size= filterSize;
1274    if (srcFilter) filter2Size+= srcFilter->length - 1;
1275    if (dstFilter) filter2Size+= dstFilter->length - 1;
1276    assert(filter2Size>0);
1277    filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));
1278
1279    for (i=0; i<dstW; i++)
1280    {
1281        int j, k;
1282
1283        if(srcFilter){
1284            for (k=0; k<srcFilter->length; k++){
1285                for (j=0; j<filterSize; j++)
1286                    filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
1287            }
1288        }else{
1289            for (j=0; j<filterSize; j++)
1290                filter2[i*filter2Size + j]= filter[i*filterSize + j];
1291        }
1292        //FIXME dstFilter
1293
1294        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
1295    }
1296    av_freep(&filter);
1297
1298    /* try to reduce the filter-size (step1 find size and shift left) */
1299    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
1300    minFilterSize= 0;
1301    for (i=dstW-1; i>=0; i--)
1302    {
1303        int min= filter2Size;
1304        int j;
1305        int64_t cutOff=0.0;
1306
1307        /* get rid off near zero elements on the left by shifting left */
1308        for (j=0; j<filter2Size; j++)
1309        {
1310            int k;
1311            cutOff += FFABS(filter2[i*filter2Size]);
1312
1313            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
1314
1315            /* preserve monotonicity because the core can't handle the filter otherwise */
1316            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1317
1318            // move filter coefficients left
1319            for (k=1; k<filter2Size; k++)
1320                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1321            filter2[i*filter2Size + k - 1]= 0;
1322            (*filterPos)[i]++;
1323        }
1324
1325        cutOff=0;
1326        /* count near zeros on the right */
1327        for (j=filter2Size-1; j>0; j--)
1328        {
1329            cutOff += FFABS(filter2[i*filter2Size + j]);
1330
1331            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
1332            min--;
1333        }
1334
1335        if (min>minFilterSize) minFilterSize= min;
1336    }
1337
1338    if (flags & SWS_CPU_CAPS_ALTIVEC) {
1339        // we can handle the special case 4,
1340        // so we don't want to go to the full 8
1341        if (minFilterSize < 5)
1342            filterAlign = 4;
1343
1344        // We really don't want to waste our time
1345        // doing useless computation, so fall back on
1346        // the scalar C code for very small filters.
1347        // Vectorizing is worth it only if you have a
1348        // decent-sized vector.
1349        if (minFilterSize < 3)
1350            filterAlign = 1;
1351    }
1352
1353    if (flags & SWS_CPU_CAPS_MMX) {
1354        // special case for unscaled vertical filtering
1355        if (minFilterSize == 1 && filterAlign == 2)
1356            filterAlign= 1;
1357    }
1358
1359    assert(minFilterSize > 0);
1360    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
1361    assert(filterSize > 0);
1362    filter= av_malloc(filterSize*dstW*sizeof(*filter));
1363    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
1364        goto error;
1365    *outFilterSize= filterSize;
1366
1367    if (flags&SWS_PRINT_INFO)
1368        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
1369    /* try to reduce the filter-size (step2 reduce it) */
1370    for (i=0; i<dstW; i++)
1371    {
1372        int j;
1373
1374        for (j=0; j<filterSize; j++)
1375        {
1376            if (j>=filter2Size) filter[i*filterSize + j]= 0;
1377            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
1378            if((flags & SWS_BITEXACT) && j>=minFilterSize)
1379                filter[i*filterSize + j]= 0;
1380        }
1381    }
1382
1383
1384    //FIXME try to align filterPos if possible
1385
1386    //fix borders
1387    for (i=0; i<dstW; i++)
1388    {
1389        int j;
1390        if ((*filterPos)[i] < 0)
1391        {
1392            // move filter coefficients left to compensate for filterPos
1393            for (j=1; j<filterSize; j++)
1394            {
1395                int left= FFMAX(j + (*filterPos)[i], 0);
1396                filter[i*filterSize + left] += filter[i*filterSize + j];
1397                filter[i*filterSize + j]=0;
1398            }
1399            (*filterPos)[i]= 0;
1400        }
1401
1402        if ((*filterPos)[i] + filterSize > srcW)
1403        {
1404            int shift= (*filterPos)[i] + filterSize - srcW;
1405            // move filter coefficients right to compensate for filterPos
1406            for (j=filterSize-2; j>=0; j--)
1407            {
1408                int right= FFMIN(j + shift, filterSize-1);
1409                filter[i*filterSize +right] += filter[i*filterSize +j];
1410                filter[i*filterSize +j]=0;
1411            }
1412            (*filterPos)[i]= srcW - filterSize;
1413        }
1414    }
1415
1416    // Note the +1 is for the MMX scaler which reads over the end
1417    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
1418    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
1419
1420    /* normalize & store in outFilter */
1421    for (i=0; i<dstW; i++)
1422    {
1423        int j;
1424        int64_t error=0;
1425        int64_t sum=0;
1426
1427        for (j=0; j<filterSize; j++)
1428        {
1429            sum+= filter[i*filterSize + j];
1430        }
1431        sum= (sum + one/2)/ one;
1432        for (j=0; j<*outFilterSize; j++)
1433        {
1434            int64_t v= filter[i*filterSize + j] + error;
1435            int intV= ROUNDED_DIV(v, sum);
1436            (*outFilter)[i*(*outFilterSize) + j]= intV;
1437            error= v - intV*sum;
1438        }
1439    }
1440
1441    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
1442    for (i=0; i<*outFilterSize; i++)
1443    {
1444        int j= dstW*(*outFilterSize);
1445        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
1446    }
1447
1448    ret=0;
1449error:
1450    av_free(filter);
1451    av_free(filter2);
1452    return ret;
1453}
1454
1455#ifdef COMPILE_MMX2
1456static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1457{
1458    uint8_t *fragmentA;
1459    long imm8OfPShufW1A;
1460    long imm8OfPShufW2A;
1461    long fragmentLengthA;
1462    uint8_t *fragmentB;
1463    long imm8OfPShufW1B;
1464    long imm8OfPShufW2B;
1465    long fragmentLengthB;
1466    int fragmentPos;
1467
1468    int xpos, i;
1469
1470    // create an optimized horizontal scaling routine
1471
1472    //code fragment
1473
1474    __asm__ volatile(
1475        "jmp                         9f                 \n\t"
1476    // Begin
1477        "0:                                             \n\t"
1478        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1479        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1480        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
1481        "punpcklbw                %%mm7, %%mm1          \n\t"
1482        "punpcklbw                %%mm7, %%mm0          \n\t"
1483        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
1484        "1:                                             \n\t"
1485        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1486        "2:                                             \n\t"
1487        "psubw                    %%mm1, %%mm0          \n\t"
1488        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1489        "pmullw                   %%mm3, %%mm0          \n\t"
1490        "psllw                       $7, %%mm1          \n\t"
1491        "paddw                    %%mm1, %%mm0          \n\t"
1492
1493        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1494
1495        "add                         $8, %%"REG_a"      \n\t"
1496    // End
1497        "9:                                             \n\t"
1498//        "int $3                                         \n\t"
1499        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
1500        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
1501        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
1502        "dec                         %1                 \n\t"
1503        "dec                         %2                 \n\t"
1504        "sub                         %0, %1             \n\t"
1505        "sub                         %0, %2             \n\t"
1506        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
1507        "sub                         %0, %3             \n\t"
1508
1509
1510        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1511        "=r" (fragmentLengthA)
1512    );
1513
1514    __asm__ volatile(
1515        "jmp                         9f                 \n\t"
1516    // Begin
1517        "0:                                             \n\t"
1518        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1519        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1520        "punpcklbw                %%mm7, %%mm0          \n\t"
1521        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
1522        "1:                                             \n\t"
1523        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1524        "2:                                             \n\t"
1525        "psubw                    %%mm1, %%mm0          \n\t"
1526        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1527        "pmullw                   %%mm3, %%mm0          \n\t"
1528        "psllw                       $7, %%mm1          \n\t"
1529        "paddw                    %%mm1, %%mm0          \n\t"
1530
1531        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1532
1533        "add                         $8, %%"REG_a"      \n\t"
1534    // End
1535        "9:                                             \n\t"
1536//        "int                       $3                   \n\t"
1537        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
1538        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
1539        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
1540        "dec                         %1                 \n\t"
1541        "dec                         %2                 \n\t"
1542        "sub                         %0, %1             \n\t"
1543        "sub                         %0, %2             \n\t"
1544        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
1545        "sub                         %0, %3             \n\t"
1546
1547
1548        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1549        "=r" (fragmentLengthB)
1550    );
1551
1552    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1553    fragmentPos=0;
1554
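    /* Assemble the horizontal scaler ("funnyCode") from the two asm fragments above:
       for every block of 4 destination pixels one fragment is copied and the immediate
       bytes of its two pshufw instructions (at offsets imm8OfPShufW1/2) are patched to
       select the right source pixels; fragment B (which loads only 4 source bytes) is
       used when the block fits in 4 source pixels, fragment A otherwise. */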
1555    for (i=0; i<dstW/numSplits; i++)
1556    {
1557        int xx=xpos>>16;
1558
1559        if ((i&3) == 0)
1560        {
1561            int a=0;
1562            int b=((xpos+xInc)>>16) - xx;
1563            int c=((xpos+xInc*2)>>16) - xx;
1564            int d=((xpos+xInc*3)>>16) - xx;
1565
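            // 7-bit interpolation weights derived from the fractional source positions
            // (the asm fragments work in 1.7 fixed point, cf. "psllw $7")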
1566            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1567            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1568            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1569            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1570            filterPos[i/2]= xx;
1571
1572            if (d+1<4)
1573            {
1574                int maxShift= 3-(d+1);
1575                int shift=0;
1576
1577                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1578
1579                funnyCode[fragmentPos + imm8OfPShufW1B]=
1580                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1581                funnyCode[fragmentPos + imm8OfPShufW2B]=
1582                    a | (b<<2) | (c<<4) | (d<<6);
1583
1584                if (i+3>=dstW) shift=maxShift; //avoid overread
1585                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1586
1587                if (shift && i>=shift)
1588                {
1589                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1590                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1591                    filterPos[i/2]-=shift;
1592                }
1593
1594                fragmentPos+= fragmentLengthB;
1595            }
1596            else
1597            {
1598                int maxShift= 3-d;
1599                int shift=0;
1600
1601                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1602
1603                funnyCode[fragmentPos + imm8OfPShufW1A]=
1604                funnyCode[fragmentPos + imm8OfPShufW2A]=
1605                    a | (b<<2) | (c<<4) | (d<<6);
1606
1607                if (i+4>=dstW) shift=maxShift; //avoid overread
1608                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1609
1610                if (shift && i>=shift)
1611                {
1612                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1613                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1614                    filterPos[i/2]-=shift;
1615                }
1616
1617                fragmentPos+= fragmentLengthA;
1618            }
1619
1620            funnyCode[fragmentPos]= RET;
1621        }
1622        xpos+=xInc;
1623    }
1624    filterPos[i/2]= xpos>>16; // needed to jump to the next part
1625}
1626#endif /* COMPILE_MMX2 */
1627
1628static void globalInit(void){
1629    // generating tables:
1630    int i;
1631    for (i=0; i<768; i++){
1632        int c= av_clip_uint8(i-256);
1633        clip_table[i]=c;
1634    }
1635}
1636
1637static SwsFunc getSwsFunc(int flags){
1638
1639#if CONFIG_RUNTIME_CPUDETECT
1640#if ARCH_X86 && CONFIG_GPL
    // ordered by speed, fastest first
1642    if (flags & SWS_CPU_CAPS_MMX2)
1643        return swScale_MMX2;
1644    else if (flags & SWS_CPU_CAPS_3DNOW)
1645        return swScale_3DNow;
1646    else if (flags & SWS_CPU_CAPS_MMX)
1647        return swScale_MMX;
1648    else
1649        return swScale_C;
1650
1651#else
1652#if ARCH_PPC
1653    if (flags & SWS_CPU_CAPS_ALTIVEC)
1654        return swScale_altivec;
1655    else
1656        return swScale_C;
1657#endif
1658    return swScale_C;
1659#endif /* ARCH_X86 && CONFIG_GPL */
1660#else //CONFIG_RUNTIME_CPUDETECT
1661#if   HAVE_MMX2
1662    return swScale_MMX2;
1663#elif HAVE_AMD3DNOW
1664    return swScale_3DNow;
1665#elif HAVE_MMX
1666    return swScale_MMX;
1667#elif HAVE_ALTIVEC
1668    return swScale_altivec;
1669#else
1670    return swScale_C;
1671#endif
1672#endif //!CONFIG_RUNTIME_CPUDETECT
1673}
1674
1675static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1676                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1677    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1678    /* Copy Y plane */
1679    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1680        memcpy(dst, src[0], srcSliceH*dstStride[0]);
1681    else
1682    {
1683        int i;
1684        uint8_t *srcPtr= src[0];
1685        uint8_t *dstPtr= dst;
1686        for (i=0; i<srcSliceH; i++)
1687        {
1688            memcpy(dstPtr, srcPtr, c->srcW);
1689            srcPtr+= srcStride[0];
1690            dstPtr+= dstStride[0];
1691        }
1692    }
1693    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
1694    if (c->dstFormat == PIX_FMT_NV12)
1695        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]);
1696    else
1697        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]);
1698
1699    return srcSliceH;
1700}
1701
1702static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1703                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1704    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1705
1706    yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
1707
1708    return srcSliceH;
1709}
1710
1711static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1712                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1713    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1714
1715    yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
1716
1717    return srcSliceH;
1718}
1719
1720static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1721                                int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1722    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1723
1724    yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
1725
1726    return srcSliceH;
1727}
1728
1729static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1730                                int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1731    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1732
1733    yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);
1734
1735    return srcSliceH;
1736}
1737
1738static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1739                          int srcSliceH, uint8_t* dst[], int dstStride[]){
1740    const enum PixelFormat srcFormat= c->srcFormat;
1741    const enum PixelFormat dstFormat= c->dstFormat;
1742    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
1743                 const uint8_t *palette)=NULL;
1744    int i;
1745    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1746    uint8_t *srcPtr= src[0];
1747
1748    if (!usePal(srcFormat))
1749        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1750               sws_format_name(srcFormat), sws_format_name(dstFormat));
1751
1752    switch(dstFormat){
1753    case PIX_FMT_RGB32  : conv = palette8topacked32; break;
1754    case PIX_FMT_BGR32  : conv = palette8topacked32; break;
1755    case PIX_FMT_BGR32_1: conv = palette8topacked32; break;
1756    case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
1757    case PIX_FMT_RGB24  : conv = palette8topacked24; break;
1758    case PIX_FMT_BGR24  : conv = palette8topacked24; break;
1759    default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1760                    sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1761    }
1762
1763
1764    for (i=0; i<srcSliceH; i++) {
1765        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
1766        srcPtr+= srcStride[0];
1767        dstPtr+= dstStride[0];
1768    }
1769
1770    return srcSliceH;
1771}
1772
1773/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
1774static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1775                          int srcSliceH, uint8_t* dst[], int dstStride[]){
1776    const enum PixelFormat srcFormat= c->srcFormat;
1777    const enum PixelFormat dstFormat= c->dstFormat;
1778    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
1779    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
1780    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
1781    const int dstId= fmt_depth(dstFormat) >> 2;
1782    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1783
    /* BGR -> BGR & RGB -> RGB (same component order) */
1785    if (  (isBGR(srcFormat) && isBGR(dstFormat))
1786       || (isRGB(srcFormat) && isRGB(dstFormat))){
1787        switch(srcId | (dstId<<4)){
1788        case 0x34: conv= rgb16to15; break;
1789        case 0x36: conv= rgb24to15; break;
1790        case 0x38: conv= rgb32to15; break;
1791        case 0x43: conv= rgb15to16; break;
1792        case 0x46: conv= rgb24to16; break;
1793        case 0x48: conv= rgb32to16; break;
1794        case 0x63: conv= rgb15to24; break;
1795        case 0x64: conv= rgb16to24; break;
1796        case 0x68: conv= rgb32to24; break;
1797        case 0x83: conv= rgb15to32; break;
1798        case 0x84: conv= rgb16to32; break;
1799        case 0x86: conv= rgb24to32; break;
1800        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1801                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1802        }
1803    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
1804             || (isRGB(srcFormat) && isBGR(dstFormat))){
1805        switch(srcId | (dstId<<4)){
1806        case 0x33: conv= rgb15tobgr15; break;
1807        case 0x34: conv= rgb16tobgr15; break;
1808        case 0x36: conv= rgb24tobgr15; break;
1809        case 0x38: conv= rgb32tobgr15; break;
1810        case 0x43: conv= rgb15tobgr16; break;
1811        case 0x44: conv= rgb16tobgr16; break;
1812        case 0x46: conv= rgb24tobgr16; break;
1813        case 0x48: conv= rgb32tobgr16; break;
1814        case 0x63: conv= rgb15tobgr24; break;
1815        case 0x64: conv= rgb16tobgr24; break;
1816        case 0x66: conv= rgb24tobgr24; break;
1817        case 0x68: conv= rgb32tobgr24; break;
1818        case 0x83: conv= rgb15tobgr32; break;
1819        case 0x84: conv= rgb16tobgr32; break;
1820        case 0x86: conv= rgb24tobgr32; break;
1821        case 0x88: conv= rgb32tobgr32; break;
1822        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1823                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1824        }
1825    }else{
1826        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1827               sws_format_name(srcFormat), sws_format_name(dstFormat));
1828    }
1829
1830    if(conv)
1831    {
1832        uint8_t *srcPtr= src[0];
1833        if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
1834            srcPtr += ALT32_CORR;
1835
1836        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
1837            conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1838        else
1839        {
1840            int i;
1841            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1842
1843            for (i=0; i<srcSliceH; i++)
1844            {
1845                conv(srcPtr, dstPtr, c->srcW*srcBpp);
1846                srcPtr+= srcStride[0];
1847                dstPtr+= dstStride[0];
1848            }
1849        }
1850    }
1851    return srcSliceH;
1852}
1853
1854static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1855                              int srcSliceH, uint8_t* dst[], int dstStride[]){
1856
1857    rgb24toyv12(
1858        src[0],
1859        dst[0]+ srcSliceY    *dstStride[0],
1860        dst[1]+(srcSliceY>>1)*dstStride[1],
1861        dst[2]+(srcSliceY>>1)*dstStride[2],
1862        c->srcW, srcSliceH,
1863        dstStride[0], dstStride[1], srcStride[0]);
1864    return srcSliceH;
1865}
1866
1867static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1868                             int srcSliceH, uint8_t* dst[], int dstStride[]){
1869    int i;
1870
1871    /* copy Y */
1872    if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
1873        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
1874    else{
1875        uint8_t *srcPtr= src[0];
1876        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1877
1878        for (i=0; i<srcSliceH; i++)
1879        {
1880            memcpy(dstPtr, srcPtr, c->srcW);
1881            srcPtr+= srcStride[0];
1882            dstPtr+= dstStride[0];
1883        }
1884    }
1885
1886    if (c->dstFormat==PIX_FMT_YUV420P){
1887        planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
1888        planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
1889    }else{
1890        planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
1891        planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
1892    }
1893    return srcSliceH;
1894}
1895
/* unscaled copy-like stuff (assumes nearly identical formats) */
1897static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1898                      int srcSliceH, uint8_t* dst[], int dstStride[])
1899{
1900    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1901        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1902    else
1903    {
1904        int i;
1905        uint8_t *srcPtr= src[0];
1906        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1907        int length=0;
1908
1909        /* universal length finder */
1910        while(length+c->srcW <= FFABS(dstStride[0])
1911           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
1912        assert(length!=0);
1913
1914        for (i=0; i<srcSliceH; i++)
1915        {
1916            memcpy(dstPtr, srcPtr, length);
1917            srcPtr+= srcStride[0];
1918            dstPtr+= dstStride[0];
1919        }
1920    }
1921    return srcSliceH;
1922}
1923
1924static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1925                      int srcSliceH, uint8_t* dst[], int dstStride[])
1926{
1927    int plane;
1928    for (plane=0; plane<3; plane++)
1929    {
1930        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1931        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1932        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1933
1934        if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1935        {
1936            if (!isGray(c->dstFormat))
1937                memset(dst[plane], 128, dstStride[plane]*height);
1938        }
1939        else
1940        {
1941            if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1942                memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1943            else
1944            {
1945                int i;
1946                uint8_t *srcPtr= src[plane];
1947                uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1948                for (i=0; i<height; i++)
1949                {
1950                    memcpy(dstPtr, srcPtr, length);
1951                    srcPtr+= srcStride[plane];
1952                    dstPtr+= dstStride[plane];
1953                }
1954            }
1955        }
1956    }
1957    return srcSliceH;
1958}
1959
1960static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1961                        int srcSliceH, uint8_t* dst[], int dstStride[]){
1962
1963    int length= c->srcW;
1964    int y=      srcSliceY;
1965    int height= srcSliceH;
1966    int i, j;
1967    uint8_t *srcPtr= src[0];
1968    uint8_t *dstPtr= dst[0] + dstStride[0]*y;
1969
1970    if (!isGray(c->dstFormat)){
1971        int height= -((-srcSliceH)>>c->chrDstVSubSample);
1972        memset(dst[1], 128, dstStride[1]*height);
1973        memset(dst[2], 128, dstStride[2]*height);
1974    }
1975    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
1976    for (i=0; i<height; i++)
1977    {
1978        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
1979        srcPtr+= srcStride[0];
1980        dstPtr+= dstStride[0];
1981    }
1982    return srcSliceH;
1983}
1984
1985static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1986                        int srcSliceH, uint8_t* dst[], int dstStride[]){
1987
1988    int length= c->srcW;
1989    int y=      srcSliceY;
1990    int height= srcSliceH;
1991    int i, j;
1992    uint8_t *srcPtr= src[0];
1993    uint8_t *dstPtr= dst[0] + dstStride[0]*y;
1994    for (i=0; i<height; i++)
1995    {
1996        for (j=0; j<length; j++)
1997        {
1998            dstPtr[j<<1] = srcPtr[j];
1999            dstPtr[(j<<1)+1] = srcPtr[j];
2000        }
2001        srcPtr+= srcStride[0];
2002        dstPtr+= dstStride[0];
2003    }
2004    return srcSliceH;
2005}
2006
2007static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2008                      int srcSliceH, uint8_t* dst[], int dstStride[]){
2009
2010    int length= c->srcW;
2011    int y=      srcSliceY;
2012    int height= srcSliceH;
2013    int i, j;
2014    uint16_t *srcPtr= (uint16_t*)src[0];
2015    uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y/2);
2016    for (i=0; i<height; i++)
2017    {
2018        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
2019        srcPtr+= srcStride[0]/2;
2020        dstPtr+= dstStride[0]/2;
2021    }
2022    return srcSliceH;
2023}
2024
2025
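/* Return the chroma subsampling factors of a pixel format as log2 shifts:
 * the chroma plane is (luma_width >> *h) x (luma_height >> *v). */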
2026static void getSubSampleFactors(int *h, int *v, int format){
2027    switch(format){
2028    case PIX_FMT_UYVY422:
2029    case PIX_FMT_YUYV422:
2030        *h=1;
2031        *v=0;
2032        break;
2033    case PIX_FMT_YUV420P:
2034    case PIX_FMT_YUVA420P:
2035    case PIX_FMT_GRAY16BE:
2036    case PIX_FMT_GRAY16LE:
2037    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
2038    case PIX_FMT_NV12:
2039    case PIX_FMT_NV21:
2040        *h=1;
2041        *v=1;
2042        break;
2043    case PIX_FMT_YUV440P:
2044        *h=0;
2045        *v=1;
2046        break;
2047    case PIX_FMT_YUV410P:
2048        *h=2;
2049        *v=2;
2050        break;
2051    case PIX_FMT_YUV444P:
2052        *h=0;
2053        *v=0;
2054        break;
2055    case PIX_FMT_YUV422P:
2056        *h=1;
2057        *v=0;
2058        break;
2059    case PIX_FMT_YUV411P:
2060        *h=2;
2061        *v=0;
2062        break;
2063    default:
2064        *h=0;
2065        *v=0;
2066        break;
2067    }
2068}
2069
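/* Round a 16.16 fixed-point value to the nearest integer and saturate it to the
 * signed 16-bit range (returned as an unsigned bit pattern). */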
2070static uint16_t roundToInt16(int64_t f){
2071    int r= (f + (1<<15))>>16;
2072         if (r<-0x7FFF) return 0x8000;
2073    else if (r> 0x7FFF) return 0x7FFF;
2074    else                return r;
2075}
2076
2077/**
2078 * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
2079 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
2080 * @return -1 if not supported
2081 */
2082int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
2083    int64_t crv =  inv_table[0];
2084    int64_t cbu =  inv_table[1];
2085    int64_t cgu = -inv_table[2];
2086    int64_t cgv = -inv_table[3];
2087    int64_t cy  = 1<<16;
2088    int64_t oy  = 0;
2089
2090    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
2091    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
2092
2093    c->brightness= brightness;
2094    c->contrast  = contrast;
2095    c->saturation= saturation;
2096    c->srcRange  = srcRange;
2097    c->dstRange  = dstRange;
2098    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return 0;
2099
2100    c->uOffset=   0x0400040004000400LL;
2101    c->vOffset=   0x0400040004000400LL;
2102
2103    if (!srcRange){
2104        cy= (cy*255) / 219;
2105        oy= 16<<16;
2106    }else{
2107        crv= (crv*224) / 255;
2108        cbu= (cbu*224) / 255;
2109        cgu= (cgu*224) / 255;
2110        cgv= (cgv*224) / 255;
2111    }
2112
2113    cy = (cy *contrast             )>>16;
2114    crv= (crv*contrast * saturation)>>32;
2115    cbu= (cbu*contrast * saturation)>>32;
2116    cgu= (cgu*contrast * saturation)>>32;
2117    cgv= (cgv*contrast * saturation)>>32;
2118
2119    oy -= 256*brightness;
2120
2121    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
2122    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
2123    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
2124    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
2125    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
2126    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
2127
2128    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(cy <<13);
2129    c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
2130    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
2131    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
2132    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
2133    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
2134
2135    sws_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
2136    //FIXME factorize
2137
2138#ifdef COMPILE_ALTIVEC
2139    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
2140        sws_yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
2141#endif
2142    return 0;
2143}
2144
2145/**
2146 * @return -1 if not supported
2147 */
2148int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
2149    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
2150
2151    *inv_table = c->srcColorspaceTable;
2152    *table     = c->dstColorspaceTable;
2153    *srcRange  = c->srcRange;
2154    *dstRange  = c->dstRange;
2155    *brightness= c->brightness;
2156    *contrast  = c->contrast;
2157    *saturation= c->saturation;
2158
2159    return 0;
2160}
2161
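/* Replace the JPEG (full-range) pixel formats with their limited-range
 * equivalents; returns 1 if the format was full range, 0 otherwise. */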
2162static int handle_jpeg(enum PixelFormat *format)
2163{
2164    switch (*format) {
2165        case PIX_FMT_YUVJ420P:
2166            *format = PIX_FMT_YUV420P;
2167            return 1;
2168        case PIX_FMT_YUVJ422P:
2169            *format = PIX_FMT_YUV422P;
2170            return 1;
2171        case PIX_FMT_YUVJ444P:
2172            *format = PIX_FMT_YUV444P;
2173            return 1;
2174        case PIX_FMT_YUVJ440P:
2175            *format = PIX_FMT_YUV440P;
2176            return 1;
2177        default:
2178            return 0;
2179    }
2180}
2181
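/*
 * Typical usage (illustrative sketch only, error handling omitted):
 *
 *     SwsContext *ctx = sws_getContext(srcW, srcH, PIX_FMT_YUV420P,
 *                                      dstW, dstH, PIX_FMT_RGB24,
 *                                      SWS_BILINEAR, NULL, NULL, NULL);
 *     sws_scale(ctx, src, srcStride, 0, srcH, dst, dstStride);
 *     sws_freeContext(ctx);
 */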
2182SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
2183                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
2184
2185    SwsContext *c;
2186    int i;
2187    int usesVFilter, usesHFilter;
2188    int unscaled, needsDither;
2189    int srcRange, dstRange;
2190    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
2191#if ARCH_X86
2192    if (flags & SWS_CPU_CAPS_MMX)
2193        __asm__ volatile("emms\n\t"::: "memory");
2194#endif
2195
2196#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
2197    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
2198#if   HAVE_MMX2
2199    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
2200#elif HAVE_AMD3DNOW
2201    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
2202#elif HAVE_MMX
2203    flags |= SWS_CPU_CAPS_MMX;
2204#elif HAVE_ALTIVEC
2205    flags |= SWS_CPU_CAPS_ALTIVEC;
2206#elif ARCH_BFIN
2207    flags |= SWS_CPU_CAPS_BFIN;
2208#endif
2209#endif /* CONFIG_RUNTIME_CPUDETECT */
2210    if (clip_table[512] != 255) globalInit();
2211    if (!rgb15to16) sws_rgb2rgb_init(flags);
2212
2213    unscaled = (srcW == dstW && srcH == dstH);
2214    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
2215        && (fmt_depth(dstFormat))<24
2216        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
2217
2218    srcRange = handle_jpeg(&srcFormat);
2219    dstRange = handle_jpeg(&dstFormat);
2220
2221    if (!isSupportedIn(srcFormat))
2222    {
2223        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
2224        return NULL;
2225    }
2226    if (!isSupportedOut(dstFormat))
2227    {
2228        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
2229        return NULL;
2230    }
2231
2232    i= flags & ( SWS_POINT
2233                |SWS_AREA
2234                |SWS_BILINEAR
2235                |SWS_FAST_BILINEAR
2236                |SWS_BICUBIC
2237                |SWS_X
2238                |SWS_GAUSS
2239                |SWS_LANCZOS
2240                |SWS_SINC
2241                |SWS_SPLINE
2242                |SWS_BICUBLIN);
2243    if(!i || (i & (i-1)))
2244    {
2245        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
2246        return NULL;
2247    }
2248
2249    /* sanity check */
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
    {
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is an invalid scaling dimension\n",
               srcW, srcH, dstW, dstH);
2254        return NULL;
2255    }
2256    if(srcW > VOFW || dstW > VOFW){
2257        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
2258        return NULL;
2259    }
2260
2261    if (!dstFilter) dstFilter= &dummyFilter;
2262    if (!srcFilter) srcFilter= &dummyFilter;
2263
2264    c= av_mallocz(sizeof(SwsContext));
2265
2266    c->av_class = &sws_context_class;
2267    c->srcW= srcW;
2268    c->srcH= srcH;
2269    c->dstW= dstW;
2270    c->dstH= dstH;
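    // horizontal/vertical increments in 16.16 fixed point: source step per destination pixel, rounded to nearest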
2271    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2272    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2273    c->flags= flags;
2274    c->dstFormat= dstFormat;
2275    c->srcFormat= srcFormat;
2276    c->vRounder= 4* 0x0001000100010001ULL;
2277
2278    usesHFilter= usesVFilter= 0;
2279    if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
2280    if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
2281    if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
2282    if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
2283    if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
2284    if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
2285    if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
2286    if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
2287
2288    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2289    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2290
2291    // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
2292    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2293
2294    // drop some chroma lines if the user wants it
2295    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2296    c->chrSrcVSubSample+= c->vChrDrop;
2297
2298    // drop every other pixel for chroma calculation unless user wants full chroma
2299    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2300      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
2301      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
2302      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
2303      && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2304        c->chrSrcHSubSample=1;
2305
2306    if (param){
2307        c->param[0] = param[0];
2308        c->param[1] = param[1];
2309    }else{
2310        c->param[0] =
2311        c->param[1] = SWS_PARAM_DEFAULT;
2312    }
2313
2314    c->chrIntHSubSample= c->chrDstHSubSample;
2315    c->chrIntVSubSample= c->chrSrcVSubSample;
2316
2317    // Note the -((-x)>>y) is so that we always round toward +inf.
2318    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2319    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2320    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2321    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2322
2323    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2324
2325    /* unscaled special cases */
2326    if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
2327    {
2328        /* yv12_to_nv12 */
2329        if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2330        {
2331            c->swScale= PlanarToNV12Wrapper;
2332        }
2333        /* yuv2bgr */
2334        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat))
2335            && !(flags & SWS_ACCURATE_RND) && !(dstH&1))
2336        {
2337            c->swScale= sws_yuv2rgb_get_func_ptr(c);
2338        }
2339
2340        if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_BITEXACT))
2341        {
2342            c->swScale= yvu9toyv12Wrapper;
2343        }
2344
2345        /* bgr24toYV12 */
2346        if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND))
2347            c->swScale= bgr24toyv12Wrapper;
2348
        /* RGB/BGR -> RGB/BGR (forms that need no dithering) */
2350        if (  (isBGR(srcFormat) || isRGB(srcFormat))
2351           && (isBGR(dstFormat) || isRGB(dstFormat))
2352           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
2353           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
2354           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
2355           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
2356           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2357           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2358           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2359           && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
2360                                             && dstFormat != PIX_FMT_RGB32_1
2361                                             && dstFormat != PIX_FMT_BGR32_1
2362           && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2363             c->swScale= rgb2rgbWrapper;
2364
2365        if ((usePal(srcFormat) && (
2366                 dstFormat == PIX_FMT_RGB32   ||
2367                 dstFormat == PIX_FMT_RGB32_1 ||
2368                 dstFormat == PIX_FMT_RGB24   ||
2369                 dstFormat == PIX_FMT_BGR32   ||
2370                 dstFormat == PIX_FMT_BGR32_1 ||
2371                 dstFormat == PIX_FMT_BGR24)))
2372             c->swScale= pal2rgbWrapper;
2373
2374        if (srcFormat == PIX_FMT_YUV422P)
2375        {
2376            if (dstFormat == PIX_FMT_YUYV422)
2377                c->swScale= YUV422PToYuy2Wrapper;
2378            else if (dstFormat == PIX_FMT_UYVY422)
2379                c->swScale= YUV422PToUyvyWrapper;
2380        }
2381
        /* LQ converters if -sws 0 or -sws 4 */
2383        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2384            /* yv12_to_yuy2 */
2385            if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P)
2386            {
2387                if (dstFormat == PIX_FMT_YUYV422)
2388                    c->swScale= PlanarToYuy2Wrapper;
2389                else if (dstFormat == PIX_FMT_UYVY422)
2390                    c->swScale= PlanarToUyvyWrapper;
2391            }
2392        }
2393
2394#ifdef COMPILE_ALTIVEC
2395        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
2396            !(c->flags & SWS_BITEXACT) &&
2397            srcFormat == PIX_FMT_YUV420P) {
2398          // unscaled YV12 -> packed YUV, we want speed
2399          if (dstFormat == PIX_FMT_YUYV422)
2400              c->swScale= yv12toyuy2_unscaled_altivec;
2401          else if (dstFormat == PIX_FMT_UYVY422)
2402              c->swScale= yv12touyvy_unscaled_altivec;
2403        }
2404#endif
2405
2406        /* simple copy */
2407        if (  srcFormat == dstFormat
2408            || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
2409            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2410            || (isPlanarYUV(dstFormat) && isGray(srcFormat)))
2411        {
2412            if (isPacked(c->srcFormat))
2413                c->swScale= packedCopy;
2414            else /* Planar YUV or gray */
2415                c->swScale= planarCopy;
2416        }
2417
2418        /* gray16{le,be} conversions */
2419        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2420        {
2421            c->swScale= gray16togray;
2422        }
2423        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2424        {
2425            c->swScale= graytogray16;
2426        }
2427        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2428        {
2429            c->swScale= gray16swap;
2430        }
2431
2432#if ARCH_BFIN
2433        if (flags & SWS_CPU_CAPS_BFIN)
2434            ff_bfin_get_unscaled_swscale (c);
2435#endif
2436
2437        if (c->swScale){
2438            if (flags&SWS_PRINT_INFO)
2439                av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
2440                                sws_format_name(srcFormat), sws_format_name(dstFormat));
2441            return c;
2442        }
2443    }
2444
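    // the MMX2 "funny code" horizontal scaler only supports upscaling, with dstW a multiple of 32 and srcW a multiple of 16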
2445    if (flags & SWS_CPU_CAPS_MMX2)
2446    {
2447        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2448        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2449        {
2450            if (flags&SWS_PRINT_INFO)
2451                av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
2452        }
2453        if (usesHFilter) c->canMMX2BeUsed=0;
2454    }
2455    else
2456        c->canMMX2BeUsed=0;
2457
2458    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2459    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2460
    // Match pixel 0 of the source to pixel 0 of the destination, and pixel n-2 of the
    // source to pixel n-2 of the destination, but only in FAST_BILINEAR mode; otherwise
    // do correct scaling. n-2 is the last chrominance sample available.
    // This is not perfect, but no one should notice the difference; the more correct
    // variant would work like the vertical one, but that would require special code for
    // the first and last pixel.
2467    if (flags&SWS_FAST_BILINEAR)
2468    {
2469        if (c->canMMX2BeUsed)
2470        {
2471            c->lumXInc+= 20;
2472            c->chrXInc+= 20;
2473        }
2474        //we don't use the x86 asm scaler if MMX is available
2475        else if (flags & SWS_CPU_CAPS_MMX)
2476        {
2477            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2478            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2479        }
2480    }
2481
2482    /* precalculate horizontal scaler filter coefficients */
2483    {
2484        const int filterAlign=
2485            (flags & SWS_CPU_CAPS_MMX) ? 4 :
2486            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2487            1;
2488
2489        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2490                   srcW      ,       dstW, filterAlign, 1<<14,
2491                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2492                   srcFilter->lumH, dstFilter->lumH, c->param);
2493        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2494                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2495                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2496                   srcFilter->chrH, dstFilter->chrH, c->param);
2497
2498#define MAX_FUNNY_CODE_SIZE 10000
2499#if defined(COMPILE_MMX2)
2500// can't downscale !!!
2501        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2502        {
2503#ifdef MAP_ANONYMOUS
2504            c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2505            c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2506#else
2507            c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2508            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2509#endif
2510
2511            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
2512            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
2513            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
2514            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
2515
2516            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2517            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2518        }
2519#endif /* defined(COMPILE_MMX2) */
2520    } // initialize horizontal stuff
2521
2522
2523
2524    /* precalculate vertical scaler filter coefficients */
2525    {
2526        const int filterAlign=
2527            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
2528            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2529            1;
2530
2531        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2532                   srcH      ,        dstH, filterAlign, (1<<12),
2533                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2534                   srcFilter->lumV, dstFilter->lumV, c->param);
2535        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2536                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
2537                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2538                   srcFilter->chrV, dstFilter->chrV, c->param);
2539
2540#if HAVE_ALTIVEC
2541        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
2542        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
2543
2544        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
2545            int j;
2546            short *p = (short *)&c->vYCoeffsBank[i];
2547            for (j=0;j<8;j++)
2548                p[j] = c->vLumFilter[i];
2549        }
2550
2551        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
2552            int j;
2553            short *p = (short *)&c->vCCoeffsBank[i];
2554            for (j=0;j<8;j++)
2555                p[j] = c->vChrFilter[i];
2556        }
2557#endif
2558    }
2559
2560    // calculate buffer sizes so that they won't run out while handling these damn slices
2561    c->vLumBufSize= c->vLumFilterSize;
2562    c->vChrBufSize= c->vChrFilterSize;
2563    for (i=0; i<dstH; i++)
2564    {
2565        int chrI= i*c->chrDstH / dstH;
2566        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
2567                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2568
2569        nextSlice>>= c->chrSrcVSubSample;
2570        nextSlice<<= c->chrSrcVSubSample;
2571        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
2572            c->vLumBufSize= nextSlice - c->vLumFilterPos[i];
2573        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2574            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2575    }
2576
    // allocate pixbufs (we use dynamic allocation because otherwise we would need to allocate the worst-case size up front)
2578    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2579    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
    //Note we need at least one pixel more at the end because of the MMX code (just in case someone wants to replace the 4000/8000)
2581    /* align at 16 bytes for AltiVec */
2582    for (i=0; i<c->vLumBufSize; i++)
2583        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
2584    for (i=0; i<c->vChrBufSize; i++)
2585        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
2586
2587    //try to avoid drawing green stuff between the right end and the stride end
2588    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
2589
2590    assert(2*VOFW == VOF);
2591
2592    assert(c->chrDstH <= dstH);
2593
2594    if (flags&SWS_PRINT_INFO)
2595    {
2596#ifdef DITHER1XBPP
2597        const char *dither= " dithered";
2598#else
2599        const char *dither= "";
2600#endif
2601        if (flags&SWS_FAST_BILINEAR)
2602            av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, ");
2603        else if (flags&SWS_BILINEAR)
2604            av_log(c, AV_LOG_INFO, "BILINEAR scaler, ");
2605        else if (flags&SWS_BICUBIC)
2606            av_log(c, AV_LOG_INFO, "BICUBIC scaler, ");
2607        else if (flags&SWS_X)
2608            av_log(c, AV_LOG_INFO, "Experimental scaler, ");
2609        else if (flags&SWS_POINT)
2610            av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, ");
2611        else if (flags&SWS_AREA)
2612            av_log(c, AV_LOG_INFO, "Area Averageing scaler, ");
2613        else if (flags&SWS_BICUBLIN)
2614            av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, ");
2615        else if (flags&SWS_GAUSS)
2616            av_log(c, AV_LOG_INFO, "Gaussian scaler, ");
2617        else if (flags&SWS_SINC)
2618            av_log(c, AV_LOG_INFO, "Sinc scaler, ");
2619        else if (flags&SWS_LANCZOS)
2620            av_log(c, AV_LOG_INFO, "Lanczos scaler, ");
2621        else if (flags&SWS_SPLINE)
2622            av_log(c, AV_LOG_INFO, "Bicubic spline scaler, ");
2623        else
2624            av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
2625
2626        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
2627            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
2628                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
2629        else
2630            av_log(c, AV_LOG_INFO, "from %s to %s ",
2631                   sws_format_name(srcFormat), sws_format_name(dstFormat));
2632
2633        if (flags & SWS_CPU_CAPS_MMX2)
2634            av_log(c, AV_LOG_INFO, "using MMX2\n");
2635        else if (flags & SWS_CPU_CAPS_3DNOW)
2636            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
2637        else if (flags & SWS_CPU_CAPS_MMX)
2638            av_log(c, AV_LOG_INFO, "using MMX\n");
2639        else if (flags & SWS_CPU_CAPS_ALTIVEC)
2640            av_log(c, AV_LOG_INFO, "using AltiVec\n");
2641        else
2642            av_log(c, AV_LOG_INFO, "using C\n");
2643    }
2644
2645    if (flags & SWS_PRINT_INFO)
2646    {
2647        if (flags & SWS_CPU_CAPS_MMX)
2648        {
2649            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2650                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2651            else
2652            {
2653                if (c->hLumFilterSize==4)
2654                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal luminance scaling\n");
2655                else if (c->hLumFilterSize==8)
2656                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal luminance scaling\n");
2657                else
2658                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal luminance scaling\n");
2659
2660                if (c->hChrFilterSize==4)
2661                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
2662                else if (c->hChrFilterSize==8)
2663                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
2664                else
2665                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
2666            }
2667        }
2668        else
2669        {
2670#if ARCH_X86
2671            av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
2672#else
2673            if (flags & SWS_FAST_BILINEAR)
2674                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
2675            else
2676                av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n");
2677#endif
2678        }
2679        if (isPlanarYUV(dstFormat))
2680        {
2681            if (c->vLumFilterSize==1)
2682                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2683            else
2684                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2685        }
2686        else
2687        {
2688            if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
2689                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2690                       "      2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2691            else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
2692                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2693            else
2694                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2695        }
2696
2697        if (dstFormat==PIX_FMT_BGR24)
2698            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
2699                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
2700        else if (dstFormat==PIX_FMT_RGB32)
2701            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2702        else if (dstFormat==PIX_FMT_BGR565)
2703            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2704        else if (dstFormat==PIX_FMT_BGR555)
2705            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2706
2707        av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2708    }
2709    if (flags & SWS_PRINT_INFO)
2710    {
2711        av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2712               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2713        av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2714               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2715    }
2716
2717    c->swScale= getSwsFunc(flags);
2718    return c;
2719}
2720
2721/**
2722 * swscale wrapper, so we don't need to export the SwsContext.
 * Assumes planar YUV data is in YUV order rather than YVU.
2724 */
2725int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2726              int srcSliceH, uint8_t* dst[], int dstStride[]){
2727    int i;
2728    uint8_t* src2[4]= {src[0], src[1], src[2]};
2729
2730    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2731        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
2732        return 0;
2733    }
2734    if (c->sliceDir == 0) {
2735        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
2736    }
2737
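    // build per-frame lookup tables for palettized / low-depth sources:
    // pal_yuv packs Y,U,V as y | u<<8 | v<<16, pal_rgb packs the palette in the destination byte order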
2738    if (usePal(c->srcFormat)){
2739        for (i=0; i<256; i++){
2740            int p, r, g, b,y,u,v;
2741            if(c->srcFormat == PIX_FMT_PAL8){
2742                p=((uint32_t*)(src[1]))[i];
2743                r= (p>>16)&0xFF;
2744                g= (p>> 8)&0xFF;
2745                b=  p     &0xFF;
2746            }else if(c->srcFormat == PIX_FMT_RGB8){
2747                r= (i>>5    )*36;
2748                g= ((i>>2)&7)*36;
2749                b= (i&3     )*85;
2750            }else if(c->srcFormat == PIX_FMT_BGR8){
2751                b= (i>>6    )*85;
2752                g= ((i>>3)&7)*36;
2753                r= (i&7     )*36;
2754            }else if(c->srcFormat == PIX_FMT_RGB4_BYTE){
2755                r= (i>>3    )*255;
2756                g= ((i>>1)&3)*85;
2757                b= (i&1     )*255;
2758            }else {
2759                assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
2760                b= (i>>3    )*255;
2761                g= ((i>>1)&3)*85;
2762                r= (i&1     )*255;
2763            }
2764            y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2765            u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2766            v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2767            c->pal_yuv[i]= y + (u<<8) + (v<<16);
2768
2769
2770            switch(c->dstFormat) {
2771            case PIX_FMT_BGR32:
2772#ifndef WORDS_BIGENDIAN
2773            case PIX_FMT_RGB24:
2774#endif
2775                c->pal_rgb[i]=  r + (g<<8) + (b<<16);
2776                break;
2777            case PIX_FMT_BGR32_1:
2778#ifdef  WORDS_BIGENDIAN
2779            case PIX_FMT_BGR24:
2780#endif
2781                c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
2782                break;
2783            case PIX_FMT_RGB32_1:
2784#ifdef  WORDS_BIGENDIAN
2785            case PIX_FMT_RGB24:
2786#endif
2787                c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
2788                break;
2789            case PIX_FMT_RGB32:
2790#ifndef WORDS_BIGENDIAN
2791            case PIX_FMT_BGR24:
2792#endif
2793            default:
2794                c->pal_rgb[i]=  b + (g<<8) + (r<<16);
2795            }
2796        }
2797    }
2798
2799    // copy strides, so they can safely be modified
2800    if (c->sliceDir == 1) {
2801        // slices go from top to bottom
2802        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
2803        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
2804        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2805    } else {
2806        // slices go from bottom to top => we flip the image internally
2807        uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
2808                           dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
2809                           dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
2810        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
2811        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
2812
2813        src2[0] += (srcSliceH-1)*srcStride[0];
2814        if (!usePal(c->srcFormat))
2815            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2816        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2817
2818        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2819    }
2820}
2821
2822#if LIBSWSCALE_VERSION_MAJOR < 1
2823int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2824                      int srcSliceH, uint8_t* dst[], int dstStride[]){
2825    return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2826}
2827#endif
2828
2829SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
2830                                float lumaSharpen, float chromaSharpen,
2831                                float chromaHShift, float chromaVShift,
2832                                int verbose)
2833{
2834    SwsFilter *filter= av_malloc(sizeof(SwsFilter));
2835
2836    if (lumaGBlur!=0.0){
2837        filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2838        filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2839    }else{
2840        filter->lumH= sws_getIdentityVec();
2841        filter->lumV= sws_getIdentityVec();
2842    }
2843
2844    if (chromaGBlur!=0.0){
2845        filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2846        filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2847    }else{
2848        filter->chrH= sws_getIdentityVec();
2849        filter->chrV= sws_getIdentityVec();
2850    }
2851
2852    if (chromaSharpen!=0.0){
2853        SwsVector *id= sws_getIdentityVec();
2854        sws_scaleVec(filter->chrH, -chromaSharpen);
2855        sws_scaleVec(filter->chrV, -chromaSharpen);
2856        sws_addVec(filter->chrH, id);
2857        sws_addVec(filter->chrV, id);
2858        sws_freeVec(id);
2859    }
2860
2861    if (lumaSharpen!=0.0){
2862        SwsVector *id= sws_getIdentityVec();
2863        sws_scaleVec(filter->lumH, -lumaSharpen);
2864        sws_scaleVec(filter->lumV, -lumaSharpen);
2865        sws_addVec(filter->lumH, id);
2866        sws_addVec(filter->lumV, id);
2867        sws_freeVec(id);
2868    }
2869
2870    if (chromaHShift != 0.0)
2871        sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2872
2873    if (chromaVShift != 0.0)
2874        sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2875
2876    sws_normalizeVec(filter->chrH, 1.0);
2877    sws_normalizeVec(filter->chrV, 1.0);
2878    sws_normalizeVec(filter->lumH, 1.0);
2879    sws_normalizeVec(filter->lumV, 1.0);
2880
2881    if (verbose) sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
2882    if (verbose) sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);
2883
2884    return filter;
2885}
2886
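/* Sampled Gaussian: length = (int)(variance*quality + 0.5) | 1 taps of
 * exp(-d*d/(2*variance*variance)), normalized to unit sum ("variance" is used
 * as the standard deviation in the exponent). */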
2887SwsVector *sws_getGaussianVec(double variance, double quality){
2888    const int length= (int)(variance*quality + 0.5) | 1;
2889    int i;
2890    double *coeff= av_malloc(length*sizeof(double));
2891    double middle= (length-1)*0.5;
2892    SwsVector *vec= av_malloc(sizeof(SwsVector));
2893
2894    vec->coeff= coeff;
2895    vec->length= length;
2896
2897    for (i=0; i<length; i++)
2898    {
2899        double dist= i-middle;
2900        coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*PI);
2901    }
2902
2903    sws_normalizeVec(vec, 1.0);
2904
2905    return vec;
2906}
2907
2908SwsVector *sws_getConstVec(double c, int length){
2909    int i;
2910    double *coeff= av_malloc(length*sizeof(double));
2911    SwsVector *vec= av_malloc(sizeof(SwsVector));
2912
2913    vec->coeff= coeff;
2914    vec->length= length;
2915
2916    for (i=0; i<length; i++)
2917        coeff[i]= c;
2918
2919    return vec;
2920}
2921
2922
2923SwsVector *sws_getIdentityVec(void){
2924    return sws_getConstVec(1.0, 1);
2925}
2926
2927double sws_dcVec(SwsVector *a){
2928    int i;
2929    double sum=0;
2930
2931    for (i=0; i<a->length; i++)
2932        sum+= a->coeff[i];
2933
2934    return sum;
2935}
2936
2937void sws_scaleVec(SwsVector *a, double scalar){
2938    int i;
2939
2940    for (i=0; i<a->length; i++)
2941        a->coeff[i]*= scalar;
2942}
2943
2944void sws_normalizeVec(SwsVector *a, double height){
2945    sws_scaleVec(a, height/sws_dcVec(a));
2946}
2947
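/* Discrete convolution of two coefficient vectors; the result has length
 * a->length + b->length - 1. */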
2948static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
2949    int length= a->length + b->length - 1;
2950    double *coeff= av_malloc(length*sizeof(double));
2951    int i, j;
2952    SwsVector *vec= av_malloc(sizeof(SwsVector));
2953
2954    vec->coeff= coeff;
2955    vec->length= length;
2956
2957    for (i=0; i<length; i++) coeff[i]= 0.0;
2958
2959    for (i=0; i<a->length; i++)
2960    {
2961        for (j=0; j<b->length; j++)
2962        {
2963            coeff[i+j]+= a->coeff[i]*b->coeff[j];
2964        }
2965    }
2966
2967    return vec;
2968}
2969
2970static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
2971    int length= FFMAX(a->length, b->length);
2972    double *coeff= av_malloc(length*sizeof(double));
2973    int i;
2974    SwsVector *vec= av_malloc(sizeof(SwsVector));
2975
2976    vec->coeff= coeff;
2977    vec->length= length;
2978
2979    for (i=0; i<length; i++) coeff[i]= 0.0;
2980
2981    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
2982    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];
2983
2984    return vec;
2985}
2986
2987static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
2988    int length= FFMAX(a->length, b->length);
2989    double *coeff= av_malloc(length*sizeof(double));
2990    int i;
2991    SwsVector *vec= av_malloc(sizeof(SwsVector));
2992
2993    vec->coeff= coeff;
2994    vec->length= length;
2995
2996    for (i=0; i<length; i++) coeff[i]= 0.0;
2997
2998    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
2999    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];
3000
3001    return vec;
3002}
3003
3004/* shift left / or right if "shift" is negative */
3005static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
3006    int length= a->length + FFABS(shift)*2;
3007    double *coeff= av_malloc(length*sizeof(double));
3008    int i;
3009    SwsVector *vec= av_malloc(sizeof(SwsVector));
3010
3011    vec->coeff= coeff;
3012    vec->length= length;
3013
3014    for (i=0; i<length; i++) coeff[i]= 0.0;
3015
3016    for (i=0; i<a->length; i++)
3017    {
3018        coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
3019    }
3020
3021    return vec;
3022}
3023
3024void sws_shiftVec(SwsVector *a, int shift){
3025    SwsVector *shifted= sws_getShiftedVec(a, shift);
3026    av_free(a->coeff);
3027    a->coeff= shifted->coeff;
3028    a->length= shifted->length;
3029    av_free(shifted);
3030}
3031
3032void sws_addVec(SwsVector *a, SwsVector *b){
3033    SwsVector *sum= sws_sumVec(a, b);
3034    av_free(a->coeff);
3035    a->coeff= sum->coeff;
3036    a->length= sum->length;
3037    av_free(sum);
3038}
3039
3040void sws_subVec(SwsVector *a, SwsVector *b){
3041    SwsVector *diff= sws_diffVec(a, b);
3042    av_free(a->coeff);
3043    a->coeff= diff->coeff;
3044    a->length= diff->length;
3045    av_free(diff);
3046}
3047
3048void sws_convVec(SwsVector *a, SwsVector *b){
3049    SwsVector *conv= sws_getConvVec(a, b);
3050    av_free(a->coeff);
3051    a->coeff= conv->coeff;
3052    a->length= conv->length;
3053    av_free(conv);
3054}
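
/* Sketch of composing the vector operations above (values are arbitrary examples):
 * convolving two blur kernels with sws_convVec() behaves roughly like one wider blur,
 * and sws_shiftVec()/sws_addVec()/sws_subVec() can be combined the same way to build
 * custom kernels by hand.
 *
 *     SwsVector *a= sws_getGaussianVec(1.0, 3.0);
 *     SwsVector *b= sws_getGaussianVec(2.0, 3.0);
 *     sws_convVec(a, b);            // a now holds the combined, longer kernel
 *     sws_normalizeVec(a, 1.0);
 *     sws_freeVec(b);
 *     sws_freeVec(a);
 */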

SwsVector *sws_cloneVec(SwsVector *a){
    double *coeff= av_malloc(a->length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    vec->coeff= coeff;
    vec->length= a->length;

    for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];

    return vec;
}

// print each coefficient followed by a '|' indented proportionally to its value (a sideways bar graph)
void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){
    int i;
    double max=0;
    double min=0;
    double range;

    for (i=0; i<a->length; i++)
        if (a->coeff[i]>max) max= a->coeff[i];

    for (i=0; i<a->length; i++)
        if (a->coeff[i]<min) min= a->coeff[i];

    range= max - min;

    for (i=0; i<a->length; i++)
    {
        int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
        av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
        for (;x>0; x--) av_log(log_ctx, log_level, " ");
        av_log(log_ctx, log_level, "|\n");
    }
}

#if LIBSWSCALE_VERSION_MAJOR < 1
void sws_printVec(SwsVector *a){
    sws_printVec2(a, NULL, AV_LOG_DEBUG);
}
#endif

void sws_freeVec(SwsVector *a){
    if (!a) return;
    av_freep(&a->coeff);
    a->length=0;
    av_free(a);
}

void sws_freeFilter(SwsFilter *filter){
    if (!filter) return;

    if (filter->lumH) sws_freeVec(filter->lumH);
    if (filter->lumV) sws_freeVec(filter->lumV);
    if (filter->chrH) sws_freeVec(filter->chrH);
    if (filter->chrV) sws_freeVec(filter->chrV);
    av_free(filter);
}


void sws_freeContext(SwsContext *c){
    int i;
    if (!c) return;

    if (c->lumPixBuf)
    {
        for (i=0; i<c->vLumBufSize; i++)
            av_freep(&c->lumPixBuf[i]);
        av_freep(&c->lumPixBuf);
    }

    if (c->chrPixBuf)
    {
        for (i=0; i<c->vChrBufSize; i++)
            av_freep(&c->chrPixBuf[i]);
        av_freep(&c->chrPixBuf);
    }

    av_freep(&c->vLumFilter);
    av_freep(&c->vChrFilter);
    av_freep(&c->hLumFilter);
    av_freep(&c->hChrFilter);
#if HAVE_ALTIVEC
    av_freep(&c->vYCoeffsBank);
    av_freep(&c->vCCoeffsBank);
#endif

    av_freep(&c->vLumFilterPos);
    av_freep(&c->vChrFilterPos);
    av_freep(&c->hLumFilterPos);
    av_freep(&c->hChrFilterPos);

#if ARCH_X86 && CONFIG_GPL
#ifdef MAP_ANONYMOUS
    if (c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
    if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
#else
    av_free(c->funnyYCode);
    av_free(c->funnyUVCode);
#endif
    c->funnyYCode=NULL;
    c->funnyUVCode=NULL;
#endif /* ARCH_X86 && CONFIG_GPL */

    av_freep(&c->lumMmx2Filter);
    av_freep(&c->chrMmx2Filter);
    av_freep(&c->lumMmx2FilterPos);
    av_freep(&c->chrMmx2FilterPos);
    av_freep(&c->yuvTable);

    av_free(c);
}

struct SwsContext *sws_getCachedContext(struct SwsContext *context,
                                        int srcW, int srcH, enum PixelFormat srcFormat,
                                        int dstW, int dstH, enum PixelFormat dstFormat, int flags,
                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
{
    static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};

    if (!param)
        param = default_param;

    if (context) {
        // reuse the existing context only if every parameter matches; otherwise free it
        // and fall through to create a fresh one below
        if (context->srcW != srcW || context->srcH != srcH ||
            context->srcFormat != srcFormat ||
            context->dstW != dstW || context->dstH != dstH ||
            context->dstFormat != dstFormat || context->flags != flags ||
            context->param[0] != param[0] || context->param[1] != param[1])
        {
            sws_freeContext(context);
            context = NULL;
        }
    }
    if (!context) {
        return sws_getContext(srcW, srcH, srcFormat,
                              dstW, dstH, dstFormat, flags,
                              srcFilter, dstFilter, param);
    }
    return context;
}
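
/* Typical per-frame usage sketch (get_next_frame(), src, dstW/dstH, dst_data and
 * dst_linesize are placeholders, not part of this library): calling
 * sws_getCachedContext() every frame reuses the context while the parameters stay the
 * same and transparently rebuilds it when, e.g., the input size changes.
 *
 *     struct SwsContext *ctx= NULL;
 *     while (get_next_frame(&src)) {
 *         ctx= sws_getCachedContext(ctx, src.w, src.h, PIX_FMT_YUV420P,
 *                                   dstW, dstH, PIX_FMT_RGB24,
 *                                   SWS_BILINEAR, NULL, NULL, NULL);
 *         if (!ctx)
 *             break;                 // allocation failure
 *         sws_scale(ctx, src.data, src.linesize, 0, src.h, dst_data, dst_linesize);
 *     }
 *     sws_freeContext(ctx);
 */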