1/*
2 * software YUV to RGB converter
3 *
4 * Copyright (C) 2009 Konstantin Shishkov
5 *
6 * 1,4,8bpp support and context / deglobalize stuff
7 * by Michael Niedermayer (michaelni@gmx.at)
8 *
9 * This file is part of Libav.
10 *
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <inttypes.h>
29#include <assert.h>
30
31#include "config.h"
32#include "rgb2rgb.h"
33#include "swscale.h"
34#include "swscale_internal.h"
35#include "libavutil/cpu.h"
36#include "libavutil/bswap.h"
37
/* Ordered-dither matrices used by the low-bit-depth converters in this file.
 * They are only declared here; their definitions are not part of this file. */
extern const uint8_t dither_4x4_16[4][8];
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];
42
/**
 * YUV->RGB conversion coefficients, one row per colorspace index (0..7);
 * sws_getCoefficients() hands out a pointer to one of these rows.
 *
 * ff_yuv2rgb_c_init_tables() consumes a row as:
 *   [0] V->R, [1] U->B, [2] U->G (negated on use), [3] V->G (negated on use).
 * The values look like fixed point scaled by 1 << 16 (104597 / 65536 is
 * roughly 1.596) -- NOTE(review): confirm against the standards named below.
 */
const int32_t ff_yuv2rgb_coeffs[8][4] = {
    {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
    {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
    {104597, 132201, 25675, 53279}, /* unspecified */
    {104597, 132201, 25675, 53279}, /* reserved */
    {104448, 132798, 24759, 53109}, /* FCC */
    {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
    {104597, 132201, 25675, 53279}, /* SMPTE 170M */
    {117579, 136230, 16907, 35559}  /* SMPTE 240M (1987) */
};
53
54const int *sws_getCoefficients(int colorspace)
55{
56    if (colorspace > 7 || colorspace < 0)
57        colorspace = SWS_CS_DEFAULT;
58    return ff_yuv2rgb_coeffs[colorspace];
59}
60
61#define LOADCHROMA(i)                               \
62    U = pu[i];                                      \
63    V = pv[i];                                      \
64    r = (void *)c->table_rV[V];                     \
65    g = (void *)(c->table_gU[U] + c->table_gV[V]);  \
66    b = (void *)c->table_bU[U];
67
68#define PUTRGB(dst,src,i)            \
69    Y = src[2*i];                    \
70    dst[2*i  ] = r[Y] + g[Y] + b[Y]; \
71    Y = src[2*i+1];                  \
72    dst[2*i+1] = r[Y] + g[Y] + b[Y];
73
74#define PUTRGB24(dst,src,i)                                  \
75    Y = src[2*i];                                            \
76    dst[6*i+0] = r[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = b[Y]; \
77    Y = src[2*i+1];                                          \
78    dst[6*i+3] = r[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = b[Y];
79
80#define PUTBGR24(dst,src,i)                                  \
81    Y = src[2*i];                                            \
82    dst[6*i+0] = b[Y]; dst[6*i+1] = g[Y]; dst[6*i+2] = r[Y]; \
83    Y = src[2*i+1];                                          \
84    dst[6*i+3] = b[Y]; dst[6*i+4] = g[Y]; dst[6*i+5] = r[Y];
85
/*
 * Two packed 32-bit pixels with alpha: the sum of the three table entries
 * selected by luma, plus the alpha sample shifted left by s bits.
 * Relies on the locals Y, r, g and b of the enclosing function.
 *
 * The alpha byte is widened to uint32_t before shifting: shifting the
 * int-promoted byte by 24 overflows a signed int for alpha >= 128, which is
 * undefined behaviour.
 */
#define PUTRGBA(dst,ysrc,asrc,i,s)                                      \
    Y = ysrc[2*i];                                                      \
    dst[2*i  ] = r[Y] + g[Y] + b[Y] + ((uint32_t)asrc[2*i  ] << s);     \
    Y = ysrc[2*i+1];                                                    \
    dst[2*i+1] = r[Y] + g[Y] + b[Y] + ((uint32_t)asrc[2*i+1] << s);
91
/*
 * 48-bit output through a byte pointer: two pixels of 6 bytes each.  Every
 * 8-bit table value is stored into both bytes of its 16-bit component.
 * Relies on the locals Y, r, g and b of the enclosing function.
 */

/* Component order R, G, B. */
#define PUTRGB48(dst, src, i)                                   \
    Y = (src)[2 * (i)];                                         \
    (dst)[12 * (i) +  0] = (dst)[12 * (i) +  1] = r[Y];         \
    (dst)[12 * (i) +  2] = (dst)[12 * (i) +  3] = g[Y];         \
    (dst)[12 * (i) +  4] = (dst)[12 * (i) +  5] = b[Y];         \
    Y = (src)[2 * (i) + 1];                                     \
    (dst)[12 * (i) +  6] = (dst)[12 * (i) +  7] = r[Y];         \
    (dst)[12 * (i) +  8] = (dst)[12 * (i) +  9] = g[Y];         \
    (dst)[12 * (i) + 10] = (dst)[12 * (i) + 11] = b[Y];

/* Component order B, G, R. */
#define PUTBGR48(dst, src, i)                                   \
    Y = (src)[2 * (i)];                                         \
    (dst)[12 * (i) +  0] = (dst)[12 * (i) +  1] = b[Y];         \
    (dst)[12 * (i) +  2] = (dst)[12 * (i) +  3] = g[Y];         \
    (dst)[12 * (i) +  4] = (dst)[12 * (i) +  5] = r[Y];         \
    Y = (src)[2 * (i) + 1];                                     \
    (dst)[12 * (i) +  6] = (dst)[12 * (i) +  7] = b[Y];         \
    (dst)[12 * (i) +  8] = (dst)[12 * (i) +  9] = g[Y];         \
    (dst)[12 * (i) + 10] = (dst)[12 * (i) + 11] = r[Y];
111
/*
 * Converter function template, split over several macros.  A converter is
 * written as
 *
 *     YUV2RGBFUNC(name, dst_type, alpha)
 *         ...statements converting 8 pixels on each of two rows...
 *     ENDYUV2RGBLINE(dst_delta)
 *         ...statements converting 4 pixels on each of two rows...
 *     ENDYUV2RGBFUNC()
 *
 * YUV2RGBFUNC opens a static function and leaves two scopes open: the row
 * loop (two output rows per iteration) and the inner loop that runs
 * c->dstW >> 3 times.  Inside them the following locals are available:
 * dst_1/dst_2 (the two output rows), py_1/py_2 (the two luma rows), pu/pv
 * (the chroma row at index y >> 1), r/g/b (table pointers), U, V, Y, and,
 * only initialised when alpha is non-zero, pa_1/pa_2 (the two alpha rows).
 *
 * When alpha is zero and the source format is PIX_FMT_YUV422P the chroma
 * strides in srcStride[] are doubled in place before the loop, so the
 * y >> 1 chroma row index lands on chroma row y.
 * The generated function returns srcSliceH.
 */
#define YUV2RGBFUNC(func_name, dst_type, alpha) \
static int func_name(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, \
                     int srcSliceH, uint8_t* dst[], int dstStride[]) \
{\
    int y;\
\
    if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\
        srcStride[1] *= 2;\
        srcStride[2] *= 2;\
    }\
    for (y=0; y<srcSliceH; y+=2) {\
        dst_type *dst_1 = (dst_type*)(dst[0] + (y+srcSliceY  )*dstStride[0]);\
        dst_type *dst_2 = (dst_type*)(dst[0] + (y+srcSliceY+1)*dstStride[0]);\
        dst_type av_unused *r, *b;\
        dst_type *g;\
        const uint8_t *py_1 = src[0] + y*srcStride[0];\
        const uint8_t *py_2 = py_1 + srcStride[0];\
        const uint8_t *pu = src[1] + (y>>1)*srcStride[1];\
        const uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
        const uint8_t av_unused *pa_1, *pa_2;\
        unsigned int h_size = c->dstW>>3;\
        if (alpha) {\
            pa_1 = src[3] + y*srcStride[3];\
            pa_2 = pa_1 + srcStride[3];\
        }\
        while (h_size--) {\
            int av_unused U, V;\
            int Y;\

/*
 * Ends the 8-pixel inner loop: advances the chroma pointers by 4 samples,
 * the luma pointers by 8 samples and both output pointers by dst_delta
 * elements of dst_type, then closes the loop and opens the block executed
 * when (c->dstW & 4) is set.  Alpha pointers are not advanced here.
 */
#define ENDYUV2RGBLINE(dst_delta)\
            pu += 4;\
            pv += 4;\
            py_1 += 8;\
            py_2 += 8;\
            dst_1 += dst_delta;\
            dst_2 += dst_delta;\
        }\
        if (c->dstW & 4) {\
            int av_unused Y, U, V;\

/* Closes the (c->dstW & 4) block, the row loop and the function. */
#define ENDYUV2RGBFUNC()\
        }\
    }\
    return srcSliceH;\
}

/* Ends the inner loop and closes the function right away, which leaves the
 * (c->dstW & 4) block empty. */
#define CLOSEYUV2RGBFUNC(dst_delta)\
    ENDYUV2RGBLINE(dst_delta)\
    ENDYUV2RGBFUNC()
161
162YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
163    LOADCHROMA(0);
164    PUTRGB48(dst_1,py_1,0);
165    PUTRGB48(dst_2,py_2,0);
166
167    LOADCHROMA(1);
168    PUTRGB48(dst_2,py_2,1);
169    PUTRGB48(dst_1,py_1,1);
170
171    LOADCHROMA(2);
172    PUTRGB48(dst_1,py_1,2);
173    PUTRGB48(dst_2,py_2,2);
174
175    LOADCHROMA(3);
176    PUTRGB48(dst_2,py_2,3);
177    PUTRGB48(dst_1,py_1,3);
178ENDYUV2RGBLINE(48)
179    LOADCHROMA(0);
180    PUTRGB48(dst_1,py_1,0);
181    PUTRGB48(dst_2,py_2,0);
182
183    LOADCHROMA(1);
184    PUTRGB48(dst_2,py_2,1);
185    PUTRGB48(dst_1,py_1,1);
186ENDYUV2RGBFUNC()
187
188YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0)
189    LOADCHROMA(0);
190    PUTBGR48(dst_1,py_1,0);
191    PUTBGR48(dst_2,py_2,0);
192
193    LOADCHROMA(1);
194    PUTBGR48(dst_2,py_2,1);
195    PUTBGR48(dst_1,py_1,1);
196
197    LOADCHROMA(2);
198    PUTBGR48(dst_1,py_1,2);
199    PUTBGR48(dst_2,py_2,2);
200
201    LOADCHROMA(3);
202    PUTBGR48(dst_2,py_2,3);
203    PUTBGR48(dst_1,py_1,3);
204ENDYUV2RGBLINE(48)
205    LOADCHROMA(0);
206    PUTBGR48(dst_1,py_1,0);
207    PUTBGR48(dst_2,py_2,0);
208
209    LOADCHROMA(1);
210    PUTBGR48(dst_2,py_2,1);
211    PUTBGR48(dst_1,py_1,1);
212ENDYUV2RGBFUNC()
213
214YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
215    LOADCHROMA(0);
216    PUTRGB(dst_1,py_1,0);
217    PUTRGB(dst_2,py_2,0);
218
219    LOADCHROMA(1);
220    PUTRGB(dst_2,py_2,1);
221    PUTRGB(dst_1,py_1,1);
222
223    LOADCHROMA(2);
224    PUTRGB(dst_1,py_1,2);
225    PUTRGB(dst_2,py_2,2);
226
227    LOADCHROMA(3);
228    PUTRGB(dst_2,py_2,3);
229    PUTRGB(dst_1,py_1,3);
230ENDYUV2RGBLINE(8)
231    LOADCHROMA(0);
232    PUTRGB(dst_1,py_1,0);
233    PUTRGB(dst_2,py_2,0);
234
235    LOADCHROMA(1);
236    PUTRGB(dst_2,py_2,1);
237    PUTRGB(dst_1,py_1,1);
238ENDYUV2RGBFUNC()
239
240YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
241    LOADCHROMA(0);
242    PUTRGBA(dst_1,py_1,pa_1,0,24);
243    PUTRGBA(dst_2,py_2,pa_2,0,24);
244
245    LOADCHROMA(1);
246    PUTRGBA(dst_2,py_2,pa_1,1,24);
247    PUTRGBA(dst_1,py_1,pa_2,1,24);
248
249    LOADCHROMA(2);
250    PUTRGBA(dst_1,py_1,pa_1,2,24);
251    PUTRGBA(dst_2,py_2,pa_2,2,24);
252
253    LOADCHROMA(3);
254    PUTRGBA(dst_2,py_2,pa_1,3,24);
255    PUTRGBA(dst_1,py_1,pa_2,3,24);
256    pa_1 += 8;\
257    pa_2 += 8;\
258ENDYUV2RGBLINE(8)
259    LOADCHROMA(0);
260    PUTRGBA(dst_1,py_1,pa_1,0,24);
261    PUTRGBA(dst_2,py_2,pa_2,0,24);
262
263    LOADCHROMA(1);
264    PUTRGBA(dst_2,py_2,pa_1,1,24);
265    PUTRGBA(dst_1,py_1,pa_2,1,24);
266ENDYUV2RGBFUNC()
267
268YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
269    LOADCHROMA(0);
270    PUTRGBA(dst_1,py_1,pa_1,0,0);
271    PUTRGBA(dst_2,py_2,pa_2,0,0);
272
273    LOADCHROMA(1);
274    PUTRGBA(dst_2,py_2,pa_2,1,0);
275    PUTRGBA(dst_1,py_1,pa_1,1,0);
276
277    LOADCHROMA(2);
278    PUTRGBA(dst_1,py_1,pa_1,2,0);
279    PUTRGBA(dst_2,py_2,pa_2,2,0);
280
281    LOADCHROMA(3);
282    PUTRGBA(dst_2,py_2,pa_2,3,0);
283    PUTRGBA(dst_1,py_1,pa_1,3,0);
284    pa_1 += 8;\
285    pa_2 += 8;\
286ENDYUV2RGBLINE(8)
287    LOADCHROMA(0);
288    PUTRGBA(dst_1,py_1,pa_1,0,0);
289    PUTRGBA(dst_2,py_2,pa_2,0,0);
290
291    LOADCHROMA(1);
292    PUTRGBA(dst_2,py_2,pa_2,1,0);
293    PUTRGBA(dst_1,py_1,pa_1,1,0);
294ENDYUV2RGBFUNC()
295
296YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
297    LOADCHROMA(0);
298    PUTRGB24(dst_1,py_1,0);
299    PUTRGB24(dst_2,py_2,0);
300
301    LOADCHROMA(1);
302    PUTRGB24(dst_2,py_2,1);
303    PUTRGB24(dst_1,py_1,1);
304
305    LOADCHROMA(2);
306    PUTRGB24(dst_1,py_1,2);
307    PUTRGB24(dst_2,py_2,2);
308
309    LOADCHROMA(3);
310    PUTRGB24(dst_2,py_2,3);
311    PUTRGB24(dst_1,py_1,3);
312ENDYUV2RGBLINE(24)
313    LOADCHROMA(0);
314    PUTRGB24(dst_1,py_1,0);
315    PUTRGB24(dst_2,py_2,0);
316
317    LOADCHROMA(1);
318    PUTRGB24(dst_2,py_2,1);
319    PUTRGB24(dst_1,py_1,1);
320ENDYUV2RGBFUNC()
321
322// only trivial mods from yuv2rgb_c_24_rgb
323YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
324    LOADCHROMA(0);
325    PUTBGR24(dst_1,py_1,0);
326    PUTBGR24(dst_2,py_2,0);
327
328    LOADCHROMA(1);
329    PUTBGR24(dst_2,py_2,1);
330    PUTBGR24(dst_1,py_1,1);
331
332    LOADCHROMA(2);
333    PUTBGR24(dst_1,py_1,2);
334    PUTBGR24(dst_2,py_2,2);
335
336    LOADCHROMA(3);
337    PUTBGR24(dst_2,py_2,3);
338    PUTBGR24(dst_1,py_1,3);
339ENDYUV2RGBLINE(24)
340    LOADCHROMA(0);
341    PUTBGR24(dst_1,py_1,0);
342    PUTBGR24(dst_2,py_2,0);
343
344    LOADCHROMA(1);
345    PUTBGR24(dst_2,py_2,1);
346    PUTBGR24(dst_1,py_1,1);
347ENDYUV2RGBFUNC()
348
349// This is exactly the same code as yuv2rgb_c_32 except for the types of
350// r, g, b, dst_1, dst_2
351YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
352    LOADCHROMA(0);
353    PUTRGB(dst_1,py_1,0);
354    PUTRGB(dst_2,py_2,0);
355
356    LOADCHROMA(1);
357    PUTRGB(dst_2,py_2,1);
358    PUTRGB(dst_1,py_1,1);
359
360    LOADCHROMA(2);
361    PUTRGB(dst_1,py_1,2);
362    PUTRGB(dst_2,py_2,2);
363
364    LOADCHROMA(3);
365    PUTRGB(dst_2,py_2,3);
366    PUTRGB(dst_1,py_1,3);
367CLOSEYUV2RGBFUNC(8)
368
369// r, g, b, dst_1, dst_2
370YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
371    const uint8_t *d16 = dither_4x4_16[y&3];
372#define PUTRGB12(dst,src,i,o)                                   \
373    Y = src[2*i];                                               \
374    dst[2*i]   = r[Y+d16[0+o]] + g[Y+d16[0+o]] + b[Y+d16[0+o]]; \
375    Y = src[2*i+1];                                             \
376    dst[2*i+1] = r[Y+d16[1+o]] + g[Y+d16[1+o]] + b[Y+d16[1+o]];
377
378    LOADCHROMA(0);
379    PUTRGB12(dst_1,py_1,0,0);
380    PUTRGB12(dst_2,py_2,0,0+8);
381
382    LOADCHROMA(1);
383    PUTRGB12(dst_2,py_2,1,2+8);
384    PUTRGB12(dst_1,py_1,1,2);
385
386    LOADCHROMA(2);
387    PUTRGB12(dst_1,py_1,2,4);
388    PUTRGB12(dst_2,py_2,2,4+8);
389
390    LOADCHROMA(3);
391    PUTRGB12(dst_2,py_2,3,6+8);
392    PUTRGB12(dst_1,py_1,3,6);
393CLOSEYUV2RGBFUNC(8)
394
395// r, g, b, dst_1, dst_2
396YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
397    const uint8_t *d32 = dither_8x8_32[y&7];
398    const uint8_t *d64 = dither_8x8_73[y&7];
399#define PUTRGB8(dst,src,i,o)                                    \
400    Y = src[2*i];                                               \
401    dst[2*i]   = r[Y+d32[0+o]] + g[Y+d32[0+o]] + b[Y+d64[0+o]]; \
402    Y = src[2*i+1];                                             \
403    dst[2*i+1] = r[Y+d32[1+o]] + g[Y+d32[1+o]] + b[Y+d64[1+o]];
404
405    LOADCHROMA(0);
406    PUTRGB8(dst_1,py_1,0,0);
407    PUTRGB8(dst_2,py_2,0,0+8);
408
409    LOADCHROMA(1);
410    PUTRGB8(dst_2,py_2,1,2+8);
411    PUTRGB8(dst_1,py_1,1,2);
412
413    LOADCHROMA(2);
414    PUTRGB8(dst_1,py_1,2,4);
415    PUTRGB8(dst_2,py_2,2,4+8);
416
417    LOADCHROMA(3);
418    PUTRGB8(dst_2,py_2,3,6+8);
419    PUTRGB8(dst_1,py_1,3,6);
420CLOSEYUV2RGBFUNC(8)
421
422YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
423    const uint8_t *d64 =  dither_8x8_73[y&7];
424    const uint8_t *d128 = dither_8x8_220[y&7];
425    int acc;
426
427#define PUTRGB4D(dst,src,i,o)                                     \
428    Y = src[2*i];                                                 \
429    acc = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]];        \
430    Y = src[2*i+1];                                               \
431    acc |= (r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]])<<4;  \
432    dst[i]= acc;
433
434    LOADCHROMA(0);
435    PUTRGB4D(dst_1,py_1,0,0);
436    PUTRGB4D(dst_2,py_2,0,0+8);
437
438    LOADCHROMA(1);
439    PUTRGB4D(dst_2,py_2,1,2+8);
440    PUTRGB4D(dst_1,py_1,1,2);
441
442    LOADCHROMA(2);
443    PUTRGB4D(dst_1,py_1,2,4);
444    PUTRGB4D(dst_2,py_2,2,4+8);
445
446    LOADCHROMA(3);
447    PUTRGB4D(dst_2,py_2,3,6+8);
448    PUTRGB4D(dst_1,py_1,3,6);
449CLOSEYUV2RGBFUNC(4)
450
451YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
452    const uint8_t *d64 =  dither_8x8_73[y&7];
453    const uint8_t *d128 = dither_8x8_220[y&7];
454
455#define PUTRGB4DB(dst,src,i,o)                                    \
456    Y = src[2*i];                                                 \
457    dst[2*i]   = r[Y+d128[0+o]] + g[Y+d64[0+o]] + b[Y+d128[0+o]]; \
458    Y = src[2*i+1];                                               \
459    dst[2*i+1] = r[Y+d128[1+o]] + g[Y+d64[1+o]] + b[Y+d128[1+o]];
460
461    LOADCHROMA(0);
462    PUTRGB4DB(dst_1,py_1,0,0);
463    PUTRGB4DB(dst_2,py_2,0,0+8);
464
465    LOADCHROMA(1);
466    PUTRGB4DB(dst_2,py_2,1,2+8);
467    PUTRGB4DB(dst_1,py_1,1,2);
468
469    LOADCHROMA(2);
470    PUTRGB4DB(dst_1,py_1,2,4);
471    PUTRGB4DB(dst_2,py_2,2,4+8);
472
473    LOADCHROMA(3);
474    PUTRGB4DB(dst_2,py_2,3,6+8);
475    PUTRGB4DB(dst_1,py_1,3,6);
476CLOSEYUV2RGBFUNC(8)
477
478YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
479        const uint8_t *d128 = dither_8x8_220[y&7];
480        char out_1 = 0, out_2 = 0;
481        g= c->table_gU[128] + c->table_gV[128];
482
483#define PUTRGB1(out,src,i,o)    \
484    Y = src[2*i];               \
485    out+= out + g[Y+d128[0+o]]; \
486    Y = src[2*i+1];             \
487    out+= out + g[Y+d128[1+o]];
488
489    PUTRGB1(out_1,py_1,0,0);
490    PUTRGB1(out_2,py_2,0,0+8);
491
492    PUTRGB1(out_2,py_2,1,2+8);
493    PUTRGB1(out_1,py_1,1,2);
494
495    PUTRGB1(out_1,py_1,2,4);
496    PUTRGB1(out_2,py_2,2,4+8);
497
498    PUTRGB1(out_2,py_2,3,6+8);
499    PUTRGB1(out_1,py_1,3,6);
500
501    dst_1[0]= out_1;
502    dst_2[0]= out_2;
503CLOSEYUV2RGBFUNC(1)
504
505SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
506{
507    SwsFunc t = NULL;
508
509    if (HAVE_MMX) {
510        t = ff_yuv2rgb_init_mmx(c);
511    } else if (HAVE_VIS) {
512        t = ff_yuv2rgb_init_vis(c);
513    } else if (CONFIG_MLIB) {
514        t = ff_yuv2rgb_init_mlib(c);
515    } else if (HAVE_ALTIVEC) {
516        t = ff_yuv2rgb_init_altivec(c);
517    } else if (ARCH_BFIN) {
518        t = ff_yuv2rgb_get_func_ptr_bfin(c);
519    }
520
521    if (t)
522        return t;
523
524    av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat));
525
526    switch (c->dstFormat) {
527    case PIX_FMT_BGR48BE:
528    case PIX_FMT_BGR48LE:    return yuv2rgb_c_bgr48;
529    case PIX_FMT_RGB48BE:
530    case PIX_FMT_RGB48LE:    return yuv2rgb_c_48;
531    case PIX_FMT_ARGB:
532    case PIX_FMT_ABGR:       if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) return yuva2argb_c;
533    case PIX_FMT_RGBA:
534    case PIX_FMT_BGRA:       return (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) ? yuva2rgba_c : yuv2rgb_c_32;
535    case PIX_FMT_RGB24:      return yuv2rgb_c_24_rgb;
536    case PIX_FMT_BGR24:      return yuv2rgb_c_24_bgr;
537    case PIX_FMT_RGB565:
538    case PIX_FMT_BGR565:
539    case PIX_FMT_RGB555:
540    case PIX_FMT_BGR555:     return yuv2rgb_c_16;
541    case PIX_FMT_RGB444:
542    case PIX_FMT_BGR444:     return yuv2rgb_c_12_ordered_dither;
543    case PIX_FMT_RGB8:
544    case PIX_FMT_BGR8:       return yuv2rgb_c_8_ordered_dither;
545    case PIX_FMT_RGB4:
546    case PIX_FMT_BGR4:       return yuv2rgb_c_4_ordered_dither;
547    case PIX_FMT_RGB4_BYTE:
548    case PIX_FMT_BGR4_BYTE:  return yuv2rgb_c_4b_ordered_dither;
549    case PIX_FMT_MONOBLACK:  return yuv2rgb_c_1_ordered_dither;
550    default:
551        assert(0);
552    }
553    return NULL;
554}
555
/**
 * Fill a 256-entry table of pointers into a lookup table.
 *
 * Entry i points at y_tab displaced by
 * elemsize * (((i * inc) >> 16) - (inc >> 9)) bytes.
 *
 * @param table    receives the 256 pointers
 * @param elemsize size in bytes of one lookup-table element
 * @param inc      per-index increment; the accumulated value is shifted
 *                 right by 16 to get the element offset
 * @param y_tab    reference position inside the lookup table
 */
static void fill_table(uint8_t* table[256], const int elemsize, const int inc, void *y_tab)
{
    uint8_t *const origin = (uint8_t *)y_tab - elemsize * (inc >> 9);
    int64_t accum = 0;
    int idx = 0;

    while (idx < 256) {
        table[idx++] = origin + elemsize * (accum >> 16);
        accum += inc;
    }
}
569
/**
 * Fill a 256-entry table of byte offsets.
 *
 * Entry i is elemsize * (((i * inc) >> 16) - (inc >> 9)).  LOADCHROMA adds
 * such an offset to a table_gU pointer.
 *
 * @param table    receives the 256 offsets
 * @param elemsize size in bytes of one lookup-table element
 * @param inc      per-index increment; the accumulated value is shifted
 *                 right by 16 to get the element offset
 */
static void fill_gv_table(int table[256], const int elemsize, const int inc)
{
    const int bias = -(inc >> 9);
    int64_t accum = 0;
    int idx = 0;

    while (idx < 256) {
        table[idx++] = elemsize * (bias + (accum >> 16));
        accum += inc;
    }
}
581
/**
 * Convert a value with 16 fractional bits to an integer, rounding to
 * nearest and saturating.
 *
 * @param f value to convert
 * @return the rounded value as a 16-bit pattern; results above 0x7FFF give
 *         0x7FFF and results below -0x7FFF give 0x8000
 */
static uint16_t roundToInt16(int64_t f)
{
    const int rounded = (f + (1 << 15)) >> 16;

    if (rounded < -0x7FFF)
        return 0x8000;
    if (rounded > 0x7FFF)
        return 0x7FFF;
    return rounded;
}
589
590av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange,
591                                     int brightness, int contrast, int saturation)
592{
593    const int isRgb =      c->dstFormat==PIX_FMT_RGB32
594                        || c->dstFormat==PIX_FMT_RGB32_1
595                        || c->dstFormat==PIX_FMT_BGR24
596                        || c->dstFormat==PIX_FMT_RGB565BE
597                        || c->dstFormat==PIX_FMT_RGB565LE
598                        || c->dstFormat==PIX_FMT_RGB555BE
599                        || c->dstFormat==PIX_FMT_RGB555LE
600                        || c->dstFormat==PIX_FMT_RGB444BE
601                        || c->dstFormat==PIX_FMT_RGB444LE
602                        || c->dstFormat==PIX_FMT_RGB8
603                        || c->dstFormat==PIX_FMT_RGB4
604                        || c->dstFormat==PIX_FMT_RGB4_BYTE
605                        || c->dstFormat==PIX_FMT_MONOBLACK;
606    const int isNotNe =    c->dstFormat==PIX_FMT_NE(RGB565LE,RGB565BE)
607                        || c->dstFormat==PIX_FMT_NE(RGB555LE,RGB555BE)
608                        || c->dstFormat==PIX_FMT_NE(RGB444LE,RGB444BE)
609                        || c->dstFormat==PIX_FMT_NE(BGR565LE,BGR565BE)
610                        || c->dstFormat==PIX_FMT_NE(BGR555LE,BGR555BE)
611                        || c->dstFormat==PIX_FMT_NE(BGR444LE,BGR444BE);
612    const int bpp = c->dstFormatBpp;
613    uint8_t *y_table;
614    uint16_t *y_table16;
615    uint32_t *y_table32;
616    int i, base, rbase, gbase, bbase, abase, needAlpha;
617    const int yoffs = fullRange ? 384 : 326;
618
619    int64_t crv =  inv_table[0];
620    int64_t cbu =  inv_table[1];
621    int64_t cgu = -inv_table[2];
622    int64_t cgv = -inv_table[3];
623    int64_t cy  = 1<<16;
624    int64_t oy  = 0;
625
626    int64_t yb = 0;
627
628    if (!fullRange) {
629        cy = (cy*255) / 219;
630        oy = 16<<16;
631    } else {
632        crv = (crv*224) / 255;
633        cbu = (cbu*224) / 255;
634        cgu = (cgu*224) / 255;
635        cgv = (cgv*224) / 255;
636    }
637
638    cy  = (cy *contrast             ) >> 16;
639    crv = (crv*contrast * saturation) >> 32;
640    cbu = (cbu*contrast * saturation) >> 32;
641    cgu = (cgu*contrast * saturation) >> 32;
642    cgv = (cgv*contrast * saturation) >> 32;
643    oy -= 256*brightness;
644
645    c->uOffset=   0x0400040004000400LL;
646    c->vOffset=   0x0400040004000400LL;
647    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
648    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
649    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
650    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
651    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
652    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
653
654    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(cy <<13);
655    c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
656    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
657    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
658    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
659    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
660
661    //scale coefficients by cy
662    crv = ((crv << 16) + 0x8000) / cy;
663    cbu = ((cbu << 16) + 0x8000) / cy;
664    cgu = ((cgu << 16) + 0x8000) / cy;
665    cgv = ((cgv << 16) + 0x8000) / cy;
666
667    av_free(c->yuvTable);
668
669    switch (bpp) {
670    case 1:
671        c->yuvTable = av_malloc(1024);
672        y_table = c->yuvTable;
673        yb = -(384<<16) - oy;
674        for (i = 0; i < 1024-110; i++) {
675            y_table[i+110] = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
676            yb += cy;
677        }
678        fill_table(c->table_gU, 1, cgu, y_table + yoffs);
679        fill_gv_table(c->table_gV, 1, cgv);
680        break;
681    case 4:
682    case 4|128:
683        rbase = isRgb ? 3 : 0;
684        gbase = 1;
685        bbase = isRgb ? 0 : 3;
686        c->yuvTable = av_malloc(1024*3);
687        y_table = c->yuvTable;
688        yb = -(384<<16) - oy;
689        for (i = 0; i < 1024-110; i++) {
690            int yval = av_clip_uint8((yb + 0x8000) >> 16);
691            y_table[i+110     ] =  (yval >> 7)       << rbase;
692            y_table[i+ 37+1024] = ((yval + 43) / 85) << gbase;
693            y_table[i+110+2048] =  (yval >> 7)       << bbase;
694            yb += cy;
695        }
696        fill_table(c->table_rV, 1, crv, y_table + yoffs);
697        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
698        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
699        fill_gv_table(c->table_gV, 1, cgv);
700        break;
701    case 8:
702        rbase = isRgb ? 5 : 0;
703        gbase = isRgb ? 2 : 3;
704        bbase = isRgb ? 0 : 6;
705        c->yuvTable = av_malloc(1024*3);
706        y_table = c->yuvTable;
707        yb = -(384<<16) - oy;
708        for (i = 0; i < 1024-38; i++) {
709            int yval = av_clip_uint8((yb + 0x8000) >> 16);
710            y_table[i+16     ] = ((yval + 18) / 36) << rbase;
711            y_table[i+16+1024] = ((yval + 18) / 36) << gbase;
712            y_table[i+37+2048] = ((yval + 43) / 85) << bbase;
713            yb += cy;
714        }
715        fill_table(c->table_rV, 1, crv, y_table + yoffs);
716        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
717        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
718        fill_gv_table(c->table_gV, 1, cgv);
719        break;
720    case 12:
721        rbase = isRgb ? 8 : 0;
722        gbase = 4;
723        bbase = isRgb ? 0 : 8;
724        c->yuvTable = av_malloc(1024*3*2);
725        y_table16 = c->yuvTable;
726        yb = -(384<<16) - oy;
727        for (i = 0; i < 1024; i++) {
728            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
729            y_table16[i     ] = (yval >> 4) << rbase;
730            y_table16[i+1024] = (yval >> 4) << gbase;
731            y_table16[i+2048] = (yval >> 4) << bbase;
732            yb += cy;
733        }
734        if (isNotNe)
735            for (i = 0; i < 1024*3; i++)
736                y_table16[i] = av_bswap16(y_table16[i]);
737        fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
738        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
739        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
740        fill_gv_table(c->table_gV, 2, cgv);
741        break;
742    case 15:
743    case 16:
744        rbase = isRgb ? bpp - 5 : 0;
745        gbase = 5;
746        bbase = isRgb ? 0 : (bpp - 5);
747        c->yuvTable = av_malloc(1024*3*2);
748        y_table16 = c->yuvTable;
749        yb = -(384<<16) - oy;
750        for (i = 0; i < 1024; i++) {
751            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
752            y_table16[i     ] = (yval >> 3)          << rbase;
753            y_table16[i+1024] = (yval >> (18 - bpp)) << gbase;
754            y_table16[i+2048] = (yval >> 3)          << bbase;
755            yb += cy;
756        }
757        if(isNotNe)
758            for (i = 0; i < 1024*3; i++)
759                y_table16[i] = av_bswap16(y_table16[i]);
760        fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
761        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
762        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
763        fill_gv_table(c->table_gV, 2, cgv);
764        break;
765    case 24:
766    case 48:
767        c->yuvTable = av_malloc(1024);
768        y_table = c->yuvTable;
769        yb = -(384<<16) - oy;
770        for (i = 0; i < 1024; i++) {
771            y_table[i] = av_clip_uint8((yb + 0x8000) >> 16);
772            yb += cy;
773        }
774        fill_table(c->table_rV, 1, crv, y_table + yoffs);
775        fill_table(c->table_gU, 1, cgu, y_table + yoffs);
776        fill_table(c->table_bU, 1, cbu, y_table + yoffs);
777        fill_gv_table(c->table_gV, 1, cgv);
778        break;
779    case 32:
780        base = (c->dstFormat == PIX_FMT_RGB32_1 || c->dstFormat == PIX_FMT_BGR32_1) ? 8 : 0;
781        rbase = base + (isRgb ? 16 : 0);
782        gbase = base + 8;
783        bbase = base + (isRgb ? 0 : 16);
784        needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
785        if (!needAlpha)
786            abase = (base + 24) & 31;
787        c->yuvTable = av_malloc(1024*3*4);
788        y_table32 = c->yuvTable;
789        yb = -(384<<16) - oy;
790        for (i = 0; i < 1024; i++) {
791            unsigned yval = av_clip_uint8((yb + 0x8000) >> 16);
792            y_table32[i     ] = (yval << rbase) + (needAlpha ? 0 : (255u << abase));
793            y_table32[i+1024] = yval << gbase;
794            y_table32[i+2048] = yval << bbase;
795            yb += cy;
796        }
797        fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
798        fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
799        fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
800        fill_gv_table(c->table_gV, 4, cgv);
801        break;
802    default:
803        c->yuvTable = NULL;
804        av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);
805        return -1;
806    }
807    return 0;
808}
809