1/*
2 * software RGB to RGB converter
3 * pluralize by software PAL8 to RGB converter
4 *              software YUV to YUV converter
5 *              software YUV to RGB converter
6 * Written by Nick Kurshev.
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8 * lot of big-endian byte order fixes by Alex Beregszaszi
9 *
10 * This file is part of Libav.
11 *
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27#include <stddef.h>
28
/**
 * Expand packed 3-byte pixels to 4-byte pixels with a constant 255 byte.
 *
 * On little-endian builds the three input bytes are copied in order and
 * followed by 255; on big-endian builds 255 is written first and the three
 * input bytes follow in reverse order.
 *
 * @param src      input buffer
 * @param dst      output buffer; receives 4 bytes per consumed pixel
 * @param src_size input size in bytes; a pixel is consumed for as long as the
 *                 read position is below src + src_size
 */
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint8_t *out = dst;

    for (in = src; in < stop; in += 3, out += 4) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
        out[0] = 255;
        out[1] = in[2];
        out[2] = in[1];
        out[3] = in[0];
#else
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
        out[3] = 255;
#endif
    }
}
52
/**
 * Shrink packed 4-byte pixels to 3-byte pixels by dropping one byte.
 *
 * On little-endian builds the first three bytes of each pixel are copied in
 * order and the fourth is skipped; on big-endian builds the first byte is
 * skipped and the remaining three are stored in reverse order.
 *
 * @param src      input buffer
 * @param dst      output buffer; receives 3 bytes per consumed pixel
 * @param src_size input size in bytes
 */
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint8_t *out = dst;

    for (in = src; in < stop; in += 4, out += 3) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
        out[2] = in[1];
        out[1] = in[2];
        out[0] = in[3];
#else
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
#endif
    }
}
77
78/*
79 original by Strepto/Astral
80 ported to gcc & bugfixed: A'rpi
81 MMX2, 3DNOW optimization by Nick Kurshev
82 32-bit C version, and and&add trick by Michael Niedermayer
83*/
/**
 * Convert 15-bit pixels (stored in 16-bit words) to 16-bit pixels.
 *
 * Each 16-bit value x becomes (x & 0x7FFF) + (x & 0x7FE0): adding the upper
 * ten bits to themselves moves them up by one position while the low five
 * bits stay in place. Two pixels are handled per 32-bit access; one
 * remaining pixel, if any, is handled with a 16-bit access.
 *
 * @param src      input buffer, accessed through uint32_t/uint16_t pointers
 * @param dst      output buffer, accessed the same way
 * @param src_size input size in bytes
 */
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop      = src + src_size;
    const uint8_t *const pair_stop = stop - 3;
    const uint8_t *in = src;
    uint8_t *out = dst;

    /* two pixels at a time */
    for (; in < pair_stop; in += 4, out += 4) {
        const unsigned two = *(const uint32_t *)in;
        *(uint32_t *)out = (two & 0x7FFF7FFF) + (two & 0x7FE07FE0);
    }
    /* at most one pixel is left over */
    if (in < stop) {
        const unsigned short one = *(const uint16_t *)in;
        *(uint16_t *)out = (one & 0x7FFF) + (one & 0x7FE0);
    }
}
103
/**
 * Convert 16-bit pixels to 15-bit pixels (stored in 16-bit words).
 *
 * Each 16-bit value x becomes ((x >> 1) & 0x7FE0) | (x & 0x001F): the upper
 * bits move down by one position, the low five bits are kept. Two pixels are
 * handled per 32-bit access; one remaining pixel, if any, is handled with a
 * 16-bit access.
 *
 * @param src      input buffer, accessed through uint32_t/uint16_t pointers
 * @param dst      output buffer, accessed the same way
 * @param src_size input size in bytes
 */
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop      = src + src_size;
    const uint8_t *const pair_stop = stop - 3;
    const uint8_t *in = src;
    uint8_t *out = dst;

    /* two pixels at a time */
    for (; in < pair_stop; in += 4, out += 4) {
        const uint32_t two = *(const uint32_t *)in;
        *(uint32_t *)out = ((two >> 1) & 0x7FE07FE0) | (two & 0x001F001F);
    }
    /* at most one pixel is left over */
    if (in < stop) {
        const uint16_t one = *(const uint16_t *)in;
        *(uint16_t *)out = ((one >> 1) & 0x7FE0) | (one & 0x001F);
    }
}
124
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 pixels.
 *
 * From each native-endian 32-bit word, the top 5 bits of bits 0..7 go to
 * output bits 0..4, the top 6 bits of bits 8..15 go to output bits 5..10 and
 * the top 5 bits of bits 16..23 go to output bits 11..15.
 *
 * @param src      input buffer, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size input size in bytes
 */
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint16_t *out = (uint16_t *)dst;

    for (in = src; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF)     >> 3;
        const int mid  = (px & 0xFC00)   >> 5;
        const int high = (px & 0xF80000) >> 8;

        *out++ = low + mid + high;
    }
}
137
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 pixels with the outer fields swapped.
 *
 * From each native-endian 32-bit word, the top 5 bits of bits 0..7 go to
 * output bits 11..15, the top 6 bits of bits 8..15 go to output bits 5..10
 * and the top 5 bits of bits 16..23 go to output bits 0..4.
 *
 * @param src      input buffer, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size input size in bytes
 */
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint16_t *out = (uint16_t *)dst;

    for (in = src; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8)     << 8;
        const int mid  = (px & 0xFC00)   >> 5;
        const int low  = (px & 0xF80000) >> 19;

        *out++ = high + mid + low;
    }
}
149
/**
 * Pack 32-bit pixels into 15-bit 5-5-5 pixels (stored in 16-bit words).
 *
 * From each native-endian 32-bit word, the top 5 bits of bits 0..7 go to
 * output bits 0..4, the top 5 bits of bits 8..15 go to output bits 5..9 and
 * the top 5 bits of bits 16..23 go to output bits 10..14.
 *
 * @param src      input buffer, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size input size in bytes
 */
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint16_t *out = (uint16_t *)dst;

    for (in = src; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF)     >> 3;
        const int mid  = (px & 0xF800)   >> 6;
        const int high = (px & 0xF80000) >> 9;

        *out++ = low + mid + high;
    }
}
161
/**
 * Pack 32-bit pixels into 15-bit 5-5-5 pixels with the outer fields swapped.
 *
 * From each native-endian 32-bit word, the top 5 bits of bits 0..7 go to
 * output bits 10..14, the top 5 bits of bits 8..15 go to output bits 5..9
 * and the top 5 bits of bits 16..23 go to output bits 0..4.
 *
 * @param src      input buffer, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size input size in bytes
 */
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint16_t *out = (uint16_t *)dst;

    for (in = src; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8)     << 7;
        const int mid  = (px & 0xF800)   >> 6;
        const int low  = (px & 0xF80000) >> 19;

        *out++ = high + mid + low;
    }
}
173
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 pixels.
 *
 * The first byte of each pixel supplies output bits 0..4, the second byte
 * bits 5..10 and the third byte bits 11..15 (the most significant bits of
 * each byte are kept).
 *
 * @param src      input buffer, 3 bytes per pixel
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size input size in bytes
 */
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint16_t *out = (uint16_t *)dst;

    for (in = src; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}
187
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 pixels.
 *
 * The first byte of each pixel supplies output bits 11..15, the second byte
 * bits 5..10 and the third byte bits 0..4 (the most significant bits of each
 * byte are kept).
 *
 * @param src      input buffer, 3 bytes per pixel
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size input size in bytes
 */
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint16_t *out = (uint16_t *)dst;

    for (in = src; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}
201
/**
 * Pack 3-byte pixels into 15-bit 5-5-5 pixels (stored in 16-bit words).
 *
 * The first byte of each pixel supplies output bits 0..4, the second byte
 * bits 5..9 and the third byte bits 10..14 (the top 5 bits of each byte are
 * kept).
 *
 * @param src      input buffer, 3 bytes per pixel
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size input size in bytes
 */
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint16_t *out = (uint16_t *)dst;

    for (in = src; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}
215
/**
 * Pack 3-byte pixels into 15-bit 5-5-5 pixels (stored in 16-bit words).
 *
 * The first byte of each pixel supplies output bits 10..14, the second byte
 * bits 5..9 and the third byte bits 0..4 (the top 5 bits of each byte are
 * kept).
 *
 * @param src      input buffer, 3 bytes per pixel
 * @param dst      output buffer, written through a uint16_t pointer
 * @param src_size input size in bytes
 */
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in;
    uint16_t *out = (uint16_t *)dst;

    for (in = src; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}
229
230/*
  A less accurate approximation is used here: the input value is simply
  left-shifted and the low-order bits are filled with zeroes. This method
  improves PNG compression, but it cannot reproduce white exactly because it
  never generates an all-ones maximum value; the net effect is to darken the
  image slightly.
236
237  The better method should be "left bit replication":
238
239   4 3 2 1 0
240   ---------
241   1 1 0 1 1
242
243   7 6 5 4 3  2 1 0
244   ----------------
245   1 1 0 1 1  1 1 0
246   |=======|  |===|
247       |      leftmost bits repeated to fill open bits
248       |
249   original bits
250*/
/**
 * Expand 15-bit 5-5-5 pixels (in 16-bit words) to 3-byte pixels.
 *
 * Each 5-bit field is moved to the top of its output byte and the low three
 * bits are left zero. Bits 0..4 produce the first byte, bits 5..9 the second
 * and bits 10..14 the third.
 *
 * @param src      input buffer, read through a uint16_t pointer
 * @param dst      output buffer; receives 3 bytes per input pixel
 * @param src_size input size in bytes (src_size / 2 pixels are converted)
 */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out = dst;

    for (; in < stop; in++, out += 3) {
        const uint16_t px = *in;

        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x3E0)  >> 2;
        out[2] = (px & 0x7C00) >> 7;
    }
}
265
/**
 * Expand 16-bit 5-6-5 pixels to 3-byte pixels.
 *
 * Each field is moved to the top of its output byte and the remaining low
 * bits are left zero. Bits 0..4 produce the first byte, bits 5..10 the second
 * and bits 11..15 the third.
 *
 * @param src      input buffer, read through a uint16_t pointer
 * @param dst      output buffer; receives 3 bytes per input pixel
 * @param src_size input size in bytes (src_size / 2 pixels are converted)
 */
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out = (uint8_t *)dst;

    for (; in < stop; in++, out += 3) {
        const uint16_t px = *in;

        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x7E0)  >> 3;
        out[2] = (px & 0xF800) >> 8;
    }
}
280
/**
 * Expand 15-bit 5-5-5 pixels (in 16-bit words) to 4-byte pixels.
 *
 * Each 5-bit field is moved to the top of its output byte (low bits zero)
 * and a constant 255 byte is added. On little-endian builds the byte order
 * is: bits 0..4, bits 5..9, bits 10..14, 255. On big-endian builds the four
 * bytes are written in the reverse order.
 *
 * @param src      input buffer, read through a uint16_t pointer
 * @param dst      output buffer; receives 4 bytes per input pixel
 * @param src_size input size in bytes (src_size / 2 pixels are converted)
 */
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out = dst;

    for (; in < stop; in++, out += 4) {
        const uint16_t px = *in;
#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (px & 0x7C00) >> 7;
        out[2] = (px & 0x3E0)  >> 2;
        out[3] = (px & 0x1F)   << 3;
#else
        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x3E0)  >> 2;
        out[2] = (px & 0x7C00) >> 7;
        out[3] = 255;
#endif
    }
}
303
/**
 * Expand 16-bit 5-6-5 pixels to 4-byte pixels.
 *
 * Each field is moved to the top of its output byte (low bits zero) and a
 * constant 255 byte is added. On little-endian builds the byte order is:
 * bits 0..4, bits 5..10, bits 11..15, 255. On big-endian builds the four
 * bytes are written in the reverse order.
 *
 * @param src      input buffer, read through a uint16_t pointer
 * @param dst      output buffer; receives 4 bytes per input pixel
 * @param src_size input size in bytes (src_size / 2 pixels are converted)
 */
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    const uint16_t *in = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out = dst;

    for (; in < stop; in++, out += 4) {
        const uint16_t px = *in;
#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (px & 0xF800) >> 8;
        out[2] = (px & 0x7E0)  >> 3;
        out[3] = (px & 0x1F)   << 3;
#else
        out[0] = (px & 0x1F)   << 3;
        out[1] = (px & 0x7E0)  >> 3;
        out[2] = (px & 0xF800) >> 8;
        out[3] = 255;
#endif
    }
}
326
/**
 * Swap bits 0..7 with bits 16..23 in every native-endian 32-bit word,
 * leaving bits 8..15 and 24..31 where they are.
 *
 * One word is converted for every started group of 4 input bytes, i.e. a
 * src_size that is not a multiple of 4 still converts the final, partial
 * word in full. Nothing is done for src_size <= 0.
 *
 * @param src      input buffer, read through a uint32_t pointer
 * @param dst      output buffer, written through a uint32_t pointer
 * @param src_size input size in bytes
 */
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    int i;

    for (i = 0; i < src_size; i += 4) {
        /* Unsigned arithmetic throughout: shifting a signed int holding
         * 0x00ff00ff left by 16 would overflow, which is undefined. */
        const uint32_t v      = *(const uint32_t *)&src[i];
        const uint32_t keep   = v & 0xff00ff00;
        const uint32_t swap   = v & 0x00ff00ff;

        *(uint32_t *)&dst[i] = (swap >> 16) + keep + (swap << 16);
    }
}
338
/**
 * Swap the first and third byte of every 3-byte pixel.
 *
 * Only complete pixels are converted: trailing bytes that do not form a full
 * pixel are neither read nor written, and a src_size <= 0 does nothing.
 * Each pixel's source bytes are read before the corresponding destination
 * bytes that could alias them are written, so src == dst is supported.
 *
 * @param src      input buffer, 3 bytes per pixel
 * @param dst      output buffer, 3 bytes per pixel
 * @param src_size input size in bytes
 */
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
{
    int i;

    /* A signed index is used on purpose: comparing an unsigned index with a
     * negative src_size would convert the size to a huge unsigned value. */
    for (i = 0; src_size - i >= 3; i += 3) {
        const uint8_t third = src[i + 2];

        dst[i + 1] = src[i + 1];
        dst[i + 2] = src[i + 0];
        dst[i + 0] = third;
    }
}
350
/**
 * Interleave planar Y, U and V into packed Y0 U Y1 V byte groups.
 *
 * For every output row, width / 2 groups of 4 bytes are written: two
 * consecutive luma samples with one U and one V sample between/after them.
 *
 * Bytes are stored individually instead of being packed into 32/64-bit
 * words. This keeps the output byte order identical on every platform,
 * places no alignment requirement on dst, and avoids the sign extension
 * that occurs when a chroma byte >= 128 is shifted left by 24 as a signed
 * int and then widened to 64 bits.
 *
 * @param ysrc             luma plane
 * @param usrc             U plane
 * @param vsrc             V plane
 * @param dst              packed output
 * @param width            luma width in samples; width / 2 groups per row
 * @param height           number of output rows
 * @param lumStride        byte distance between luma rows
 * @param chromStride      byte distance between chroma rows
 * @param dstStride        byte distance between output rows
 * @param vertLumPerChroma luma rows sharing one chroma row; the chroma
 *                         pointers advance after each row y for which
 *                         (y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1
 */
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    const int chromWidth = width >> 1;
    int y;

    for (y = 0; y < height; y++) {
        uint8_t *out = dst;
        int i;

        for (i = 0; i < chromWidth; i++) {
            out[0] = ysrc[2 * i];
            out[1] = usrc[i];
            out[2] = ysrc[2 * i + 1];
            out[3] = vsrc[i];
            out   += 4;
        }

        /* move to the next chroma row once its last luma row is done */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
400
/**
 * Convert planar input to packed output by forwarding to
 * yuvPlanartoyuy2_c() with one chroma row per two luma rows.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height,
                                int lumStride, int chromStride,
                                int dstStride)
{
    /* each chroma row is reused for two consecutive luma rows */
    const int luma_rows_per_chroma_row = 2;

    //FIXME interpolate chroma
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      luma_rows_per_chroma_row);
}
415
/**
 * Interleave planar Y, U and V into packed U Y0 V Y1 byte groups.
 *
 * For every output row, width / 2 groups of 4 bytes are written: one U
 * sample, a luma sample, one V sample and the next luma sample.
 *
 * Bytes are stored individually instead of being packed into 32/64-bit
 * words. This keeps the output byte order identical on every platform,
 * places no alignment requirement on dst, and avoids the sign extension
 * that occurs when a luma byte >= 128 is shifted left by 24 as a signed int
 * and then widened to 64 bits.
 *
 * @param ysrc             luma plane
 * @param usrc             U plane
 * @param vsrc             V plane
 * @param dst              packed output
 * @param width            luma width in samples; width / 2 groups per row
 * @param height           number of output rows
 * @param lumStride        byte distance between luma rows
 * @param chromStride      byte distance between chroma rows
 * @param dstStride        byte distance between output rows
 * @param vertLumPerChroma luma rows sharing one chroma row; the chroma
 *                         pointers advance after each row y for which
 *                         (y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1
 */
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     int width, int height,
                                     int lumStride, int chromStride,
                                     int dstStride, int vertLumPerChroma)
{
    const int chromWidth = width >> 1;
    int y;

    for (y = 0; y < height; y++) {
        uint8_t *out = dst;
        int i;

        for (i = 0; i < chromWidth; i++) {
            out[0] = usrc[i];
            out[1] = ysrc[2 * i];
            out[2] = vsrc[i];
            out[3] = ysrc[2 * i + 1];
            out   += 4;
        }

        /* move to the next chroma row once its last luma row is done */
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
465
/**
 * Convert planar input to packed output by forwarding to
 * yuvPlanartouyvy_c() with one chroma row per two luma rows.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                int width, int height,
                                int lumStride, int chromStride,
                                int dstStride)
{
    /* each chroma row is reused for two consecutive luma rows */
    const int luma_rows_per_chroma_row = 2;

    //FIXME interpolate chroma
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      luma_rows_per_chroma_row);
}
480
/**
 * Convert planar input to packed output by forwarding to
 * yuvPlanartouyvy_c() with one chroma row per luma row.
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height,
                                   int lumStride, int chromStride,
                                   int dstStride)
{
    /* every luma row has its own chroma row */
    const int luma_rows_per_chroma_row = 1;

    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      luma_rows_per_chroma_row);
}
493
/**
 * Convert planar input to packed output by forwarding to
 * yuvPlanartoyuy2_c() with one chroma row per luma row.
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   int width, int height,
                                   int lumStride, int chromStride,
                                   int dstStride)
{
    /* every luma row has its own chroma row */
    const int luma_rows_per_chroma_row = 1;

    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      luma_rows_per_chroma_row);
}
506
/**
 * Split packed Y0 U Y1 V input into separate Y, U and V planes.
 *
 * Rows are processed in pairs: luma is taken from both rows, chroma only
 * from the first row of each pair.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height,
                                int lumStride, int chromStride,
                                int srcStride)
{
    const int chromWidth = width >> 1;
    int y;

    for (y = 0; y < height; y += 2) {
        const uint8_t *in_top = src;
        const uint8_t *in_bot = src + srcStride;
        uint8_t *luma_top = ydst;
        uint8_t *luma_bot = ydst + lumStride;
        int i;

        /* first row of the pair: luma and chroma */
        for (i = 0; i < chromWidth; i++) {
            const uint8_t *grp = in_top + 4 * i;

            luma_top[2 * i + 0] = grp[0];
            udst[i]             = grp[1];
            luma_top[2 * i + 1] = grp[2];
            vdst[i]             = grp[3];
        }

        /* second row of the pair: luma only */
        for (i = 0; i < chromWidth; i++) {
            const uint8_t *grp = in_bot + 4 * i;

            luma_bot[2 * i + 0] = grp[0];
            luma_bot[2 * i + 1] = grp[2];
        }

        src  = in_bot + srcStride;
        ydst = luma_bot + lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}
540
/**
 * Upscale one plane by a factor of two in both directions.
 *
 * The first and last output rows are produced from a single source row with
 * 3:1 / 1:3 horizontal weights. Every pair of output rows in between is
 * produced from two adjacent source rows, mixing a sample of one row with a
 * sample of the other row using 3:1 / 1:3 weights (diagonal neighbours in
 * the interior, vertical neighbours in the first and last column).
 *
 * @param src       source plane
 * @param dst       destination plane; 2 * srcWidth samples are written per
 *                  output row
 * @param srcWidth  source width in samples
 * @param srcHeight source height in rows
 * @param srcStride byte distance between source rows
 * @param dstStride byte distance between destination rows
 */
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
                              int srcHeight, int srcStride, int dstStride)
{
    const int last = srcWidth - 1;  /* index of the last source column */
    int col, row;

    /* first output row: horizontal interpolation only */
    dst[0] = src[0];
    for (col = 0; col < last; col++) {
        const int left  = src[col];
        const int right = src[col + 1];

        dst[2 * col + 1] = (3 * left +     right) >> 2;
        dst[2 * col + 2] = (    left + 3 * right) >> 2;
    }
    dst[2 * last + 1] = src[last];

    dst += dstStride;

    /* each pair of adjacent source rows yields two output rows */
    for (row = 1; row < srcHeight; row++) {
        const uint8_t *up   = src;              /* upper source row */
        const uint8_t *down = src + srcStride;  /* lower source row */
        uint8_t *out_up     = dst;              /* weighted towards `up`   */
        uint8_t *out_down   = dst + dstStride;  /* weighted towards `down` */

        out_up[0]   = (3 * up[0] +     down[0]) >> 2;
        out_down[0] = (    up[0] + 3 * down[0]) >> 2;

        for (col = 0; col < last; col++) {
            const int up_l   = up[col];
            const int up_r   = up[col + 1];
            const int down_l = down[col];
            const int down_r = down[col + 1];

            out_up[2 * col + 1]   = (3 * up_l +     down_r) >> 2;
            out_down[2 * col + 2] = (    up_l + 3 * down_r) >> 2;
            out_down[2 * col + 1] = (    up_r + 3 * down_l) >> 2;
            out_up[2 * col + 2]   = (3 * up_r +     down_l) >> 2;
        }

        out_up[2 * last + 1]   = (3 * up[last] +     down[last]) >> 2;
        out_down[2 * last + 1] = (    up[last] + 3 * down[last]) >> 2;

        dst += dstStride * 2;
        src += srcStride;
    }

    /* last output row: horizontal interpolation only */
    dst[0] = src[0];
    for (col = 0; col < last; col++) {
        const int left  = src[col];
        const int right = src[col + 1];

        dst[2 * col + 1] = (3 * left +     right) >> 2;
        dst[2 * col + 2] = (    left + 3 * right) >> 2;
    }
    dst[2 * last + 1] = src[last];
}
585
/**
 * Split packed U Y0 V Y1 input into separate Y, U and V planes.
 *
 * Rows are processed in pairs: luma is taken from both rows, chroma only
 * from the first row of each pair.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 * Chrominance data is only taken from every second line, others are ignored.
 * FIXME: Write HQ version.
 */
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                int width, int height,
                                int lumStride, int chromStride,
                                int srcStride)
{
    const int chromWidth = width >> 1;
    int y;

    for (y = 0; y < height; y += 2) {
        const uint8_t *in_top = src;
        const uint8_t *in_bot = src + srcStride;
        uint8_t *luma_top = ydst;
        uint8_t *luma_bot = ydst + lumStride;
        int i;

        /* first row of the pair: luma and chroma */
        for (i = 0; i < chromWidth; i++) {
            const uint8_t *grp = in_top + 4 * i;

            udst[i]             = grp[0];
            luma_top[2 * i + 0] = grp[1];
            vdst[i]             = grp[2];
            luma_top[2 * i + 1] = grp[3];
        }

        /* second row of the pair: luma only */
        for (i = 0; i < chromWidth; i++) {
            const uint8_t *grp = in_bot + 4 * i;

            luma_bot[2 * i + 0] = grp[1];
            luma_bot[2 * i + 1] = grp[3];
        }

        src  = in_bot + srcStride;
        ydst = luma_bot + lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}
621
622/**
623 * Height should be a multiple of 2 and width should be a multiple of 2.
624 * (If this is a problem for anyone then tell me, and I will fix it.)
625 * Chrominance data is only taken from every second line,
626 * others are ignored in the C version.
627 * FIXME: Write HQ version.
628 */
629void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
630                   uint8_t *vdst, int width, int height, int lumStride,
631                   int chromStride, int srcStride)
632{
633    int y;
634    const int chromWidth = width >> 1;
635    y=0;
636    for (; y<height; y+=2) {
637        int i;
638        for (i=0; i<chromWidth; i++) {
639            unsigned int b = src[6*i+0];
640            unsigned int g = src[6*i+1];
641            unsigned int r = src[6*i+2];
642
643            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
644            unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
645            unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
646
647            udst[i]     = U;
648            vdst[i]     = V;
649            ydst[2*i]   = Y;
650
651            b = src[6*i+3];
652            g = src[6*i+4];
653            r = src[6*i+5];
654
655            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
656            ydst[2*i+1]     = Y;
657        }
658        ydst += lumStride;
659        src  += srcStride;
660
661        for (i=0; i<chromWidth; i++) {
662            unsigned int b = src[6*i+0];
663            unsigned int g = src[6*i+1];
664            unsigned int r = src[6*i+2];
665
666            unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
667
668            ydst[2*i]     = Y;
669
670            b = src[6*i+3];
671            g = src[6*i+4];
672            r = src[6*i+5];
673
674            Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
675            ydst[2*i+1]     = Y;
676        }
677        udst += chromStride;
678        vdst += chromStride;
679        ydst += lumStride;
680        src  += srcStride;
681    }
682}
683
/**
 * Interleave two byte planes into one: for every row, output byte 2*w comes
 * from src1[w] and output byte 2*w + 1 from src2[w].
 *
 * @param src1       first input plane
 * @param src2       second input plane
 * @param dest       output plane; 2 * width bytes are written per row
 * @param width      number of bytes taken from each input per row
 * @param height     number of rows
 * @param src1Stride byte distance between rows of src1
 * @param src2Stride byte distance between rows of src2
 * @param dstStride  byte distance between rows of dest
 */
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, int width,
                              int height, int src1Stride,
                              int src2Stride, int dstStride)
{
    int row;

    for (row = 0; row < height; row++) {
        uint8_t *out = dest;
        int col;

        for (col = 0; col < width; col++) {
            *out++ = src1[col];
            *out++ = src2[col];
        }

        src1 += src1Stride;
        src2 += src2Stride;
        dest += dstStride;
    }
}
702
/**
 * Double two planes horizontally and vertically by sample repetition.
 *
 * For each plane, height / 2 output rows are written; output row y is built
 * from source row y >> 1 and every one of its first width / 2 samples is
 * written twice. The first plane is completed before the second is started.
 *
 * @param src1       first input plane
 * @param src2       second input plane
 * @param dst1       output for the first plane
 * @param dst2       output for the second plane
 * @param width      width / 2 source samples are read per row
 * @param height     height / 2 output rows are written per plane
 * @param srcStride1 byte distance between rows of src1
 * @param srcStride2 byte distance between rows of src2
 * @param dstStride1 byte distance between rows of dst1
 * @param dstStride2 byte distance between rows of dst2
 */
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
                                 int width, int height,
                                 int srcStride1, int srcStride2,
                                 int dstStride1, int dstStride2)
{
    const uint8_t *const in_plane[2]   = { src1, src2 };
    uint8_t *const       out_plane[2]  = { dst1, dst2 };
    const int            in_stride[2]  = { srcStride1, srcStride2 };
    const int            out_stride[2] = { dstStride1, dstStride2 };
    const int cols = width / 2;
    const int rows = height / 2;
    int plane;

    for (plane = 0; plane < 2; plane++) {
        int y;

        for (y = 0; y < rows; y++) {
            const uint8_t *in  = in_plane[plane]  + in_stride[plane]  * (y >> 1);
            uint8_t       *out = out_plane[plane] + out_stride[plane] * y;
            int x;

            for (x = 0; x < cols; x++) {
                const uint8_t sample = in[x];

                out[2 * x + 1] = sample;
                out[2 * x]     = sample;
            }
        }
    }
}
725
/**
 * Interleave three planes into packed Y U Y V Y U Y V groups, reusing each
 * chroma sample for four luma samples horizontally and each chroma row for
 * four luma rows (chroma row index is y >> 2).
 *
 * NOTE(review): the column loop runs width / 2 times and consumes 4 luma
 * samples per iteration, i.e. 2 * width luma bytes per row -- confirm this
 * matches the callers' definition of width.
 *
 * @param src1       plane providing the bytes at even output positions
 * @param src2       plane providing output bytes 1 and 5 of each group
 * @param src3       plane providing output bytes 3 and 7 of each group
 * @param dst        packed output, 8 bytes per group
 * @param width      width / 2 groups are written per row
 * @param height     number of rows
 * @param srcStride1 byte distance between rows of src1
 * @param srcStride2 byte distance between rows of src2
 * @param srcStride3 byte distance between rows of src3
 * @param dstStride  byte distance between rows of dst
 */
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
                                  int width, int height,
                                  int srcStride1, int srcStride2,
                                  int srcStride3, int dstStride)
{
    const int groups = width / 2;
    int row;

    for (row = 0; row < height; row++) {
        const uint8_t *luma = src1 + srcStride1 * row;
        const uint8_t *cb   = src2 + srcStride2 * (row >> 2);
        const uint8_t *cr   = src3 + srcStride3 * (row >> 2);
        uint8_t *out        = dst  + dstStride  * row;
        int g;

        for (g = 0; g < groups; g++, luma += 4, out += 8) {
            out[0] = luma[0];
            out[1] = cb[g];
            out[2] = luma[1];
            out[3] = cr[g];
            out[4] = luma[2];
            out[5] = cb[g];
            out[6] = luma[3];
            out[7] = cr[g];
        }
    }
}
754
/**
 * Copy every second byte of src, starting with src[0], to dst.
 *
 * @param src   input; bytes 0, 2, ..., 2 * (count - 1) are read
 * @param dst   output; count bytes are written
 * @param count number of bytes to produce (nothing is done if <= 0)
 */
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
    int i;

    for (i = 0; i < count; i++)
        dst[i] = src[2 * i];
}
766
/**
 * De-interleave bytes 0 and 2 of every 4-byte group into two outputs.
 *
 * @param src   input, 4 bytes per group
 * @param dst0  receives byte 0 of each group
 * @param dst1  receives byte 2 of each group
 * @param count number of groups (nothing is done if <= 0)
 */
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *grp = src + 4 * i;

        dst0[i] = grp[0];
        dst1[i] = grp[2];
    }
}
780
/**
 * De-interleave bytes 0 and 2 of every 4-byte group, averaging two inputs.
 *
 * Each output byte is (a + b) >> 1 where a and b are the bytes at the same
 * position of src0 and src1.
 *
 * @param src0  first input, 4 bytes per group
 * @param src1  second input, 4 bytes per group
 * @param dst0  receives the average of byte 0 of each group
 * @param dst1  receives the average of byte 2 of each group
 * @param count number of groups (nothing is done if <= 0)
 */
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *a = src0 + 4 * i;
        const uint8_t *b = src1 + 4 * i;

        dst0[i] = (a[0] + b[0]) >> 1;
        dst1[i] = (a[2] + b[2]) >> 1;
    }
}
795
/**
 * De-interleave bytes 1 and 3 of every 4-byte group into two outputs.
 *
 * @param src   input, 4 bytes per group
 * @param dst0  receives byte 1 of each group
 * @param dst1  receives byte 3 of each group
 * @param count number of groups (nothing is done if <= 0)
 */
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                           int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *grp = src + 4 * i;

        dst0[i] = grp[1];
        dst1[i] = grp[3];
    }
}
810
/**
 * De-interleave bytes 1 and 3 of every 4-byte group, averaging two inputs.
 *
 * Each output byte is (a + b) >> 1 where a and b are the bytes at the same
 * position of src0 and src1.
 *
 * @param src0  first input, 4 bytes per group
 * @param src1  second input, 4 bytes per group
 * @param dst0  receives the average of byte 1 of each group
 * @param dst1  receives the average of byte 3 of each group
 * @param count number of groups (nothing is done if <= 0)
 */
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                              uint8_t *dst0, uint8_t *dst1, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        const uint8_t *a = src0 + 4 * i;
        const uint8_t *b = src1 + 4 * i;

        dst0[i] = (a[1] + b[1]) >> 1;
        dst1[i] = (a[3] + b[3]) >> 1;
    }
}
827
/**
 * Split packed input into Y, U and V planes with vertically averaged chroma.
 *
 * Luma is taken from the even bytes of every row. On every odd row, one
 * chroma row is produced by averaging the odd bytes of the previous and the
 * current source row.
 *
 * @param ydst        luma output plane
 * @param udst        first chroma output plane
 * @param vdst        second chroma output plane
 * @param src         packed input
 * @param width       number of luma samples per row
 * @param height      number of rows
 * @param lumStride   byte distance between luma rows
 * @param chromStride byte distance between chroma rows
 * @param srcStride   byte distance between source rows
 */
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    /* width / 2, rounded up */
    const int chromWidth = -((-width) >> 1);
    int line;

    for (line = 0; line < height; line++, src += srcStride, ydst += lumStride) {
        extract_even_c(src, ydst, width);

        if (!(line & 1))
            continue;

        /* odd row: average chroma with the row above */
        extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
        udst += chromStride;
        vdst += chromStride;
    }
}
847
/**
 * Split packed input into Y, U and V planes, one chroma row per source row.
 *
 * Luma is taken from the even bytes of every row; the two chroma planes are
 * taken from bytes 1 and 3 of every 4-byte group.
 *
 * @param ydst        luma output plane
 * @param udst        first chroma output plane
 * @param vdst        second chroma output plane
 * @param src         packed input
 * @param width       number of luma samples per row
 * @param height      number of rows
 * @param lumStride   byte distance between luma rows
 * @param chromStride byte distance between chroma rows
 * @param srcStride   byte distance between source rows
 */
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    /* width / 2, rounded up */
    const int chromWidth = -((-width) >> 1);
    int line;

    for (line = 0; line < height; line++) {
        extract_even_c(src, ydst, width);
        extract_odd2_c(src, udst, vdst, chromWidth);

        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
865
/**
 * Split packed input into Y, U and V planes with vertically averaged chroma.
 *
 * Luma is taken from the odd bytes of every row. On every odd row, one
 * chroma row is produced by averaging bytes 0 and 2 of each 4-byte group of
 * the previous and the current source row.
 *
 * @param ydst        luma output plane
 * @param udst        first chroma output plane
 * @param vdst        second chroma output plane
 * @param src         packed input
 * @param width       number of luma samples per row
 * @param height      number of rows
 * @param lumStride   byte distance between luma rows
 * @param chromStride byte distance between chroma rows
 * @param srcStride   byte distance between source rows
 */
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    /* width / 2, rounded up */
    const int chromWidth = -((-width) >> 1);
    int line;

    for (line = 0; line < height; line++, src += srcStride, ydst += lumStride) {
        extract_even_c(src + 1, ydst, width);

        if (!(line & 1))
            continue;

        /* odd row: average chroma with the row above */
        extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
        udst += chromStride;
        vdst += chromStride;
    }
}
885
/**
 * Split packed input into Y, U and V planes, one chroma row per source row.
 *
 * Luma is taken from the odd bytes of every row; the two chroma planes are
 * taken from bytes 0 and 2 of every 4-byte group.
 *
 * @param ydst        luma output plane
 * @param udst        first chroma output plane
 * @param vdst        second chroma output plane
 * @param src         packed input
 * @param width       number of luma samples per row
 * @param height      number of rows
 * @param lumStride   byte distance between luma rows
 * @param chromStride byte distance between chroma rows
 * @param srcStride   byte distance between source rows
 */
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
                           int lumStride, int chromStride, int srcStride)
{
    /* width / 2, rounded up */
    const int chromWidth = -((-width) >> 1);
    int line;

    for (line = 0; line < height; line++) {
        extract_even_c(src + 1, ydst, width);
        extract_even2_c(src, udst, vdst, chromWidth);

        udst += chromStride;
        vdst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
903
904static inline void rgb2rgb_init_c(void)
905{
906    rgb15to16          = rgb15to16_c;
907    rgb15tobgr24       = rgb15tobgr24_c;
908    rgb15to32          = rgb15to32_c;
909    rgb16tobgr24       = rgb16tobgr24_c;
910    rgb16to32          = rgb16to32_c;
911    rgb16to15          = rgb16to15_c;
912    rgb24tobgr16       = rgb24tobgr16_c;
913    rgb24tobgr15       = rgb24tobgr15_c;
914    rgb24tobgr32       = rgb24tobgr32_c;
915    rgb32to16          = rgb32to16_c;
916    rgb32to15          = rgb32to15_c;
917    rgb32tobgr24       = rgb32tobgr24_c;
918    rgb24to15          = rgb24to15_c;
919    rgb24to16          = rgb24to16_c;
920    rgb24tobgr24       = rgb24tobgr24_c;
921    shuffle_bytes_2103 = shuffle_bytes_2103_c;
922    rgb32tobgr16       = rgb32tobgr16_c;
923    rgb32tobgr15       = rgb32tobgr15_c;
924    yv12toyuy2         = yv12toyuy2_c;
925    yv12touyvy         = yv12touyvy_c;
926    yuv422ptoyuy2      = yuv422ptoyuy2_c;
927    yuv422ptouyvy      = yuv422ptouyvy_c;
928    yuy2toyv12         = yuy2toyv12_c;
929    planar2x           = planar2x_c;
930    rgb24toyv12        = rgb24toyv12_c;
931    interleaveBytes    = interleaveBytes_c;
932    vu9_to_vu12        = vu9_to_vu12_c;
933    yvu9_to_yuy2       = yvu9_to_yuy2_c;
934
935    uyvytoyuv420       = uyvytoyuv420_c;
936    uyvytoyuv422       = uyvytoyuv422_c;
937    yuyvtoyuv420       = yuyvtoyuv420_c;
938    yuyvtoyuv422       = yuyvtoyuv422_c;
939}
940