1/*
2 * Copyright (C) 2010 David Conrad
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "libavutil/x86/cpu.h"
22#include "dsputil_x86.h"
23#include "diracdsp_mmx.h"
24#include "fpel.h"
25
/*
 * Prototypes for the rectangle store routines. No bodies are provided in
 * this file; they are presumably implemented in the accompanying x86
 * assembly (TODO confirm).
 *
 * All four share one signature:
 *   dst / dst_stride  - 8-bit destination buffer and its stride
 *   src / src_stride  - 16-bit source buffer and its stride
 *   width / height    - dimensions of the rectangle
 */
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                             const int16_t *src, int src_stride,
                             int width, int height);
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                                    const int16_t *src, int src_stride,
                                    int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);
30
/*
 * HPEL_FILTER(MMSIZE, EXT) declares the external row filters
 * ff_dirac_hpel_filter_v_<EXT> / ff_dirac_hpel_filter_h_<EXT> and defines
 * the static driver dirac_hpel_filter_<EXT>() that applies them row by row.
 *
 * For each of the `height` rows the driver:
 *   1. runs the vertical filter from src into dstv, with both pointers moved
 *      back by MMSIZE bytes and the width extended by MMSIZE + 5;
 *   2. runs the horizontal filter from src into dsth;
 *   3. runs the horizontal filter from the freshly written dstv row into dstc;
 * and then advances all four pointers by `stride`.
 */
#define HPEL_FILTER(MMSIZE, EXT)                                                  \
    void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int);    \
    void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int);         \
                                                                                  \
    static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv,           \
                                          uint8_t *dstc, const uint8_t *src,      \
                                          int stride, int width, int height)      \
    {                                                                             \
        for (; height; height--) {                                                \
            ff_dirac_hpel_filter_v_ ## EXT(dstv - (MMSIZE), src - (MMSIZE),       \
                                           stride, width + (MMSIZE) + 5);         \
            ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width);                     \
            ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                    \
                                                                                  \
            src  += stride;                                                       \
            dsth += stride;                                                       \
            dstv += stride;                                                       \
            dstc += stride;                                                       \
        }                                                                         \
    }

/* The MMX driver is only instantiated when not building for x86-64. */
#if !ARCH_X86_64
HPEL_FILTER(8, mmx)
#endif
HPEL_FILTER(16, sse2)
55
/*
 * PIXFUNC(PFX, IDX, EXT) installs the 16- and 32-pixel-wide
 * ff_<PFX>_dirac_pixels{16,32}_<EXT> functions into
 * c-><PFX>_dirac_pixels_tab[1][IDX] and [2][IDX].
 *
 * A pointer named `c` must be in scope at the point of use.
 *
 * The [0] entry (ff_<PFX>_dirac_pixels8_<EXT>) is deliberately NOT installed:
 * the original source kept that assignment commented out under an
 * "MMXDISABLED" marker.
 *
 * The expansion is wrapped in do { ... } while (0) so that the macro behaves
 * as a single statement; without it, only the first assignment would be
 * controlled by an unbraced if/else surrounding the macro call.
 */
#define PIXFUNC(PFX, IDX, EXT)                                                         \
    do {                                                                               \
        c->PFX ## _dirac_pixels_tab[1][(IDX)] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
        c->PFX ## _dirac_pixels_tab[2][(IDX)] = ff_ ## PFX ## _dirac_pixels32_ ## EXT; \
    } while (0)
60
/*
 * DIRAC_PIXOP(OPNAME2, OPNAME, EXT) defines three wrappers,
 * ff_<OPNAME2>_dirac_pixels{8,16,32}_<EXT>().
 *
 * Each wrapper checks the row count h:
 *   - if h is not a multiple of 4, the call is forwarded unchanged to
 *     ff_<OPNAME2>_dirac_pixels<N>_c();
 *   - otherwise <OPNAME>_pixels<N>_<EXT>() is called with src[0] only.
 * The 32-wide wrapper has no 32-wide primitive to call; it issues two
 * 16-wide calls, the second one offset by 16 bytes in both dst and src[0].
 */
#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)                                      \
void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    if (h & 3) {                                                               \
        ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);               \
        return;                                                                \
    }                                                                          \
    OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);                        \
}                                                                              \
void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst,                   \
                                               const uint8_t *src[5],          \
                                               int stride, int h)              \
{                                                                              \
    if (h & 3) {                                                               \
        ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);              \
        return;                                                                \
    }                                                                          \
    OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);                       \
}                                                                              \
void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst,                   \
                                               const uint8_t *src[5],          \
                                               int stride, int h)              \
{                                                                              \
    if (h & 3) {                                                               \
        ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);              \
        return;                                                                \
    }                                                                          \
    OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);             \
    OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);             \
}

DIRAC_PIXOP(put, ff_put, mmx)
DIRAC_PIXOP(avg, ff_avg, mmx)
DIRAC_PIXOP(avg, ff_avg, mmxext)
89
/*
 * 16-pixel-wide "put" wrapper for SSE2.
 *
 * When h is not a multiple of 4 the call is forwarded unchanged to
 * ff_put_dirac_pixels16_c(); otherwise ff_put_pixels16_sse2() is called
 * with src[0] as the only source pointer.
 */
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if (h & 3) {
        ff_put_dirac_pixels16_c(dst, src, stride, h);
        return;
    }

    ff_put_pixels16_sse2(dst, src[0], stride, h);
}
/*
 * 16-pixel-wide "avg" wrapper for SSE2.
 *
 * When h is not a multiple of 4 the call is forwarded unchanged to
 * ff_avg_dirac_pixels16_c(); otherwise ff_avg_pixels16_sse2() is called
 * with src[0] as the only source pointer.
 */
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if (h & 3) {
        ff_avg_dirac_pixels16_c(dst, src, stride, h);
        return;
    }

    ff_avg_pixels16_sse2(dst, src[0], stride, h);
}
/*
 * 32-pixel-wide "put" wrapper for SSE2.
 *
 * When h is not a multiple of 4 the call is forwarded unchanged to
 * ff_put_dirac_pixels32_c(). Otherwise the 32-byte-wide block is handled as
 * two 16-wide ff_put_pixels16_sse2() calls: one at offset 0 and one at
 * offset 16 of both dst and src[0].
 */
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if (h & 3) {
        ff_put_dirac_pixels32_c(dst, src, stride, h);
        return;
    }

    ff_put_pixels16_sse2(dst,      src[0],      stride, h);
    ff_put_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
}
/*
 * 32-pixel-wide "avg" wrapper for SSE2.
 *
 * When h is not a multiple of 4 the call is forwarded unchanged to
 * ff_avg_dirac_pixels32_c(). Otherwise the 32-byte-wide block is handled as
 * two 16-wide ff_avg_pixels16_sse2() calls: one at offset 0 and one at
 * offset 16 of both dst and src[0].
 */
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if (h & 3) {
        ff_avg_dirac_pixels32_c(dst, src, stride, h);
        return;
    }

    ff_avg_pixels16_sse2(dst,      src[0],      stride, h);
    ff_avg_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
}
122
123void ff_diracdsp_init_mmx(DiracDSPContext* c)
124{
125    int mm_flags = av_get_cpu_flags();
126
127    if (EXTERNAL_MMX(mm_flags)) {
128    c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
129#if !ARCH_X86_64
130    c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
131    c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
132    c->dirac_hpel_filter = dirac_hpel_filter_mmx;
133    c->add_rect_clamped = ff_add_rect_clamped_mmx;
134    c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx;
135#endif
136    PIXFUNC(put, 0, mmx);
137    PIXFUNC(avg, 0, mmx);
138    }
139
140    if (EXTERNAL_MMXEXT(mm_flags)) {
141        PIXFUNC(avg, 0, mmxext);
142    }
143
144    if (EXTERNAL_SSE2(mm_flags)) {
145        c->dirac_hpel_filter = dirac_hpel_filter_sse2;
146        c->add_rect_clamped = ff_add_rect_clamped_sse2;
147        c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2;
148
149        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
150        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
151
152        c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
153        c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
154        c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
155        c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
156    }
157}
158