1/*
2 * Simple IDCT
3 *
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5 *
6 * This file is part of Libav.
7 *
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23/**
24 * @file
25 * simpleidct in C.
26 */
27
/*
  based upon some commented-out C code from mpeg2dec (idct_mmx.c
  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
 */
32
33#include "bit_depth_template.c"
34
/*
 * Drop any previous definitions of the per-bit-depth parameters before
 * redefining them below for the current BIT_DEPTH.
 * NOTE(review): presumably needed because this template is included more
 * than once with different BIT_DEPTH values -- confirm against the includer.
 */
#undef W1
#undef W2
#undef W3
#undef W4
#undef W5
#undef W6
#undef W7
#undef ROW_SHIFT
#undef COL_SHIFT
#undef DC_SHIFT
#undef MUL
#undef MAC

#if BIT_DEPTH == 8

/*
 * Fixed-point cosine weights: Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5,
 * truncated to an integer.
 * NOTE(review): W4 is 16383, one less than the 16384 the formula yields;
 * this looks deliberate -- confirm before "correcting" it.
 */
#define W1  22725  // i = 1
#define W2  21407  // i = 2
#define W3  19266  // i = 3
#define W4  16383  // i = 4 (formula gives 16384, see note above)
#define W5  12873  // i = 5
#define W6  8867   // i = 6
#define W7  4520   // i = 7

/* Right shifts applied after the row pass and the column pass. */
#define ROW_SHIFT 11
#define COL_SHIFT 20
/* Shift applied to row[0] in the DC-only shortcut of idctRowCondDC. */
#define DC_SHIFT 3

/* Multiply / multiply-accumulate go through MUL16/MAC16 (defined elsewhere). */
#define MUL(a, b)    MUL16(a, b)
#define MAC(a, b, c) MAC16(a, b, c)

#elif BIT_DEPTH == 10

/*
 * The values match the same formula with a (1<<16) scale, e.g.
 * cos(1*M_PI/16)*sqrt(2)*(1<<16) + 0.5 = 90901 after truncation.
 * NOTE(review): W4 is again one less (65535) than the formula's 65536.
 */
#define W1 90901
#define W2 85627
#define W3 77062
#define W4 65535
#define W5 51491
#define W6 35468
#define W7 18081

/* Right shifts applied after the row pass and the column pass. */
#define ROW_SHIFT 15
#define COL_SHIFT 20
/* Shift applied to row[0] in the DC-only shortcut of idctRowCondDC. */
#define DC_SHIFT 1

/* Plain multiply and multiply-accumulate (a += b * c). */
#define MUL(a, b)    ((a) * (b))
#define MAC(a, b, c) ((a) += (b) * (c))

#else

#error "Unsupported bitdepth"

#endif
87
88static inline void FUNC(idctRowCondDC)(DCTELEM *row, int extra_shift)
89{
90    int a0, a1, a2, a3, b0, b1, b2, b3;
91
92#if HAVE_FAST_64BIT
93#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
94    if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) {
95        uint64_t temp;
96        if (DC_SHIFT - extra_shift > 0) {
97            temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
98        } else {
99            temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
100        }
101        temp += temp << 16;
102        temp += temp << 32;
103        ((uint64_t *)row)[0] = temp;
104        ((uint64_t *)row)[1] = temp;
105        return;
106    }
107#else
108    if (!(((uint32_t*)row)[1] |
109          ((uint32_t*)row)[2] |
110          ((uint32_t*)row)[3] |
111          row[1])) {
112        uint32_t temp;
113        if (DC_SHIFT - extra_shift > 0) {
114            temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
115        } else {
116            temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
117        }
118        temp += temp << 16;
119        ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
120            ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
121        return;
122    }
123#endif
124
125    a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
126    a1 = a0;
127    a2 = a0;
128    a3 = a0;
129
130    a0 += W2 * row[2];
131    a1 += W6 * row[2];
132    a2 -= W6 * row[2];
133    a3 -= W2 * row[2];
134
135    b0 = MUL(W1, row[1]);
136    MAC(b0, W3, row[3]);
137    b1 = MUL(W3, row[1]);
138    MAC(b1, -W7, row[3]);
139    b2 = MUL(W5, row[1]);
140    MAC(b2, -W1, row[3]);
141    b3 = MUL(W7, row[1]);
142    MAC(b3, -W5, row[3]);
143
144    if (AV_RN64A(row + 4)) {
145        a0 +=   W4*row[4] + W6*row[6];
146        a1 += - W4*row[4] - W2*row[6];
147        a2 += - W4*row[4] + W2*row[6];
148        a3 +=   W4*row[4] - W6*row[6];
149
150        MAC(b0,  W5, row[5]);
151        MAC(b0,  W7, row[7]);
152
153        MAC(b1, -W1, row[5]);
154        MAC(b1, -W5, row[7]);
155
156        MAC(b2,  W7, row[5]);
157        MAC(b2,  W3, row[7]);
158
159        MAC(b3,  W3, row[5]);
160        MAC(b3, -W1, row[7]);
161    }
162
163    row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift);
164    row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift);
165    row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift);
166    row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift);
167    row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift);
168    row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift);
169    row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift);
170    row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift);
171}
172
/*
 * Column pass shared by the idctSparseCol* functions. The expansion expects
 * `col` and the int accumulators a0..a3 / b0..b3 to be declared by the
 * caller. Afterwards output row k is (a_k + b_k) and output row 7-k is
 * (a_k - b_k), both still to be shifted right by COL_SHIFT.
 * Inputs 4..7 are only accumulated when non-zero.
 */
#define IDCT_COLS do {                                             \
        /* even part: inputs 0, 2, 4, 6 accumulate into a0..a3; */ \
        /* the rounding bias is folded into the DC term         */ \
        a0 = a1 = a2 = a3 =                                        \
            W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));             \
                                                                   \
        a0 +=  W2*col[8*2];                                        \
        a1 +=  W6*col[8*2];                                        \
        a2 += -W6*col[8*2];                                        \
        a3 += -W2*col[8*2];                                        \
                                                                   \
        if (col[8*4]) {                                            \
            a0 +=  W4*col[8*4];                                    \
            a1 += -W4*col[8*4];                                    \
            a2 += -W4*col[8*4];                                    \
            a3 +=  W4*col[8*4];                                    \
        }                                                          \
                                                                   \
        if (col[8*6]) {                                            \
            a0 +=  W6*col[8*6];                                    \
            a1 += -W2*col[8*6];                                    \
            a2 +=  W2*col[8*6];                                    \
            a3 += -W6*col[8*6];                                    \
        }                                                          \
                                                                   \
        /* odd part: inputs 1, 3, 5, 7 accumulate into b0..b3 */   \
        b0 = MUL(W1, col[8*1]);                                    \
        b1 = MUL(W3, col[8*1]);                                    \
        b2 = MUL(W5, col[8*1]);                                    \
        b3 = MUL(W7, col[8*1]);                                    \
                                                                   \
        MAC(b0,  W3, col[8*3]);                                    \
        MAC(b1, -W7, col[8*3]);                                    \
        MAC(b2, -W1, col[8*3]);                                    \
        MAC(b3, -W5, col[8*3]);                                    \
                                                                   \
        if (col[8*5]) {                                            \
            MAC(b0,  W5, col[8*5]);                                \
            MAC(b1, -W1, col[8*5]);                                \
            MAC(b2,  W7, col[8*5]);                                \
            MAC(b3,  W3, col[8*5]);                                \
        }                                                          \
                                                                   \
        if (col[8*7]) {                                            \
            MAC(b0,  W7, col[8*7]);                                \
            MAC(b1, -W5, col[8*7]);                                \
            MAC(b2,  W3, col[8*7]);                                \
            MAC(b3, -W1, col[8*7]);                                \
        }                                                          \
    } while (0)
222
223static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
224                                          DCTELEM *col)
225{
226    int a0, a1, a2, a3, b0, b1, b2, b3;
227
228    IDCT_COLS;
229
230    dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT);
231    dest += line_size;
232    dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT);
233    dest += line_size;
234    dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT);
235    dest += line_size;
236    dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT);
237    dest += line_size;
238    dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT);
239    dest += line_size;
240    dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT);
241    dest += line_size;
242    dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT);
243    dest += line_size;
244    dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT);
245}
246
247static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
248                                          DCTELEM *col)
249{
250    int a0, a1, a2, a3, b0, b1, b2, b3;
251
252    IDCT_COLS;
253
254    dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT));
255    dest += line_size;
256    dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT));
257    dest += line_size;
258    dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT));
259    dest += line_size;
260    dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT));
261    dest += line_size;
262    dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT));
263    dest += line_size;
264    dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT));
265    dest += line_size;
266    dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT));
267    dest += line_size;
268    dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
269}
270
271static inline void FUNC(idctSparseCol)(DCTELEM *col)
272{
273    int a0, a1, a2, a3, b0, b1, b2, b3;
274
275    IDCT_COLS;
276
277    col[0 ] = ((a0 + b0) >> COL_SHIFT);
278    col[8 ] = ((a1 + b1) >> COL_SHIFT);
279    col[16] = ((a2 + b2) >> COL_SHIFT);
280    col[24] = ((a3 + b3) >> COL_SHIFT);
281    col[32] = ((a3 - b3) >> COL_SHIFT);
282    col[40] = ((a2 - b2) >> COL_SHIFT);
283    col[48] = ((a1 - b1) >> COL_SHIFT);
284    col[56] = ((a0 - b0) >> COL_SHIFT);
285}
286
287void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, DCTELEM *block)
288{
289    pixel *dest = (pixel *)dest_;
290    int i;
291
292    line_size /= sizeof(pixel);
293
294    for (i = 0; i < 8; i++)
295        FUNC(idctRowCondDC)(block + i*8, 0);
296
297    for (i = 0; i < 8; i++)
298        FUNC(idctSparseColPut)(dest + i, line_size, block + i);
299}
300
301void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, DCTELEM *block)
302{
303    pixel *dest = (pixel *)dest_;
304    int i;
305
306    line_size /= sizeof(pixel);
307
308    for (i = 0; i < 8; i++)
309        FUNC(idctRowCondDC)(block + i*8, 0);
310
311    for (i = 0; i < 8; i++)
312        FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
313}
314
315void FUNC(ff_simple_idct)(DCTELEM *block)
316{
317    int i;
318
319    for (i = 0; i < 8; i++)
320        FUNC(idctRowCondDC)(block + i*8, 0);
321
322    for (i = 0; i < 8; i++)
323        FUNC(idctSparseCol)(block + i);
324}
325