1/* 2 * Simple IDCT 3 * 4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23/** 24 * @file 25 * simpleidct in C. 26 */ 27 28/* 29 based upon some outcommented c code from mpeg2dec (idct_mmx.c 30 written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) 31 */ 32 33#include "simple_idct.h" 34 35#include "bit_depth_template.c" 36 37#undef W1 38#undef W2 39#undef W3 40#undef W4 41#undef W5 42#undef W6 43#undef W7 44#undef ROW_SHIFT 45#undef COL_SHIFT 46#undef DC_SHIFT 47#undef MUL 48#undef MAC 49 50#if BIT_DEPTH == 8 51 52#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 53#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 54#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 55#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 56#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 57#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 58#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 59 60#define ROW_SHIFT 11 61#define COL_SHIFT 20 62#define DC_SHIFT 3 63 64#define MUL(a, b) MUL16(a, b) 65#define MAC(a, b, c) MAC16(a, b, c) 66 67#elif BIT_DEPTH == 10 || BIT_DEPTH == 12 68 69#if BIT_DEPTH == 10 70#define W1 (22725*4) // 90901 71#define W2 (21407*4) // 85627 72#define W3 (19265*4) // 77062 73#define W4 (16384*4) // 65535 74#define W5 (12873*4) // 51491 75#define W6 ( 8867*4) // 35468 76#define W7 ( 4520*4) // 18081 77 78#define ROW_SHIFT 15 79#define COL_SHIFT 20 80#define DC_SHIFT 1 81#else 82#define W1 45451 83#define W2 42813 84#define W3 38531 85#define W4 32767 86#define W5 25746 87#define W6 17734 88#define W7 9041 89 90#define ROW_SHIFT 16 91#define COL_SHIFT 17 92#define DC_SHIFT -1 93#endif 94 95#define MUL(a, b) ((a) * (b)) 96#define MAC(a, b, c) ((a) += (b) * (c)) 97 98#else 99 100#error "Unsupported bitdepth" 101 102#endif 103 104static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) 105{ 106 int a0, a1, a2, a3, b0, b1, b2, b3; 107 108#if HAVE_FAST_64BIT 109#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) 110 if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { 111 uint64_t temp; 112 if (DC_SHIFT - extra_shift >= 0) { 113 temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; 114 } else { 115 temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; 116 } 117 temp += temp * (1 << 16); 118 temp += temp * ((uint64_t) 1 << 32); 119 ((uint64_t *)row)[0] = temp; 120 ((uint64_t *)row)[1] = temp; 121 return; 122 } 123#else 124 if (!(((uint32_t*)row)[1] | 125 ((uint32_t*)row)[2] | 126 ((uint32_t*)row)[3] | 127 row[1])) { 128 uint32_t temp; 129 if (DC_SHIFT - extra_shift >= 0) { 130 temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; 131 } else { 132 temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; 133 } 134 temp += temp * (1 << 16); 135 ((uint32_t*)row)[0]=((uint32_t*)row)[1] = 136 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; 137 return; 138 } 139#endif 140 141 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); 142 a1 = a0; 143 a2 = a0; 144 a3 = a0; 145 146 a0 += W2 * row[2]; 147 a1 += W6 * row[2]; 148 a2 -= W6 * row[2]; 149 a3 -= W2 * row[2]; 150 151 b0 = MUL(W1, row[1]); 152 MAC(b0, W3, row[3]); 153 b1 = MUL(W3, row[1]); 154 MAC(b1, -W7, row[3]); 155 b2 = MUL(W5, row[1]); 156 MAC(b2, -W1, row[3]); 157 b3 = MUL(W7, row[1]); 158 MAC(b3, -W5, row[3]); 159 160 if (AV_RN64A(row + 4)) { 161 a0 += W4*row[4] + W6*row[6]; 162 a1 += - W4*row[4] - W2*row[6]; 163 a2 += - W4*row[4] + W2*row[6]; 164 a3 += W4*row[4] - W6*row[6]; 165 166 MAC(b0, W5, row[5]); 167 MAC(b0, W7, row[7]); 168 169 MAC(b1, -W1, row[5]); 170 MAC(b1, -W5, row[7]); 171 172 MAC(b2, W7, row[5]); 173 MAC(b2, W3, row[7]); 174 175 MAC(b3, W3, row[5]); 176 MAC(b3, -W1, row[7]); 177 } 178 179 row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift); 180 row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift); 181 row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift); 182 row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift); 183 row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift); 184 row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift); 185 row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift); 186 row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift); 187} 188 189#define IDCT_COLS do { \ 190 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \ 191 a1 = a0; \ 192 a2 = a0; \ 193 a3 = a0; \ 194 \ 195 a0 += W2*col[8*2]; \ 196 a1 += W6*col[8*2]; \ 197 a2 += -W6*col[8*2]; \ 198 a3 += -W2*col[8*2]; \ 199 \ 200 b0 = MUL(W1, col[8*1]); \ 201 b1 = MUL(W3, col[8*1]); \ 202 b2 = MUL(W5, col[8*1]); \ 203 b3 = MUL(W7, col[8*1]); \ 204 \ 205 MAC(b0, W3, col[8*3]); \ 206 MAC(b1, -W7, col[8*3]); \ 207 MAC(b2, -W1, col[8*3]); \ 208 MAC(b3, -W5, col[8*3]); \ 209 \ 210 if (col[8*4]) { \ 211 a0 += W4*col[8*4]; \ 212 a1 += -W4*col[8*4]; \ 213 a2 += -W4*col[8*4]; \ 214 a3 += W4*col[8*4]; \ 215 } \ 216 \ 217 if (col[8*5]) { \ 218 MAC(b0, W5, col[8*5]); \ 219 MAC(b1, -W1, col[8*5]); \ 220 MAC(b2, W7, col[8*5]); \ 221 MAC(b3, W3, col[8*5]); \ 222 } \ 223 \ 224 if (col[8*6]) { \ 225 a0 += W6*col[8*6]; \ 226 a1 += -W2*col[8*6]; \ 227 a2 += W2*col[8*6]; \ 228 a3 += -W6*col[8*6]; \ 229 } \ 230 \ 231 if (col[8*7]) { \ 232 MAC(b0, W7, col[8*7]); \ 233 MAC(b1, -W5, col[8*7]); \ 234 MAC(b2, W3, col[8*7]); \ 235 MAC(b3, -W1, col[8*7]); \ 236 } \ 237 } while (0) 238 239static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, 240 int16_t *col) 241{ 242 int a0, a1, a2, a3, b0, b1, b2, b3; 243 244 IDCT_COLS; 245 246 dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT); 247 dest += line_size; 248 dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT); 249 dest += line_size; 250 dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT); 251 dest += line_size; 252 dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT); 253 dest += line_size; 254 dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT); 255 dest += line_size; 256 dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT); 257 dest += line_size; 258 dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT); 259 dest += line_size; 260 dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT); 261} 262 263static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, 264 int16_t *col) 265{ 266 int a0, a1, a2, a3, b0, b1, b2, b3; 267 268 IDCT_COLS; 269 270 dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT)); 271 dest += line_size; 272 dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT)); 273 dest += line_size; 274 dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT)); 275 dest += line_size; 276 dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT)); 277 dest += line_size; 278 dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT)); 279 dest += line_size; 280 dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT)); 281 dest += line_size; 282 dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT)); 283 dest += line_size; 284 dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); 285} 286 287static inline void FUNC(idctSparseCol)(int16_t *col) 288{ 289 int a0, a1, a2, a3, b0, b1, b2, b3; 290 291 IDCT_COLS; 292 293 col[0 ] = ((a0 + b0) >> COL_SHIFT); 294 col[8 ] = ((a1 + b1) >> COL_SHIFT); 295 col[16] = ((a2 + b2) >> COL_SHIFT); 296 col[24] = ((a3 + b3) >> COL_SHIFT); 297 col[32] = ((a3 - b3) >> COL_SHIFT); 298 col[40] = ((a2 - b2) >> COL_SHIFT); 299 col[48] = ((a1 - b1) >> COL_SHIFT); 300 col[56] = ((a0 - b0) >> COL_SHIFT); 301} 302 303void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block) 304{ 305 pixel *dest = (pixel *)dest_; 306 int i; 307 308 line_size /= sizeof(pixel); 309 310 for (i = 0; i < 8; i++) 311 FUNC(idctRowCondDC)(block + i*8, 0); 312 313 for (i = 0; i < 8; i++) 314 FUNC(idctSparseColPut)(dest + i, line_size, block + i); 315} 316 317void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, int16_t *block) 318{ 319 pixel *dest = (pixel *)dest_; 320 int i; 321 322 line_size /= sizeof(pixel); 323 324 for (i = 0; i < 8; i++) 325 FUNC(idctRowCondDC)(block + i*8, 0); 326 327 for (i = 0; i < 8; i++) 328 FUNC(idctSparseColAdd)(dest + i, line_size, block + i); 329} 330 331void FUNC(ff_simple_idct)(int16_t *block) 332{ 333 int i; 334 335 for (i = 0; i < 8; i++) 336 FUNC(idctRowCondDC)(block + i*8, 0); 337 338 for (i = 0; i < 8; i++) 339 FUNC(idctSparseCol)(block + i); 340} 341