1/* 2 * jrevdct.c 3 * 4 * This file is part of the Independent JPEG Group's software. 5 * 6 * The authors make NO WARRANTY or representation, either express or implied, 7 * with respect to this software, its quality, accuracy, merchantability, or 8 * fitness for a particular purpose. This software is provided "AS IS", and 9 * you, its user, assume the entire risk as to its quality and accuracy. 10 * 11 * This software is copyright (C) 1991, 1992, Thomas G. Lane. 12 * All Rights Reserved except as specified below. 13 * 14 * Permission is hereby granted to use, copy, modify, and distribute this 15 * software (or portions thereof) for any purpose, without fee, subject to 16 * these conditions: 17 * (1) If any part of the source code for this software is distributed, then 18 * this README file must be included, with this copyright and no-warranty 19 * notice unaltered; and any additions, deletions, or changes to the original 20 * files must be clearly indicated in accompanying documentation. 21 * (2) If only executable code is distributed, then the accompanying 22 * documentation must state that "this software is based in part on the work 23 * of the Independent JPEG Group". 24 * (3) Permission for use of this software is granted only if the user accepts 25 * full responsibility for any undesirable consequences; the authors accept 26 * NO LIABILITY for damages of any kind. 27 * 28 * These conditions apply to any software derived from or based on the IJG 29 * code, not just to the unmodified library. If you use our work, you ought 30 * to acknowledge us. 31 * 32 * Permission is NOT granted for the use of any IJG author's name or company 33 * name in advertising or publicity relating to this software or products 34 * derived from it. This software may be referred to only as "the Independent 35 * JPEG Group's software". 36 * 37 * We specifically permit and encourage the use of this software as the basis 38 * of commercial products, provided that all warranty or liability claims are 39 * assumed by the product vendor. 40 * 41 * This file contains the basic inverse-DCT transformation subroutine. 42 * 43 * This implementation is based on an algorithm described in 44 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT 45 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, 46 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. 47 * The primary algorithm described there uses 11 multiplies and 29 adds. 48 * We use their alternate method with 12 multiplies and 32 adds. 49 * The advantage of this method is that no data path contains more than one 50 * multiplication; this allows a very simple and accurate implementation in 51 * scaled fixed-point arithmetic, with a minimal number of shifts. 52 * 53 * I've made lots of modifications to attempt to take advantage of the 54 * sparse nature of the DCT matrices we're getting. Although the logic 55 * is cumbersome, it's straightforward and the resulting code is much 56 * faster. 57 * 58 * A better way to do this would be to pass in the DCT block as a sparse 59 * matrix, perhaps with the difference cases encoded. 60 */ 61 62/** 63 * @file libavcodec/jrevdct.c 64 * Independent JPEG Group's LLM idct. 65 */ 66 67#include "libavutil/common.h" 68#include "dsputil.h" 69 70#define EIGHT_BIT_SAMPLES 71 72#define DCTSIZE 8 73#define DCTSIZE2 64 74 75#define GLOBAL 76 77#define RIGHT_SHIFT(x, n) ((x) >> (n)) 78 79typedef DCTELEM DCTBLOCK[DCTSIZE2]; 80 81#define CONST_BITS 13 82 83/* 84 * This routine is specialized to the case DCTSIZE = 8. 85 */ 86 87#if DCTSIZE != 8 88 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 89#endif 90 91 92/* 93 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT 94 * on each column. Direct algorithms are also available, but they are 95 * much more complex and seem not to be any faster when reduced to code. 96 * 97 * The poop on this scaling stuff is as follows: 98 * 99 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) 100 * larger than the true IDCT outputs. The final outputs are therefore 101 * a factor of N larger than desired; since N=8 this can be cured by 102 * a simple right shift at the end of the algorithm. The advantage of 103 * this arrangement is that we save two multiplications per 1-D IDCT, 104 * because the y0 and y4 inputs need not be divided by sqrt(N). 105 * 106 * We have to do addition and subtraction of the integer inputs, which 107 * is no problem, and multiplication by fractional constants, which is 108 * a problem to do in integer arithmetic. We multiply all the constants 109 * by CONST_SCALE and convert them to integer constants (thus retaining 110 * CONST_BITS bits of precision in the constants). After doing a 111 * multiplication we have to divide the product by CONST_SCALE, with proper 112 * rounding, to produce the correct output. This division can be done 113 * cheaply as a right shift of CONST_BITS bits. We postpone shifting 114 * as long as possible so that partial sums can be added together with 115 * full fractional precision. 116 * 117 * The outputs of the first pass are scaled up by PASS1_BITS bits so that 118 * they are represented to better-than-integral precision. These outputs 119 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word 120 * with the recommended scaling. (To scale up 12-bit sample data further, an 121 * intermediate int32 array would be needed.) 122 * 123 * To avoid overflow of the 32-bit intermediate results in pass 2, we must 124 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis 125 * shows that the values given below are the most effective. 126 */ 127 128#ifdef EIGHT_BIT_SAMPLES 129#define PASS1_BITS 2 130#else 131#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 132#endif 133 134#define ONE ((int32_t) 1) 135 136#define CONST_SCALE (ONE << CONST_BITS) 137 138/* Convert a positive real constant to an integer scaled by CONST_SCALE. 139 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, 140 * you will pay a significant penalty in run time. In that case, figure 141 * the correct integer constant values and insert them by hand. 142 */ 143 144/* Actually FIX is no longer used, we precomputed them all */ 145#define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5)) 146 147/* Descale and correctly round an int32_t value that's scaled by N bits. 148 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding 149 * the fudge factor is correct for either sign of X. 150 */ 151 152#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) 153 154/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result. 155 * For 8-bit samples with the recommended scaling, all the variable 156 * and constant values involved are no more than 16 bits wide, so a 157 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; 158 * this provides a useful speedup on many machines. 159 * There is no way to specify a 16x16->32 multiply in portable C, but 160 * some C compilers will do the right thing if you provide the correct 161 * combination of casts. 162 * NB: for 12-bit samples, a full 32-bit multiplication will be needed. 163 */ 164 165#ifdef EIGHT_BIT_SAMPLES 166#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ 167#define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const))) 168#endif 169#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ 170#define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const))) 171#endif 172#endif 173 174#ifndef MULTIPLY /* default definition */ 175#define MULTIPLY(var,const) ((var) * (const)) 176#endif 177 178 179/* 180 Unlike our decoder where we approximate the FIXes, we need to use exact 181ones here or successive P-frames will drift too much with Reference frame coding 182*/ 183#define FIX_0_211164243 1730 184#define FIX_0_275899380 2260 185#define FIX_0_298631336 2446 186#define FIX_0_390180644 3196 187#define FIX_0_509795579 4176 188#define FIX_0_541196100 4433 189#define FIX_0_601344887 4926 190#define FIX_0_765366865 6270 191#define FIX_0_785694958 6436 192#define FIX_0_899976223 7373 193#define FIX_1_061594337 8697 194#define FIX_1_111140466 9102 195#define FIX_1_175875602 9633 196#define FIX_1_306562965 10703 197#define FIX_1_387039845 11363 198#define FIX_1_451774981 11893 199#define FIX_1_501321110 12299 200#define FIX_1_662939225 13623 201#define FIX_1_847759065 15137 202#define FIX_1_961570560 16069 203#define FIX_2_053119869 16819 204#define FIX_2_172734803 17799 205#define FIX_2_562915447 20995 206#define FIX_3_072711026 25172 207 208/* 209 * Perform the inverse DCT on one block of coefficients. 210 */ 211 212void j_rev_dct(DCTBLOCK data) 213{ 214 int32_t tmp0, tmp1, tmp2, tmp3; 215 int32_t tmp10, tmp11, tmp12, tmp13; 216 int32_t z1, z2, z3, z4, z5; 217 int32_t d0, d1, d2, d3, d4, d5, d6, d7; 218 register DCTELEM *dataptr; 219 int rowctr; 220 221 /* Pass 1: process rows. */ 222 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ 223 /* furthermore, we scale the results by 2**PASS1_BITS. */ 224 225 dataptr = data; 226 227 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 228 /* Due to quantization, we will usually find that many of the input 229 * coefficients are zero, especially the AC terms. We can exploit this 230 * by short-circuiting the IDCT calculation for any row in which all 231 * the AC terms are zero. In that case each output is equal to the 232 * DC coefficient (with scale factor as needed). 233 * With typical images and quantization tables, half or more of the 234 * row DCT calculations can be simplified this way. 235 */ 236 237 register int *idataptr = (int*)dataptr; 238 239 /* WARNING: we do the same permutation as MMX idct to simplify the 240 video core */ 241 d0 = dataptr[0]; 242 d2 = dataptr[1]; 243 d4 = dataptr[2]; 244 d6 = dataptr[3]; 245 d1 = dataptr[4]; 246 d3 = dataptr[5]; 247 d5 = dataptr[6]; 248 d7 = dataptr[7]; 249 250 if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { 251 /* AC terms all zero */ 252 if (d0) { 253 /* Compute a 32 bit value to assign. */ 254 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); 255 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); 256 257 idataptr[0] = v; 258 idataptr[1] = v; 259 idataptr[2] = v; 260 idataptr[3] = v; 261 } 262 263 dataptr += DCTSIZE; /* advance pointer to next row */ 264 continue; 265 } 266 267 /* Even part: reverse the even part of the forward DCT. */ 268 /* The rotator is sqrt(2)*c(-6). */ 269{ 270 if (d6) { 271 if (d2) { 272 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 273 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 274 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 275 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 276 277 tmp0 = (d0 + d4) << CONST_BITS; 278 tmp1 = (d0 - d4) << CONST_BITS; 279 280 tmp10 = tmp0 + tmp3; 281 tmp13 = tmp0 - tmp3; 282 tmp11 = tmp1 + tmp2; 283 tmp12 = tmp1 - tmp2; 284 } else { 285 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 286 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 287 tmp3 = MULTIPLY(d6, FIX_0_541196100); 288 289 tmp0 = (d0 + d4) << CONST_BITS; 290 tmp1 = (d0 - d4) << CONST_BITS; 291 292 tmp10 = tmp0 + tmp3; 293 tmp13 = tmp0 - tmp3; 294 tmp11 = tmp1 + tmp2; 295 tmp12 = tmp1 - tmp2; 296 } 297 } else { 298 if (d2) { 299 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 300 tmp2 = MULTIPLY(d2, FIX_0_541196100); 301 tmp3 = MULTIPLY(d2, FIX_1_306562965); 302 303 tmp0 = (d0 + d4) << CONST_BITS; 304 tmp1 = (d0 - d4) << CONST_BITS; 305 306 tmp10 = tmp0 + tmp3; 307 tmp13 = tmp0 - tmp3; 308 tmp11 = tmp1 + tmp2; 309 tmp12 = tmp1 - tmp2; 310 } else { 311 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 312 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 313 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 314 } 315 } 316 317 /* Odd part per figure 8; the matrix is unitary and hence its 318 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 319 */ 320 321 if (d7) { 322 if (d5) { 323 if (d3) { 324 if (d1) { 325 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ 326 z1 = d7 + d1; 327 z2 = d5 + d3; 328 z3 = d7 + d3; 329 z4 = d5 + d1; 330 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 331 332 tmp0 = MULTIPLY(d7, FIX_0_298631336); 333 tmp1 = MULTIPLY(d5, FIX_2_053119869); 334 tmp2 = MULTIPLY(d3, FIX_3_072711026); 335 tmp3 = MULTIPLY(d1, FIX_1_501321110); 336 z1 = MULTIPLY(-z1, FIX_0_899976223); 337 z2 = MULTIPLY(-z2, FIX_2_562915447); 338 z3 = MULTIPLY(-z3, FIX_1_961570560); 339 z4 = MULTIPLY(-z4, FIX_0_390180644); 340 341 z3 += z5; 342 z4 += z5; 343 344 tmp0 += z1 + z3; 345 tmp1 += z2 + z4; 346 tmp2 += z2 + z3; 347 tmp3 += z1 + z4; 348 } else { 349 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ 350 z2 = d5 + d3; 351 z3 = d7 + d3; 352 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); 353 354 tmp0 = MULTIPLY(d7, FIX_0_298631336); 355 tmp1 = MULTIPLY(d5, FIX_2_053119869); 356 tmp2 = MULTIPLY(d3, FIX_3_072711026); 357 z1 = MULTIPLY(-d7, FIX_0_899976223); 358 z2 = MULTIPLY(-z2, FIX_2_562915447); 359 z3 = MULTIPLY(-z3, FIX_1_961570560); 360 z4 = MULTIPLY(-d5, FIX_0_390180644); 361 362 z3 += z5; 363 z4 += z5; 364 365 tmp0 += z1 + z3; 366 tmp1 += z2 + z4; 367 tmp2 += z2 + z3; 368 tmp3 = z1 + z4; 369 } 370 } else { 371 if (d1) { 372 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ 373 z1 = d7 + d1; 374 z4 = d5 + d1; 375 z5 = MULTIPLY(d7 + z4, FIX_1_175875602); 376 377 tmp0 = MULTIPLY(d7, FIX_0_298631336); 378 tmp1 = MULTIPLY(d5, FIX_2_053119869); 379 tmp3 = MULTIPLY(d1, FIX_1_501321110); 380 z1 = MULTIPLY(-z1, FIX_0_899976223); 381 z2 = MULTIPLY(-d5, FIX_2_562915447); 382 z3 = MULTIPLY(-d7, FIX_1_961570560); 383 z4 = MULTIPLY(-z4, FIX_0_390180644); 384 385 z3 += z5; 386 z4 += z5; 387 388 tmp0 += z1 + z3; 389 tmp1 += z2 + z4; 390 tmp2 = z2 + z3; 391 tmp3 += z1 + z4; 392 } else { 393 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ 394 tmp0 = MULTIPLY(-d7, FIX_0_601344887); 395 z1 = MULTIPLY(-d7, FIX_0_899976223); 396 z3 = MULTIPLY(-d7, FIX_1_961570560); 397 tmp1 = MULTIPLY(-d5, FIX_0_509795579); 398 z2 = MULTIPLY(-d5, FIX_2_562915447); 399 z4 = MULTIPLY(-d5, FIX_0_390180644); 400 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); 401 402 z3 += z5; 403 z4 += z5; 404 405 tmp0 += z3; 406 tmp1 += z4; 407 tmp2 = z2 + z3; 408 tmp3 = z1 + z4; 409 } 410 } 411 } else { 412 if (d3) { 413 if (d1) { 414 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ 415 z1 = d7 + d1; 416 z3 = d7 + d3; 417 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); 418 419 tmp0 = MULTIPLY(d7, FIX_0_298631336); 420 tmp2 = MULTIPLY(d3, FIX_3_072711026); 421 tmp3 = MULTIPLY(d1, FIX_1_501321110); 422 z1 = MULTIPLY(-z1, FIX_0_899976223); 423 z2 = MULTIPLY(-d3, FIX_2_562915447); 424 z3 = MULTIPLY(-z3, FIX_1_961570560); 425 z4 = MULTIPLY(-d1, FIX_0_390180644); 426 427 z3 += z5; 428 z4 += z5; 429 430 tmp0 += z1 + z3; 431 tmp1 = z2 + z4; 432 tmp2 += z2 + z3; 433 tmp3 += z1 + z4; 434 } else { 435 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ 436 z3 = d7 + d3; 437 438 tmp0 = MULTIPLY(-d7, FIX_0_601344887); 439 z1 = MULTIPLY(-d7, FIX_0_899976223); 440 tmp2 = MULTIPLY(d3, FIX_0_509795579); 441 z2 = MULTIPLY(-d3, FIX_2_562915447); 442 z5 = MULTIPLY(z3, FIX_1_175875602); 443 z3 = MULTIPLY(-z3, FIX_0_785694958); 444 445 tmp0 += z3; 446 tmp1 = z2 + z5; 447 tmp2 += z3; 448 tmp3 = z1 + z5; 449 } 450 } else { 451 if (d1) { 452 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ 453 z1 = d7 + d1; 454 z5 = MULTIPLY(z1, FIX_1_175875602); 455 456 z1 = MULTIPLY(z1, FIX_0_275899380); 457 z3 = MULTIPLY(-d7, FIX_1_961570560); 458 tmp0 = MULTIPLY(-d7, FIX_1_662939225); 459 z4 = MULTIPLY(-d1, FIX_0_390180644); 460 tmp3 = MULTIPLY(d1, FIX_1_111140466); 461 462 tmp0 += z1; 463 tmp1 = z4 + z5; 464 tmp2 = z3 + z5; 465 tmp3 += z1; 466 } else { 467 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ 468 tmp0 = MULTIPLY(-d7, FIX_1_387039845); 469 tmp1 = MULTIPLY(d7, FIX_1_175875602); 470 tmp2 = MULTIPLY(-d7, FIX_0_785694958); 471 tmp3 = MULTIPLY(d7, FIX_0_275899380); 472 } 473 } 474 } 475 } else { 476 if (d5) { 477 if (d3) { 478 if (d1) { 479 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ 480 z2 = d5 + d3; 481 z4 = d5 + d1; 482 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); 483 484 tmp1 = MULTIPLY(d5, FIX_2_053119869); 485 tmp2 = MULTIPLY(d3, FIX_3_072711026); 486 tmp3 = MULTIPLY(d1, FIX_1_501321110); 487 z1 = MULTIPLY(-d1, FIX_0_899976223); 488 z2 = MULTIPLY(-z2, FIX_2_562915447); 489 z3 = MULTIPLY(-d3, FIX_1_961570560); 490 z4 = MULTIPLY(-z4, FIX_0_390180644); 491 492 z3 += z5; 493 z4 += z5; 494 495 tmp0 = z1 + z3; 496 tmp1 += z2 + z4; 497 tmp2 += z2 + z3; 498 tmp3 += z1 + z4; 499 } else { 500 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ 501 z2 = d5 + d3; 502 503 z5 = MULTIPLY(z2, FIX_1_175875602); 504 tmp1 = MULTIPLY(d5, FIX_1_662939225); 505 z4 = MULTIPLY(-d5, FIX_0_390180644); 506 z2 = MULTIPLY(-z2, FIX_1_387039845); 507 tmp2 = MULTIPLY(d3, FIX_1_111140466); 508 z3 = MULTIPLY(-d3, FIX_1_961570560); 509 510 tmp0 = z3 + z5; 511 tmp1 += z2; 512 tmp2 += z2; 513 tmp3 = z4 + z5; 514 } 515 } else { 516 if (d1) { 517 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ 518 z4 = d5 + d1; 519 520 z5 = MULTIPLY(z4, FIX_1_175875602); 521 z1 = MULTIPLY(-d1, FIX_0_899976223); 522 tmp3 = MULTIPLY(d1, FIX_0_601344887); 523 tmp1 = MULTIPLY(-d5, FIX_0_509795579); 524 z2 = MULTIPLY(-d5, FIX_2_562915447); 525 z4 = MULTIPLY(z4, FIX_0_785694958); 526 527 tmp0 = z1 + z5; 528 tmp1 += z4; 529 tmp2 = z2 + z5; 530 tmp3 += z4; 531 } else { 532 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ 533 tmp0 = MULTIPLY(d5, FIX_1_175875602); 534 tmp1 = MULTIPLY(d5, FIX_0_275899380); 535 tmp2 = MULTIPLY(-d5, FIX_1_387039845); 536 tmp3 = MULTIPLY(d5, FIX_0_785694958); 537 } 538 } 539 } else { 540 if (d3) { 541 if (d1) { 542 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ 543 z5 = d1 + d3; 544 tmp3 = MULTIPLY(d1, FIX_0_211164243); 545 tmp2 = MULTIPLY(-d3, FIX_1_451774981); 546 z1 = MULTIPLY(d1, FIX_1_061594337); 547 z2 = MULTIPLY(-d3, FIX_2_172734803); 548 z4 = MULTIPLY(z5, FIX_0_785694958); 549 z5 = MULTIPLY(z5, FIX_1_175875602); 550 551 tmp0 = z1 - z4; 552 tmp1 = z2 + z4; 553 tmp2 += z5; 554 tmp3 += z5; 555 } else { 556 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ 557 tmp0 = MULTIPLY(-d3, FIX_0_785694958); 558 tmp1 = MULTIPLY(-d3, FIX_1_387039845); 559 tmp2 = MULTIPLY(-d3, FIX_0_275899380); 560 tmp3 = MULTIPLY(d3, FIX_1_175875602); 561 } 562 } else { 563 if (d1) { 564 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ 565 tmp0 = MULTIPLY(d1, FIX_0_275899380); 566 tmp1 = MULTIPLY(d1, FIX_0_785694958); 567 tmp2 = MULTIPLY(d1, FIX_1_175875602); 568 tmp3 = MULTIPLY(d1, FIX_1_387039845); 569 } else { 570 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ 571 tmp0 = tmp1 = tmp2 = tmp3 = 0; 572 } 573 } 574 } 575 } 576} 577 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 578 579 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 580 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 581 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 582 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 583 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 584 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 585 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 586 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 587 588 dataptr += DCTSIZE; /* advance pointer to next row */ 589 } 590 591 /* Pass 2: process columns. */ 592 /* Note that we must descale the results by a factor of 8 == 2**3, */ 593 /* and also undo the PASS1_BITS scaling. */ 594 595 dataptr = data; 596 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 597 /* Columns of zeroes can be exploited in the same way as we did with rows. 598 * However, the row calculation has created many nonzero AC terms, so the 599 * simplification applies less often (typically 5% to 10% of the time). 600 * On machines with very fast multiplication, it's possible that the 601 * test takes more time than it's worth. In that case this section 602 * may be commented out. 603 */ 604 605 d0 = dataptr[DCTSIZE*0]; 606 d1 = dataptr[DCTSIZE*1]; 607 d2 = dataptr[DCTSIZE*2]; 608 d3 = dataptr[DCTSIZE*3]; 609 d4 = dataptr[DCTSIZE*4]; 610 d5 = dataptr[DCTSIZE*5]; 611 d6 = dataptr[DCTSIZE*6]; 612 d7 = dataptr[DCTSIZE*7]; 613 614 /* Even part: reverse the even part of the forward DCT. */ 615 /* The rotator is sqrt(2)*c(-6). */ 616 if (d6) { 617 if (d2) { 618 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 619 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 620 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 621 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 622 623 tmp0 = (d0 + d4) << CONST_BITS; 624 tmp1 = (d0 - d4) << CONST_BITS; 625 626 tmp10 = tmp0 + tmp3; 627 tmp13 = tmp0 - tmp3; 628 tmp11 = tmp1 + tmp2; 629 tmp12 = tmp1 - tmp2; 630 } else { 631 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 632 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 633 tmp3 = MULTIPLY(d6, FIX_0_541196100); 634 635 tmp0 = (d0 + d4) << CONST_BITS; 636 tmp1 = (d0 - d4) << CONST_BITS; 637 638 tmp10 = tmp0 + tmp3; 639 tmp13 = tmp0 - tmp3; 640 tmp11 = tmp1 + tmp2; 641 tmp12 = tmp1 - tmp2; 642 } 643 } else { 644 if (d2) { 645 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 646 tmp2 = MULTIPLY(d2, FIX_0_541196100); 647 tmp3 = MULTIPLY(d2, FIX_1_306562965); 648 649 tmp0 = (d0 + d4) << CONST_BITS; 650 tmp1 = (d0 - d4) << CONST_BITS; 651 652 tmp10 = tmp0 + tmp3; 653 tmp13 = tmp0 - tmp3; 654 tmp11 = tmp1 + tmp2; 655 tmp12 = tmp1 - tmp2; 656 } else { 657 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 658 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 659 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 660 } 661 } 662 663 /* Odd part per figure 8; the matrix is unitary and hence its 664 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 665 */ 666 if (d7) { 667 if (d5) { 668 if (d3) { 669 if (d1) { 670 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ 671 z1 = d7 + d1; 672 z2 = d5 + d3; 673 z3 = d7 + d3; 674 z4 = d5 + d1; 675 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 676 677 tmp0 = MULTIPLY(d7, FIX_0_298631336); 678 tmp1 = MULTIPLY(d5, FIX_2_053119869); 679 tmp2 = MULTIPLY(d3, FIX_3_072711026); 680 tmp3 = MULTIPLY(d1, FIX_1_501321110); 681 z1 = MULTIPLY(-z1, FIX_0_899976223); 682 z2 = MULTIPLY(-z2, FIX_2_562915447); 683 z3 = MULTIPLY(-z3, FIX_1_961570560); 684 z4 = MULTIPLY(-z4, FIX_0_390180644); 685 686 z3 += z5; 687 z4 += z5; 688 689 tmp0 += z1 + z3; 690 tmp1 += z2 + z4; 691 tmp2 += z2 + z3; 692 tmp3 += z1 + z4; 693 } else { 694 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ 695 z1 = d7; 696 z2 = d5 + d3; 697 z3 = d7 + d3; 698 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); 699 700 tmp0 = MULTIPLY(d7, FIX_0_298631336); 701 tmp1 = MULTIPLY(d5, FIX_2_053119869); 702 tmp2 = MULTIPLY(d3, FIX_3_072711026); 703 z1 = MULTIPLY(-d7, FIX_0_899976223); 704 z2 = MULTIPLY(-z2, FIX_2_562915447); 705 z3 = MULTIPLY(-z3, FIX_1_961570560); 706 z4 = MULTIPLY(-d5, FIX_0_390180644); 707 708 z3 += z5; 709 z4 += z5; 710 711 tmp0 += z1 + z3; 712 tmp1 += z2 + z4; 713 tmp2 += z2 + z3; 714 tmp3 = z1 + z4; 715 } 716 } else { 717 if (d1) { 718 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ 719 z1 = d7 + d1; 720 z2 = d5; 721 z3 = d7; 722 z4 = d5 + d1; 723 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 724 725 tmp0 = MULTIPLY(d7, FIX_0_298631336); 726 tmp1 = MULTIPLY(d5, FIX_2_053119869); 727 tmp3 = MULTIPLY(d1, FIX_1_501321110); 728 z1 = MULTIPLY(-z1, FIX_0_899976223); 729 z2 = MULTIPLY(-d5, FIX_2_562915447); 730 z3 = MULTIPLY(-d7, FIX_1_961570560); 731 z4 = MULTIPLY(-z4, FIX_0_390180644); 732 733 z3 += z5; 734 z4 += z5; 735 736 tmp0 += z1 + z3; 737 tmp1 += z2 + z4; 738 tmp2 = z2 + z3; 739 tmp3 += z1 + z4; 740 } else { 741 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ 742 tmp0 = MULTIPLY(-d7, FIX_0_601344887); 743 z1 = MULTIPLY(-d7, FIX_0_899976223); 744 z3 = MULTIPLY(-d7, FIX_1_961570560); 745 tmp1 = MULTIPLY(-d5, FIX_0_509795579); 746 z2 = MULTIPLY(-d5, FIX_2_562915447); 747 z4 = MULTIPLY(-d5, FIX_0_390180644); 748 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); 749 750 z3 += z5; 751 z4 += z5; 752 753 tmp0 += z3; 754 tmp1 += z4; 755 tmp2 = z2 + z3; 756 tmp3 = z1 + z4; 757 } 758 } 759 } else { 760 if (d3) { 761 if (d1) { 762 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ 763 z1 = d7 + d1; 764 z3 = d7 + d3; 765 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); 766 767 tmp0 = MULTIPLY(d7, FIX_0_298631336); 768 tmp2 = MULTIPLY(d3, FIX_3_072711026); 769 tmp3 = MULTIPLY(d1, FIX_1_501321110); 770 z1 = MULTIPLY(-z1, FIX_0_899976223); 771 z2 = MULTIPLY(-d3, FIX_2_562915447); 772 z3 = MULTIPLY(-z3, FIX_1_961570560); 773 z4 = MULTIPLY(-d1, FIX_0_390180644); 774 775 z3 += z5; 776 z4 += z5; 777 778 tmp0 += z1 + z3; 779 tmp1 = z2 + z4; 780 tmp2 += z2 + z3; 781 tmp3 += z1 + z4; 782 } else { 783 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ 784 z3 = d7 + d3; 785 786 tmp0 = MULTIPLY(-d7, FIX_0_601344887); 787 z1 = MULTIPLY(-d7, FIX_0_899976223); 788 tmp2 = MULTIPLY(d3, FIX_0_509795579); 789 z2 = MULTIPLY(-d3, FIX_2_562915447); 790 z5 = MULTIPLY(z3, FIX_1_175875602); 791 z3 = MULTIPLY(-z3, FIX_0_785694958); 792 793 tmp0 += z3; 794 tmp1 = z2 + z5; 795 tmp2 += z3; 796 tmp3 = z1 + z5; 797 } 798 } else { 799 if (d1) { 800 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ 801 z1 = d7 + d1; 802 z5 = MULTIPLY(z1, FIX_1_175875602); 803 804 z1 = MULTIPLY(z1, FIX_0_275899380); 805 z3 = MULTIPLY(-d7, FIX_1_961570560); 806 tmp0 = MULTIPLY(-d7, FIX_1_662939225); 807 z4 = MULTIPLY(-d1, FIX_0_390180644); 808 tmp3 = MULTIPLY(d1, FIX_1_111140466); 809 810 tmp0 += z1; 811 tmp1 = z4 + z5; 812 tmp2 = z3 + z5; 813 tmp3 += z1; 814 } else { 815 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ 816 tmp0 = MULTIPLY(-d7, FIX_1_387039845); 817 tmp1 = MULTIPLY(d7, FIX_1_175875602); 818 tmp2 = MULTIPLY(-d7, FIX_0_785694958); 819 tmp3 = MULTIPLY(d7, FIX_0_275899380); 820 } 821 } 822 } 823 } else { 824 if (d5) { 825 if (d3) { 826 if (d1) { 827 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ 828 z2 = d5 + d3; 829 z4 = d5 + d1; 830 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); 831 832 tmp1 = MULTIPLY(d5, FIX_2_053119869); 833 tmp2 = MULTIPLY(d3, FIX_3_072711026); 834 tmp3 = MULTIPLY(d1, FIX_1_501321110); 835 z1 = MULTIPLY(-d1, FIX_0_899976223); 836 z2 = MULTIPLY(-z2, FIX_2_562915447); 837 z3 = MULTIPLY(-d3, FIX_1_961570560); 838 z4 = MULTIPLY(-z4, FIX_0_390180644); 839 840 z3 += z5; 841 z4 += z5; 842 843 tmp0 = z1 + z3; 844 tmp1 += z2 + z4; 845 tmp2 += z2 + z3; 846 tmp3 += z1 + z4; 847 } else { 848 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ 849 z2 = d5 + d3; 850 851 z5 = MULTIPLY(z2, FIX_1_175875602); 852 tmp1 = MULTIPLY(d5, FIX_1_662939225); 853 z4 = MULTIPLY(-d5, FIX_0_390180644); 854 z2 = MULTIPLY(-z2, FIX_1_387039845); 855 tmp2 = MULTIPLY(d3, FIX_1_111140466); 856 z3 = MULTIPLY(-d3, FIX_1_961570560); 857 858 tmp0 = z3 + z5; 859 tmp1 += z2; 860 tmp2 += z2; 861 tmp3 = z4 + z5; 862 } 863 } else { 864 if (d1) { 865 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ 866 z4 = d5 + d1; 867 868 z5 = MULTIPLY(z4, FIX_1_175875602); 869 z1 = MULTIPLY(-d1, FIX_0_899976223); 870 tmp3 = MULTIPLY(d1, FIX_0_601344887); 871 tmp1 = MULTIPLY(-d5, FIX_0_509795579); 872 z2 = MULTIPLY(-d5, FIX_2_562915447); 873 z4 = MULTIPLY(z4, FIX_0_785694958); 874 875 tmp0 = z1 + z5; 876 tmp1 += z4; 877 tmp2 = z2 + z5; 878 tmp3 += z4; 879 } else { 880 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ 881 tmp0 = MULTIPLY(d5, FIX_1_175875602); 882 tmp1 = MULTIPLY(d5, FIX_0_275899380); 883 tmp2 = MULTIPLY(-d5, FIX_1_387039845); 884 tmp3 = MULTIPLY(d5, FIX_0_785694958); 885 } 886 } 887 } else { 888 if (d3) { 889 if (d1) { 890 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ 891 z5 = d1 + d3; 892 tmp3 = MULTIPLY(d1, FIX_0_211164243); 893 tmp2 = MULTIPLY(-d3, FIX_1_451774981); 894 z1 = MULTIPLY(d1, FIX_1_061594337); 895 z2 = MULTIPLY(-d3, FIX_2_172734803); 896 z4 = MULTIPLY(z5, FIX_0_785694958); 897 z5 = MULTIPLY(z5, FIX_1_175875602); 898 899 tmp0 = z1 - z4; 900 tmp1 = z2 + z4; 901 tmp2 += z5; 902 tmp3 += z5; 903 } else { 904 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ 905 tmp0 = MULTIPLY(-d3, FIX_0_785694958); 906 tmp1 = MULTIPLY(-d3, FIX_1_387039845); 907 tmp2 = MULTIPLY(-d3, FIX_0_275899380); 908 tmp3 = MULTIPLY(d3, FIX_1_175875602); 909 } 910 } else { 911 if (d1) { 912 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ 913 tmp0 = MULTIPLY(d1, FIX_0_275899380); 914 tmp1 = MULTIPLY(d1, FIX_0_785694958); 915 tmp2 = MULTIPLY(d1, FIX_1_175875602); 916 tmp3 = MULTIPLY(d1, FIX_1_387039845); 917 } else { 918 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ 919 tmp0 = tmp1 = tmp2 = tmp3 = 0; 920 } 921 } 922 } 923 } 924 925 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 926 927 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, 928 CONST_BITS+PASS1_BITS+3); 929 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, 930 CONST_BITS+PASS1_BITS+3); 931 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, 932 CONST_BITS+PASS1_BITS+3); 933 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, 934 CONST_BITS+PASS1_BITS+3); 935 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, 936 CONST_BITS+PASS1_BITS+3); 937 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, 938 CONST_BITS+PASS1_BITS+3); 939 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, 940 CONST_BITS+PASS1_BITS+3); 941 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, 942 CONST_BITS+PASS1_BITS+3); 943 944 dataptr++; /* advance pointer to next column */ 945 } 946} 947 948#undef DCTSIZE 949#define DCTSIZE 4 950#define DCTSTRIDE 8 951 952void j_rev_dct4(DCTBLOCK data) 953{ 954 int32_t tmp0, tmp1, tmp2, tmp3; 955 int32_t tmp10, tmp11, tmp12, tmp13; 956 int32_t z1; 957 int32_t d0, d2, d4, d6; 958 register DCTELEM *dataptr; 959 int rowctr; 960 961 /* Pass 1: process rows. */ 962 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ 963 /* furthermore, we scale the results by 2**PASS1_BITS. */ 964 965 data[0] += 4; 966 967 dataptr = data; 968 969 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 970 /* Due to quantization, we will usually find that many of the input 971 * coefficients are zero, especially the AC terms. We can exploit this 972 * by short-circuiting the IDCT calculation for any row in which all 973 * the AC terms are zero. In that case each output is equal to the 974 * DC coefficient (with scale factor as needed). 975 * With typical images and quantization tables, half or more of the 976 * row DCT calculations can be simplified this way. 977 */ 978 979 register int *idataptr = (int*)dataptr; 980 981 d0 = dataptr[0]; 982 d2 = dataptr[1]; 983 d4 = dataptr[2]; 984 d6 = dataptr[3]; 985 986 if ((d2 | d4 | d6) == 0) { 987 /* AC terms all zero */ 988 if (d0) { 989 /* Compute a 32 bit value to assign. */ 990 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); 991 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); 992 993 idataptr[0] = v; 994 idataptr[1] = v; 995 } 996 997 dataptr += DCTSTRIDE; /* advance pointer to next row */ 998 continue; 999 } 1000 1001 /* Even part: reverse the even part of the forward DCT. */ 1002 /* The rotator is sqrt(2)*c(-6). */ 1003 if (d6) { 1004 if (d2) { 1005 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 1006 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 1007 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 1008 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 1009 1010 tmp0 = (d0 + d4) << CONST_BITS; 1011 tmp1 = (d0 - d4) << CONST_BITS; 1012 1013 tmp10 = tmp0 + tmp3; 1014 tmp13 = tmp0 - tmp3; 1015 tmp11 = tmp1 + tmp2; 1016 tmp12 = tmp1 - tmp2; 1017 } else { 1018 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 1019 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 1020 tmp3 = MULTIPLY(d6, FIX_0_541196100); 1021 1022 tmp0 = (d0 + d4) << CONST_BITS; 1023 tmp1 = (d0 - d4) << CONST_BITS; 1024 1025 tmp10 = tmp0 + tmp3; 1026 tmp13 = tmp0 - tmp3; 1027 tmp11 = tmp1 + tmp2; 1028 tmp12 = tmp1 - tmp2; 1029 } 1030 } else { 1031 if (d2) { 1032 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 1033 tmp2 = MULTIPLY(d2, FIX_0_541196100); 1034 tmp3 = MULTIPLY(d2, FIX_1_306562965); 1035 1036 tmp0 = (d0 + d4) << CONST_BITS; 1037 tmp1 = (d0 - d4) << CONST_BITS; 1038 1039 tmp10 = tmp0 + tmp3; 1040 tmp13 = tmp0 - tmp3; 1041 tmp11 = tmp1 + tmp2; 1042 tmp12 = tmp1 - tmp2; 1043 } else { 1044 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 1045 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 1046 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 1047 } 1048 } 1049 1050 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 1051 1052 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); 1053 dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); 1054 dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); 1055 dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); 1056 1057 dataptr += DCTSTRIDE; /* advance pointer to next row */ 1058 } 1059 1060 /* Pass 2: process columns. */ 1061 /* Note that we must descale the results by a factor of 8 == 2**3, */ 1062 /* and also undo the PASS1_BITS scaling. */ 1063 1064 dataptr = data; 1065 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 1066 /* Columns of zeroes can be exploited in the same way as we did with rows. 1067 * However, the row calculation has created many nonzero AC terms, so the 1068 * simplification applies less often (typically 5% to 10% of the time). 1069 * On machines with very fast multiplication, it's possible that the 1070 * test takes more time than it's worth. In that case this section 1071 * may be commented out. 1072 */ 1073 1074 d0 = dataptr[DCTSTRIDE*0]; 1075 d2 = dataptr[DCTSTRIDE*1]; 1076 d4 = dataptr[DCTSTRIDE*2]; 1077 d6 = dataptr[DCTSTRIDE*3]; 1078 1079 /* Even part: reverse the even part of the forward DCT. */ 1080 /* The rotator is sqrt(2)*c(-6). */ 1081 if (d6) { 1082 if (d2) { 1083 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 1084 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 1085 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 1086 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 1087 1088 tmp0 = (d0 + d4) << CONST_BITS; 1089 tmp1 = (d0 - d4) << CONST_BITS; 1090 1091 tmp10 = tmp0 + tmp3; 1092 tmp13 = tmp0 - tmp3; 1093 tmp11 = tmp1 + tmp2; 1094 tmp12 = tmp1 - tmp2; 1095 } else { 1096 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 1097 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 1098 tmp3 = MULTIPLY(d6, FIX_0_541196100); 1099 1100 tmp0 = (d0 + d4) << CONST_BITS; 1101 tmp1 = (d0 - d4) << CONST_BITS; 1102 1103 tmp10 = tmp0 + tmp3; 1104 tmp13 = tmp0 - tmp3; 1105 tmp11 = tmp1 + tmp2; 1106 tmp12 = tmp1 - tmp2; 1107 } 1108 } else { 1109 if (d2) { 1110 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 1111 tmp2 = MULTIPLY(d2, FIX_0_541196100); 1112 tmp3 = MULTIPLY(d2, FIX_1_306562965); 1113 1114 tmp0 = (d0 + d4) << CONST_BITS; 1115 tmp1 = (d0 - d4) << CONST_BITS; 1116 1117 tmp10 = tmp0 + tmp3; 1118 tmp13 = tmp0 - tmp3; 1119 tmp11 = tmp1 + tmp2; 1120 tmp12 = tmp1 - tmp2; 1121 } else { 1122 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 1123 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 1124 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 1125 } 1126 } 1127 1128 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 1129 1130 dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); 1131 dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); 1132 dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); 1133 dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); 1134 1135 dataptr++; /* advance pointer to next column */ 1136 } 1137} 1138 1139void j_rev_dct2(DCTBLOCK data){ 1140 int d00, d01, d10, d11; 1141 1142 data[0] += 4; 1143 d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE]; 1144 d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; 1145 d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; 1146 d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; 1147 1148 data[0+0*DCTSTRIDE]= (d00 + d10)>>3; 1149 data[1+0*DCTSTRIDE]= (d01 + d11)>>3; 1150 data[0+1*DCTSTRIDE]= (d00 - d10)>>3; 1151 data[1+1*DCTSTRIDE]= (d01 - d11)>>3; 1152} 1153 1154void j_rev_dct1(DCTBLOCK data){ 1155 data[0] = (data[0] + 4)>>3; 1156} 1157 1158#undef FIX 1159#undef CONST_BITS 1160