1/* 2 * jrevdct.c 3 * 4 * This file is part of the Independent JPEG Group's software. 5 * 6 * The authors make NO WARRANTY or representation, either express or implied, 7 * with respect to this software, its quality, accuracy, merchantability, or 8 * fitness for a particular purpose. This software is provided "AS IS", and 9 * you, its user, assume the entire risk as to its quality and accuracy. 10 * 11 * This software is copyright (C) 1991, 1992, Thomas G. Lane. 12 * All Rights Reserved except as specified below. 13 * 14 * Permission is hereby granted to use, copy, modify, and distribute this 15 * software (or portions thereof) for any purpose, without fee, subject to 16 * these conditions: 17 * (1) If any part of the source code for this software is distributed, then 18 * this README file must be included, with this copyright and no-warranty 19 * notice unaltered; and any additions, deletions, or changes to the original 20 * files must be clearly indicated in accompanying documentation. 21 * (2) If only executable code is distributed, then the accompanying 22 * documentation must state that "this software is based in part on the work 23 * of the Independent JPEG Group". 24 * (3) Permission for use of this software is granted only if the user accepts 25 * full responsibility for any undesirable consequences; the authors accept 26 * NO LIABILITY for damages of any kind. 27 * 28 * These conditions apply to any software derived from or based on the IJG 29 * code, not just to the unmodified library. If you use our work, you ought 30 * to acknowledge us. 31 * 32 * Permission is NOT granted for the use of any IJG author's name or company 33 * name in advertising or publicity relating to this software or products 34 * derived from it. This software may be referred to only as "the Independent 35 * JPEG Group's software". 36 * 37 * We specifically permit and encourage the use of this software as the basis 38 * of commercial products, provided that all warranty or liability claims are 39 * assumed by the product vendor. 40 * 41 * This file contains the basic inverse-DCT transformation subroutine. 42 * 43 * This implementation is based on an algorithm described in 44 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT 45 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, 46 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. 47 * The primary algorithm described there uses 11 multiplies and 29 adds. 48 * We use their alternate method with 12 multiplies and 32 adds. 49 * The advantage of this method is that no data path contains more than one 50 * multiplication; this allows a very simple and accurate implementation in 51 * scaled fixed-point arithmetic, with a minimal number of shifts. 52 * 53 * I've made lots of modifications to attempt to take advantage of the 54 * sparse nature of the DCT matrices we're getting. Although the logic 55 * is cumbersome, it's straightforward and the resulting code is much 56 * faster. 57 * 58 * A better way to do this would be to pass in the DCT block as a sparse 59 * matrix, perhaps with the difference cases encoded. 60 */ 61 62/** 63 * @file 64 * Independent JPEG Group's LLM idct. 65 */ 66 67#include "libavutil/common.h" 68#include "dsputil.h" 69 70#define EIGHT_BIT_SAMPLES 71 72#define DCTSIZE 8 73#define DCTSIZE2 64 74 75#define GLOBAL 76 77#define RIGHT_SHIFT(x, n) ((x) >> (n)) 78 79typedef DCTELEM DCTBLOCK[DCTSIZE2]; 80 81#define CONST_BITS 13 82 83/* 84 * This routine is specialized to the case DCTSIZE = 8. 85 */ 86 87#if DCTSIZE != 8 88 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 89#endif 90 91 92/* 93 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT 94 * on each column. Direct algorithms are also available, but they are 95 * much more complex and seem not to be any faster when reduced to code. 96 * 97 * The poop on this scaling stuff is as follows: 98 * 99 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) 100 * larger than the true IDCT outputs. The final outputs are therefore 101 * a factor of N larger than desired; since N=8 this can be cured by 102 * a simple right shift at the end of the algorithm. The advantage of 103 * this arrangement is that we save two multiplications per 1-D IDCT, 104 * because the y0 and y4 inputs need not be divided by sqrt(N). 105 * 106 * We have to do addition and subtraction of the integer inputs, which 107 * is no problem, and multiplication by fractional constants, which is 108 * a problem to do in integer arithmetic. We multiply all the constants 109 * by CONST_SCALE and convert them to integer constants (thus retaining 110 * CONST_BITS bits of precision in the constants). After doing a 111 * multiplication we have to divide the product by CONST_SCALE, with proper 112 * rounding, to produce the correct output. This division can be done 113 * cheaply as a right shift of CONST_BITS bits. We postpone shifting 114 * as long as possible so that partial sums can be added together with 115 * full fractional precision. 116 * 117 * The outputs of the first pass are scaled up by PASS1_BITS bits so that 118 * they are represented to better-than-integral precision. These outputs 119 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word 120 * with the recommended scaling. (To scale up 12-bit sample data further, an 121 * intermediate int32 array would be needed.) 122 * 123 * To avoid overflow of the 32-bit intermediate results in pass 2, we must 124 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis 125 * shows that the values given below are the most effective. 126 */ 127 128#ifdef EIGHT_BIT_SAMPLES 129#define PASS1_BITS 2 130#else 131#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 132#endif 133 134#define ONE ((int32_t) 1) 135 136#define CONST_SCALE (ONE << CONST_BITS) 137 138/* Convert a positive real constant to an integer scaled by CONST_SCALE. 139 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, 140 * you will pay a significant penalty in run time. In that case, figure 141 * the correct integer constant values and insert them by hand. 142 */ 143 144/* Actually FIX is no longer used, we precomputed them all */ 145#define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5)) 146 147/* Descale and correctly round an int32_t value that's scaled by N bits. 148 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding 149 * the fudge factor is correct for either sign of X. 150 */ 151 152#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) 153 154/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result. 155 * For 8-bit samples with the recommended scaling, all the variable 156 * and constant values involved are no more than 16 bits wide, so a 157 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; 158 * this provides a useful speedup on many machines. 159 * There is no way to specify a 16x16->32 multiply in portable C, but 160 * some C compilers will do the right thing if you provide the correct 161 * combination of casts. 162 * NB: for 12-bit samples, a full 32-bit multiplication will be needed. 163 */ 164 165#ifdef EIGHT_BIT_SAMPLES 166#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ 167#define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const))) 168#endif 169#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ 170#define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const))) 171#endif 172#endif 173 174#ifndef MULTIPLY /* default definition */ 175#define MULTIPLY(var,const) ((var) * (const)) 176#endif 177 178 179/* 180 Unlike our decoder where we approximate the FIXes, we need to use exact 181ones here or successive P-frames will drift too much with Reference frame coding 182*/ 183#define FIX_0_211164243 1730 184#define FIX_0_275899380 2260 185#define FIX_0_298631336 2446 186#define FIX_0_390180644 3196 187#define FIX_0_509795579 4176 188#define FIX_0_541196100 4433 189#define FIX_0_601344887 4926 190#define FIX_0_765366865 6270 191#define FIX_0_785694958 6436 192#define FIX_0_899976223 7373 193#define FIX_1_061594337 8697 194#define FIX_1_111140466 9102 195#define FIX_1_175875602 9633 196#define FIX_1_306562965 10703 197#define FIX_1_387039845 11363 198#define FIX_1_451774981 11893 199#define FIX_1_501321110 12299 200#define FIX_1_662939225 13623 201#define FIX_1_847759065 15137 202#define FIX_1_961570560 16069 203#define FIX_2_053119869 16819 204#define FIX_2_172734803 17799 205#define FIX_2_562915447 20995 206#define FIX_3_072711026 25172 207 208/* 209 * Perform the inverse DCT on one block of coefficients. 210 */ 211 212void j_rev_dct(DCTBLOCK data) 213{ 214 int32_t tmp0, tmp1, tmp2, tmp3; 215 int32_t tmp10, tmp11, tmp12, tmp13; 216 int32_t z1, z2, z3, z4, z5; 217 int32_t d0, d1, d2, d3, d4, d5, d6, d7; 218 register DCTELEM *dataptr; 219 int rowctr; 220 221 /* Pass 1: process rows. */ 222 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ 223 /* furthermore, we scale the results by 2**PASS1_BITS. */ 224 225 dataptr = data; 226 227 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 228 /* Due to quantization, we will usually find that many of the input 229 * coefficients are zero, especially the AC terms. We can exploit this 230 * by short-circuiting the IDCT calculation for any row in which all 231 * the AC terms are zero. In that case each output is equal to the 232 * DC coefficient (with scale factor as needed). 233 * With typical images and quantization tables, half or more of the 234 * row DCT calculations can be simplified this way. 235 */ 236 237 register int *idataptr = (int*)dataptr; 238 239 /* WARNING: we do the same permutation as MMX idct to simplify the 240 video core */ 241 d0 = dataptr[0]; 242 d2 = dataptr[1]; 243 d4 = dataptr[2]; 244 d6 = dataptr[3]; 245 d1 = dataptr[4]; 246 d3 = dataptr[5]; 247 d5 = dataptr[6]; 248 d7 = dataptr[7]; 249 250 if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { 251 /* AC terms all zero */ 252 if (d0) { 253 /* Compute a 32 bit value to assign. */ 254 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); 255 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); 256 257 idataptr[0] = v; 258 idataptr[1] = v; 259 idataptr[2] = v; 260 idataptr[3] = v; 261 } 262 263 dataptr += DCTSIZE; /* advance pointer to next row */ 264 continue; 265 } 266 267 /* Even part: reverse the even part of the forward DCT. */ 268 /* The rotator is sqrt(2)*c(-6). */ 269{ 270 if (d6) { 271 if (d2) { 272 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 273 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 274 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 275 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 276 277 tmp0 = (d0 + d4) << CONST_BITS; 278 tmp1 = (d0 - d4) << CONST_BITS; 279 280 tmp10 = tmp0 + tmp3; 281 tmp13 = tmp0 - tmp3; 282 tmp11 = tmp1 + tmp2; 283 tmp12 = tmp1 - tmp2; 284 } else { 285 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 286 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 287 tmp3 = MULTIPLY(d6, FIX_0_541196100); 288 289 tmp0 = (d0 + d4) << CONST_BITS; 290 tmp1 = (d0 - d4) << CONST_BITS; 291 292 tmp10 = tmp0 + tmp3; 293 tmp13 = tmp0 - tmp3; 294 tmp11 = tmp1 + tmp2; 295 tmp12 = tmp1 - tmp2; 296 } 297 } else { 298 if (d2) { 299 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 300 tmp2 = MULTIPLY(d2, FIX_0_541196100); 301 tmp3 = MULTIPLY(d2, FIX_1_306562965); 302 303 tmp0 = (d0 + d4) << CONST_BITS; 304 tmp1 = (d0 - d4) << CONST_BITS; 305 306 tmp10 = tmp0 + tmp3; 307 tmp13 = tmp0 - tmp3; 308 tmp11 = tmp1 + tmp2; 309 tmp12 = tmp1 - tmp2; 310 } else { 311 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 312 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 313 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 314 } 315 } 316 317 /* Odd part per figure 8; the matrix is unitary and hence its 318 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 319 */ 320 321 if (d7) { 322 if (d5) { 323 if (d3) { 324 if (d1) { 325 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ 326 z1 = d7 + d1; 327 z2 = d5 + d3; 328 z3 = d7 + d3; 329 z4 = d5 + d1; 330 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 331 332 tmp0 = MULTIPLY(d7, FIX_0_298631336); 333 tmp1 = MULTIPLY(d5, FIX_2_053119869); 334 tmp2 = MULTIPLY(d3, FIX_3_072711026); 335 tmp3 = MULTIPLY(d1, FIX_1_501321110); 336 z1 = MULTIPLY(-z1, FIX_0_899976223); 337 z2 = MULTIPLY(-z2, FIX_2_562915447); 338 z3 = MULTIPLY(-z3, FIX_1_961570560); 339 z4 = MULTIPLY(-z4, FIX_0_390180644); 340 341 z3 += z5; 342 z4 += z5; 343 344 tmp0 += z1 + z3; 345 tmp1 += z2 + z4; 346 tmp2 += z2 + z3; 347 tmp3 += z1 + z4; 348 } else { 349 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ 350 z2 = d5 + d3; 351 z3 = d7 + d3; 352 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); 353 354 tmp0 = MULTIPLY(d7, FIX_0_298631336); 355 tmp1 = MULTIPLY(d5, FIX_2_053119869); 356 tmp2 = MULTIPLY(d3, FIX_3_072711026); 357 z1 = MULTIPLY(-d7, FIX_0_899976223); 358 z2 = MULTIPLY(-z2, FIX_2_562915447); 359 z3 = MULTIPLY(-z3, FIX_1_961570560); 360 z4 = MULTIPLY(-d5, FIX_0_390180644); 361 362 z3 += z5; 363 z4 += z5; 364 365 tmp0 += z1 + z3; 366 tmp1 += z2 + z4; 367 tmp2 += z2 + z3; 368 tmp3 = z1 + z4; 369 } 370 } else { 371 if (d1) { 372 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ 373 z1 = d7 + d1; 374 z4 = d5 + d1; 375 z5 = MULTIPLY(d7 + z4, FIX_1_175875602); 376 377 tmp0 = MULTIPLY(d7, FIX_0_298631336); 378 tmp1 = MULTIPLY(d5, FIX_2_053119869); 379 tmp3 = MULTIPLY(d1, FIX_1_501321110); 380 z1 = MULTIPLY(-z1, FIX_0_899976223); 381 z2 = MULTIPLY(-d5, FIX_2_562915447); 382 z3 = MULTIPLY(-d7, FIX_1_961570560); 383 z4 = MULTIPLY(-z4, FIX_0_390180644); 384 385 z3 += z5; 386 z4 += z5; 387 388 tmp0 += z1 + z3; 389 tmp1 += z2 + z4; 390 tmp2 = z2 + z3; 391 tmp3 += z1 + z4; 392 } else { 393 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ 394 tmp0 = MULTIPLY(-d7, FIX_0_601344887); 395 z1 = MULTIPLY(-d7, FIX_0_899976223); 396 z3 = MULTIPLY(-d7, FIX_1_961570560); 397 tmp1 = MULTIPLY(-d5, FIX_0_509795579); 398 z2 = MULTIPLY(-d5, FIX_2_562915447); 399 z4 = MULTIPLY(-d5, FIX_0_390180644); 400 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); 401 402 z3 += z5; 403 z4 += z5; 404 405 tmp0 += z3; 406 tmp1 += z4; 407 tmp2 = z2 + z3; 408 tmp3 = z1 + z4; 409 } 410 } 411 } else { 412 if (d3) { 413 if (d1) { 414 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ 415 z1 = d7 + d1; 416 z3 = d7 + d3; 417 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); 418 419 tmp0 = MULTIPLY(d7, FIX_0_298631336); 420 tmp2 = MULTIPLY(d3, FIX_3_072711026); 421 tmp3 = MULTIPLY(d1, FIX_1_501321110); 422 z1 = MULTIPLY(-z1, FIX_0_899976223); 423 z2 = MULTIPLY(-d3, FIX_2_562915447); 424 z3 = MULTIPLY(-z3, FIX_1_961570560); 425 z4 = MULTIPLY(-d1, FIX_0_390180644); 426 427 z3 += z5; 428 z4 += z5; 429 430 tmp0 += z1 + z3; 431 tmp1 = z2 + z4; 432 tmp2 += z2 + z3; 433 tmp3 += z1 + z4; 434 } else { 435 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ 436 z3 = d7 + d3; 437 438 tmp0 = MULTIPLY(-d7, FIX_0_601344887); 439 z1 = MULTIPLY(-d7, FIX_0_899976223); 440 tmp2 = MULTIPLY(d3, FIX_0_509795579); 441 z2 = MULTIPLY(-d3, FIX_2_562915447); 442 z5 = MULTIPLY(z3, FIX_1_175875602); 443 z3 = MULTIPLY(-z3, FIX_0_785694958); 444 445 tmp0 += z3; 446 tmp1 = z2 + z5; 447 tmp2 += z3; 448 tmp3 = z1 + z5; 449 } 450 } else { 451 if (d1) { 452 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ 453 z1 = d7 + d1; 454 z5 = MULTIPLY(z1, FIX_1_175875602); 455 456 z1 = MULTIPLY(z1, FIX_0_275899380); 457 z3 = MULTIPLY(-d7, FIX_1_961570560); 458 tmp0 = MULTIPLY(-d7, FIX_1_662939225); 459 z4 = MULTIPLY(-d1, FIX_0_390180644); 460 tmp3 = MULTIPLY(d1, FIX_1_111140466); 461 462 tmp0 += z1; 463 tmp1 = z4 + z5; 464 tmp2 = z3 + z5; 465 tmp3 += z1; 466 } else { 467 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ 468 tmp0 = MULTIPLY(-d7, FIX_1_387039845); 469 tmp1 = MULTIPLY(d7, FIX_1_175875602); 470 tmp2 = MULTIPLY(-d7, FIX_0_785694958); 471 tmp3 = MULTIPLY(d7, FIX_0_275899380); 472 } 473 } 474 } 475 } else { 476 if (d5) { 477 if (d3) { 478 if (d1) { 479 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ 480 z2 = d5 + d3; 481 z4 = d5 + d1; 482 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); 483 484 tmp1 = MULTIPLY(d5, FIX_2_053119869); 485 tmp2 = MULTIPLY(d3, FIX_3_072711026); 486 tmp3 = MULTIPLY(d1, FIX_1_501321110); 487 z1 = MULTIPLY(-d1, FIX_0_899976223); 488 z2 = MULTIPLY(-z2, FIX_2_562915447); 489 z3 = MULTIPLY(-d3, FIX_1_961570560); 490 z4 = MULTIPLY(-z4, FIX_0_390180644); 491 492 z3 += z5; 493 z4 += z5; 494 495 tmp0 = z1 + z3; 496 tmp1 += z2 + z4; 497 tmp2 += z2 + z3; 498 tmp3 += z1 + z4; 499 } else { 500 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ 501 z2 = d5 + d3; 502 503 z5 = MULTIPLY(z2, FIX_1_175875602); 504 tmp1 = MULTIPLY(d5, FIX_1_662939225); 505 z4 = MULTIPLY(-d5, FIX_0_390180644); 506 z2 = MULTIPLY(-z2, FIX_1_387039845); 507 tmp2 = MULTIPLY(d3, FIX_1_111140466); 508 z3 = MULTIPLY(-d3, FIX_1_961570560); 509 510 tmp0 = z3 + z5; 511 tmp1 += z2; 512 tmp2 += z2; 513 tmp3 = z4 + z5; 514 } 515 } else { 516 if (d1) { 517 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ 518 z4 = d5 + d1; 519 520 z5 = MULTIPLY(z4, FIX_1_175875602); 521 z1 = MULTIPLY(-d1, FIX_0_899976223); 522 tmp3 = MULTIPLY(d1, FIX_0_601344887); 523 tmp1 = MULTIPLY(-d5, FIX_0_509795579); 524 z2 = MULTIPLY(-d5, FIX_2_562915447); 525 z4 = MULTIPLY(z4, FIX_0_785694958); 526 527 tmp0 = z1 + z5; 528 tmp1 += z4; 529 tmp2 = z2 + z5; 530 tmp3 += z4; 531 } else { 532 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ 533 tmp0 = MULTIPLY(d5, FIX_1_175875602); 534 tmp1 = MULTIPLY(d5, FIX_0_275899380); 535 tmp2 = MULTIPLY(-d5, FIX_1_387039845); 536 tmp3 = MULTIPLY(d5, FIX_0_785694958); 537 } 538 } 539 } else { 540 if (d3) { 541 if (d1) { 542 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ 543 z5 = d1 + d3; 544 tmp3 = MULTIPLY(d1, FIX_0_211164243); 545 tmp2 = MULTIPLY(-d3, FIX_1_451774981); 546 z1 = MULTIPLY(d1, FIX_1_061594337); 547 z2 = MULTIPLY(-d3, FIX_2_172734803); 548 z4 = MULTIPLY(z5, FIX_0_785694958); 549 z5 = MULTIPLY(z5, FIX_1_175875602); 550 551 tmp0 = z1 - z4; 552 tmp1 = z2 + z4; 553 tmp2 += z5; 554 tmp3 += z5; 555 } else { 556 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ 557 tmp0 = MULTIPLY(-d3, FIX_0_785694958); 558 tmp1 = MULTIPLY(-d3, FIX_1_387039845); 559 tmp2 = MULTIPLY(-d3, FIX_0_275899380); 560 tmp3 = MULTIPLY(d3, FIX_1_175875602); 561 } 562 } else { 563 if (d1) { 564 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ 565 tmp0 = MULTIPLY(d1, FIX_0_275899380); 566 tmp1 = MULTIPLY(d1, FIX_0_785694958); 567 tmp2 = MULTIPLY(d1, FIX_1_175875602); 568 tmp3 = MULTIPLY(d1, FIX_1_387039845); 569 } else { 570 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ 571 tmp0 = tmp1 = tmp2 = tmp3 = 0; 572 } 573 } 574 } 575 } 576} 577 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 578 579 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 580 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 581 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 582 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 583 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 584 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 585 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 586 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 587 588 dataptr += DCTSIZE; /* advance pointer to next row */ 589 } 590 591 /* Pass 2: process columns. */ 592 /* Note that we must descale the results by a factor of 8 == 2**3, */ 593 /* and also undo the PASS1_BITS scaling. */ 594 595 dataptr = data; 596 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 597 /* Columns of zeroes can be exploited in the same way as we did with rows. 598 * However, the row calculation has created many nonzero AC terms, so the 599 * simplification applies less often (typically 5% to 10% of the time). 600 * On machines with very fast multiplication, it's possible that the 601 * test takes more time than it's worth. In that case this section 602 * may be commented out. 603 */ 604 605 d0 = dataptr[DCTSIZE*0]; 606 d1 = dataptr[DCTSIZE*1]; 607 d2 = dataptr[DCTSIZE*2]; 608 d3 = dataptr[DCTSIZE*3]; 609 d4 = dataptr[DCTSIZE*4]; 610 d5 = dataptr[DCTSIZE*5]; 611 d6 = dataptr[DCTSIZE*6]; 612 d7 = dataptr[DCTSIZE*7]; 613 614 /* Even part: reverse the even part of the forward DCT. */ 615 /* The rotator is sqrt(2)*c(-6). */ 616 if (d6) { 617 if (d2) { 618 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 619 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 620 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 621 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 622 623 tmp0 = (d0 + d4) << CONST_BITS; 624 tmp1 = (d0 - d4) << CONST_BITS; 625 626 tmp10 = tmp0 + tmp3; 627 tmp13 = tmp0 - tmp3; 628 tmp11 = tmp1 + tmp2; 629 tmp12 = tmp1 - tmp2; 630 } else { 631 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 632 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 633 tmp3 = MULTIPLY(d6, FIX_0_541196100); 634 635 tmp0 = (d0 + d4) << CONST_BITS; 636 tmp1 = (d0 - d4) << CONST_BITS; 637 638 tmp10 = tmp0 + tmp3; 639 tmp13 = tmp0 - tmp3; 640 tmp11 = tmp1 + tmp2; 641 tmp12 = tmp1 - tmp2; 642 } 643 } else { 644 if (d2) { 645 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 646 tmp2 = MULTIPLY(d2, FIX_0_541196100); 647 tmp3 = MULTIPLY(d2, FIX_1_306562965); 648 649 tmp0 = (d0 + d4) << CONST_BITS; 650 tmp1 = (d0 - d4) << CONST_BITS; 651 652 tmp10 = tmp0 + tmp3; 653 tmp13 = tmp0 - tmp3; 654 tmp11 = tmp1 + tmp2; 655 tmp12 = tmp1 - tmp2; 656 } else { 657 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 658 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 659 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 660 } 661 } 662 663 /* Odd part per figure 8; the matrix is unitary and hence its 664 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 665 */ 666 if (d7) { 667 if (d5) { 668 if (d3) { 669 if (d1) { 670 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ 671 z1 = d7 + d1; 672 z2 = d5 + d3; 673 z3 = d7 + d3; 674 z4 = d5 + d1; 675 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 676 677 tmp0 = MULTIPLY(d7, FIX_0_298631336); 678 tmp1 = MULTIPLY(d5, FIX_2_053119869); 679 tmp2 = MULTIPLY(d3, FIX_3_072711026); 680 tmp3 = MULTIPLY(d1, FIX_1_501321110); 681 z1 = MULTIPLY(-z1, FIX_0_899976223); 682 z2 = MULTIPLY(-z2, FIX_2_562915447); 683 z3 = MULTIPLY(-z3, FIX_1_961570560); 684 z4 = MULTIPLY(-z4, FIX_0_390180644); 685 686 z3 += z5; 687 z4 += z5; 688 689 tmp0 += z1 + z3; 690 tmp1 += z2 + z4; 691 tmp2 += z2 + z3; 692 tmp3 += z1 + z4; 693 } else { 694 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ 695 z2 = d5 + d3; 696 z3 = d7 + d3; 697 z5 = MULTIPLY(z3 + d5, FIX_1_175875602); 698 699 tmp0 = MULTIPLY(d7, FIX_0_298631336); 700 tmp1 = MULTIPLY(d5, FIX_2_053119869); 701 tmp2 = MULTIPLY(d3, FIX_3_072711026); 702 z1 = MULTIPLY(-d7, FIX_0_899976223); 703 z2 = MULTIPLY(-z2, FIX_2_562915447); 704 z3 = MULTIPLY(-z3, FIX_1_961570560); 705 z4 = MULTIPLY(-d5, FIX_0_390180644); 706 707 z3 += z5; 708 z4 += z5; 709 710 tmp0 += z1 + z3; 711 tmp1 += z2 + z4; 712 tmp2 += z2 + z3; 713 tmp3 = z1 + z4; 714 } 715 } else { 716 if (d1) { 717 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ 718 z1 = d7 + d1; 719 z3 = d7; 720 z4 = d5 + d1; 721 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 722 723 tmp0 = MULTIPLY(d7, FIX_0_298631336); 724 tmp1 = MULTIPLY(d5, FIX_2_053119869); 725 tmp3 = MULTIPLY(d1, FIX_1_501321110); 726 z1 = MULTIPLY(-z1, FIX_0_899976223); 727 z2 = MULTIPLY(-d5, FIX_2_562915447); 728 z3 = MULTIPLY(-d7, FIX_1_961570560); 729 z4 = MULTIPLY(-z4, FIX_0_390180644); 730 731 z3 += z5; 732 z4 += z5; 733 734 tmp0 += z1 + z3; 735 tmp1 += z2 + z4; 736 tmp2 = z2 + z3; 737 tmp3 += z1 + z4; 738 } else { 739 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ 740 tmp0 = MULTIPLY(-d7, FIX_0_601344887); 741 z1 = MULTIPLY(-d7, FIX_0_899976223); 742 z3 = MULTIPLY(-d7, FIX_1_961570560); 743 tmp1 = MULTIPLY(-d5, FIX_0_509795579); 744 z2 = MULTIPLY(-d5, FIX_2_562915447); 745 z4 = MULTIPLY(-d5, FIX_0_390180644); 746 z5 = MULTIPLY(d5 + d7, FIX_1_175875602); 747 748 z3 += z5; 749 z4 += z5; 750 751 tmp0 += z3; 752 tmp1 += z4; 753 tmp2 = z2 + z3; 754 tmp3 = z1 + z4; 755 } 756 } 757 } else { 758 if (d3) { 759 if (d1) { 760 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ 761 z1 = d7 + d1; 762 z3 = d7 + d3; 763 z5 = MULTIPLY(z3 + d1, FIX_1_175875602); 764 765 tmp0 = MULTIPLY(d7, FIX_0_298631336); 766 tmp2 = MULTIPLY(d3, FIX_3_072711026); 767 tmp3 = MULTIPLY(d1, FIX_1_501321110); 768 z1 = MULTIPLY(-z1, FIX_0_899976223); 769 z2 = MULTIPLY(-d3, FIX_2_562915447); 770 z3 = MULTIPLY(-z3, FIX_1_961570560); 771 z4 = MULTIPLY(-d1, FIX_0_390180644); 772 773 z3 += z5; 774 z4 += z5; 775 776 tmp0 += z1 + z3; 777 tmp1 = z2 + z4; 778 tmp2 += z2 + z3; 779 tmp3 += z1 + z4; 780 } else { 781 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ 782 z3 = d7 + d3; 783 784 tmp0 = MULTIPLY(-d7, FIX_0_601344887); 785 z1 = MULTIPLY(-d7, FIX_0_899976223); 786 tmp2 = MULTIPLY(d3, FIX_0_509795579); 787 z2 = MULTIPLY(-d3, FIX_2_562915447); 788 z5 = MULTIPLY(z3, FIX_1_175875602); 789 z3 = MULTIPLY(-z3, FIX_0_785694958); 790 791 tmp0 += z3; 792 tmp1 = z2 + z5; 793 tmp2 += z3; 794 tmp3 = z1 + z5; 795 } 796 } else { 797 if (d1) { 798 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ 799 z1 = d7 + d1; 800 z5 = MULTIPLY(z1, FIX_1_175875602); 801 802 z1 = MULTIPLY(z1, FIX_0_275899380); 803 z3 = MULTIPLY(-d7, FIX_1_961570560); 804 tmp0 = MULTIPLY(-d7, FIX_1_662939225); 805 z4 = MULTIPLY(-d1, FIX_0_390180644); 806 tmp3 = MULTIPLY(d1, FIX_1_111140466); 807 808 tmp0 += z1; 809 tmp1 = z4 + z5; 810 tmp2 = z3 + z5; 811 tmp3 += z1; 812 } else { 813 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ 814 tmp0 = MULTIPLY(-d7, FIX_1_387039845); 815 tmp1 = MULTIPLY(d7, FIX_1_175875602); 816 tmp2 = MULTIPLY(-d7, FIX_0_785694958); 817 tmp3 = MULTIPLY(d7, FIX_0_275899380); 818 } 819 } 820 } 821 } else { 822 if (d5) { 823 if (d3) { 824 if (d1) { 825 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ 826 z2 = d5 + d3; 827 z4 = d5 + d1; 828 z5 = MULTIPLY(d3 + z4, FIX_1_175875602); 829 830 tmp1 = MULTIPLY(d5, FIX_2_053119869); 831 tmp2 = MULTIPLY(d3, FIX_3_072711026); 832 tmp3 = MULTIPLY(d1, FIX_1_501321110); 833 z1 = MULTIPLY(-d1, FIX_0_899976223); 834 z2 = MULTIPLY(-z2, FIX_2_562915447); 835 z3 = MULTIPLY(-d3, FIX_1_961570560); 836 z4 = MULTIPLY(-z4, FIX_0_390180644); 837 838 z3 += z5; 839 z4 += z5; 840 841 tmp0 = z1 + z3; 842 tmp1 += z2 + z4; 843 tmp2 += z2 + z3; 844 tmp3 += z1 + z4; 845 } else { 846 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ 847 z2 = d5 + d3; 848 849 z5 = MULTIPLY(z2, FIX_1_175875602); 850 tmp1 = MULTIPLY(d5, FIX_1_662939225); 851 z4 = MULTIPLY(-d5, FIX_0_390180644); 852 z2 = MULTIPLY(-z2, FIX_1_387039845); 853 tmp2 = MULTIPLY(d3, FIX_1_111140466); 854 z3 = MULTIPLY(-d3, FIX_1_961570560); 855 856 tmp0 = z3 + z5; 857 tmp1 += z2; 858 tmp2 += z2; 859 tmp3 = z4 + z5; 860 } 861 } else { 862 if (d1) { 863 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ 864 z4 = d5 + d1; 865 866 z5 = MULTIPLY(z4, FIX_1_175875602); 867 z1 = MULTIPLY(-d1, FIX_0_899976223); 868 tmp3 = MULTIPLY(d1, FIX_0_601344887); 869 tmp1 = MULTIPLY(-d5, FIX_0_509795579); 870 z2 = MULTIPLY(-d5, FIX_2_562915447); 871 z4 = MULTIPLY(z4, FIX_0_785694958); 872 873 tmp0 = z1 + z5; 874 tmp1 += z4; 875 tmp2 = z2 + z5; 876 tmp3 += z4; 877 } else { 878 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ 879 tmp0 = MULTIPLY(d5, FIX_1_175875602); 880 tmp1 = MULTIPLY(d5, FIX_0_275899380); 881 tmp2 = MULTIPLY(-d5, FIX_1_387039845); 882 tmp3 = MULTIPLY(d5, FIX_0_785694958); 883 } 884 } 885 } else { 886 if (d3) { 887 if (d1) { 888 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ 889 z5 = d1 + d3; 890 tmp3 = MULTIPLY(d1, FIX_0_211164243); 891 tmp2 = MULTIPLY(-d3, FIX_1_451774981); 892 z1 = MULTIPLY(d1, FIX_1_061594337); 893 z2 = MULTIPLY(-d3, FIX_2_172734803); 894 z4 = MULTIPLY(z5, FIX_0_785694958); 895 z5 = MULTIPLY(z5, FIX_1_175875602); 896 897 tmp0 = z1 - z4; 898 tmp1 = z2 + z4; 899 tmp2 += z5; 900 tmp3 += z5; 901 } else { 902 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ 903 tmp0 = MULTIPLY(-d3, FIX_0_785694958); 904 tmp1 = MULTIPLY(-d3, FIX_1_387039845); 905 tmp2 = MULTIPLY(-d3, FIX_0_275899380); 906 tmp3 = MULTIPLY(d3, FIX_1_175875602); 907 } 908 } else { 909 if (d1) { 910 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ 911 tmp0 = MULTIPLY(d1, FIX_0_275899380); 912 tmp1 = MULTIPLY(d1, FIX_0_785694958); 913 tmp2 = MULTIPLY(d1, FIX_1_175875602); 914 tmp3 = MULTIPLY(d1, FIX_1_387039845); 915 } else { 916 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ 917 tmp0 = tmp1 = tmp2 = tmp3 = 0; 918 } 919 } 920 } 921 } 922 923 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 924 925 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, 926 CONST_BITS+PASS1_BITS+3); 927 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, 928 CONST_BITS+PASS1_BITS+3); 929 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, 930 CONST_BITS+PASS1_BITS+3); 931 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, 932 CONST_BITS+PASS1_BITS+3); 933 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, 934 CONST_BITS+PASS1_BITS+3); 935 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, 936 CONST_BITS+PASS1_BITS+3); 937 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, 938 CONST_BITS+PASS1_BITS+3); 939 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, 940 CONST_BITS+PASS1_BITS+3); 941 942 dataptr++; /* advance pointer to next column */ 943 } 944} 945 946#undef DCTSIZE 947#define DCTSIZE 4 948#define DCTSTRIDE 8 949 950void j_rev_dct4(DCTBLOCK data) 951{ 952 int32_t tmp0, tmp1, tmp2, tmp3; 953 int32_t tmp10, tmp11, tmp12, tmp13; 954 int32_t z1; 955 int32_t d0, d2, d4, d6; 956 register DCTELEM *dataptr; 957 int rowctr; 958 959 /* Pass 1: process rows. */ 960 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ 961 /* furthermore, we scale the results by 2**PASS1_BITS. */ 962 963 data[0] += 4; 964 965 dataptr = data; 966 967 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 968 /* Due to quantization, we will usually find that many of the input 969 * coefficients are zero, especially the AC terms. We can exploit this 970 * by short-circuiting the IDCT calculation for any row in which all 971 * the AC terms are zero. In that case each output is equal to the 972 * DC coefficient (with scale factor as needed). 973 * With typical images and quantization tables, half or more of the 974 * row DCT calculations can be simplified this way. 975 */ 976 977 register int *idataptr = (int*)dataptr; 978 979 d0 = dataptr[0]; 980 d2 = dataptr[1]; 981 d4 = dataptr[2]; 982 d6 = dataptr[3]; 983 984 if ((d2 | d4 | d6) == 0) { 985 /* AC terms all zero */ 986 if (d0) { 987 /* Compute a 32 bit value to assign. */ 988 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); 989 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); 990 991 idataptr[0] = v; 992 idataptr[1] = v; 993 } 994 995 dataptr += DCTSTRIDE; /* advance pointer to next row */ 996 continue; 997 } 998 999 /* Even part: reverse the even part of the forward DCT. */ 1000 /* The rotator is sqrt(2)*c(-6). */ 1001 if (d6) { 1002 if (d2) { 1003 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 1004 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 1005 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 1006 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 1007 1008 tmp0 = (d0 + d4) << CONST_BITS; 1009 tmp1 = (d0 - d4) << CONST_BITS; 1010 1011 tmp10 = tmp0 + tmp3; 1012 tmp13 = tmp0 - tmp3; 1013 tmp11 = tmp1 + tmp2; 1014 tmp12 = tmp1 - tmp2; 1015 } else { 1016 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 1017 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 1018 tmp3 = MULTIPLY(d6, FIX_0_541196100); 1019 1020 tmp0 = (d0 + d4) << CONST_BITS; 1021 tmp1 = (d0 - d4) << CONST_BITS; 1022 1023 tmp10 = tmp0 + tmp3; 1024 tmp13 = tmp0 - tmp3; 1025 tmp11 = tmp1 + tmp2; 1026 tmp12 = tmp1 - tmp2; 1027 } 1028 } else { 1029 if (d2) { 1030 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 1031 tmp2 = MULTIPLY(d2, FIX_0_541196100); 1032 tmp3 = MULTIPLY(d2, FIX_1_306562965); 1033 1034 tmp0 = (d0 + d4) << CONST_BITS; 1035 tmp1 = (d0 - d4) << CONST_BITS; 1036 1037 tmp10 = tmp0 + tmp3; 1038 tmp13 = tmp0 - tmp3; 1039 tmp11 = tmp1 + tmp2; 1040 tmp12 = tmp1 - tmp2; 1041 } else { 1042 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 1043 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 1044 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 1045 } 1046 } 1047 1048 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 1049 1050 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); 1051 dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); 1052 dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); 1053 dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); 1054 1055 dataptr += DCTSTRIDE; /* advance pointer to next row */ 1056 } 1057 1058 /* Pass 2: process columns. */ 1059 /* Note that we must descale the results by a factor of 8 == 2**3, */ 1060 /* and also undo the PASS1_BITS scaling. */ 1061 1062 dataptr = data; 1063 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 1064 /* Columns of zeroes can be exploited in the same way as we did with rows. 1065 * However, the row calculation has created many nonzero AC terms, so the 1066 * simplification applies less often (typically 5% to 10% of the time). 1067 * On machines with very fast multiplication, it's possible that the 1068 * test takes more time than it's worth. In that case this section 1069 * may be commented out. 1070 */ 1071 1072 d0 = dataptr[DCTSTRIDE*0]; 1073 d2 = dataptr[DCTSTRIDE*1]; 1074 d4 = dataptr[DCTSTRIDE*2]; 1075 d6 = dataptr[DCTSTRIDE*3]; 1076 1077 /* Even part: reverse the even part of the forward DCT. */ 1078 /* The rotator is sqrt(2)*c(-6). */ 1079 if (d6) { 1080 if (d2) { 1081 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 1082 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 1083 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 1084 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 1085 1086 tmp0 = (d0 + d4) << CONST_BITS; 1087 tmp1 = (d0 - d4) << CONST_BITS; 1088 1089 tmp10 = tmp0 + tmp3; 1090 tmp13 = tmp0 - tmp3; 1091 tmp11 = tmp1 + tmp2; 1092 tmp12 = tmp1 - tmp2; 1093 } else { 1094 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 1095 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 1096 tmp3 = MULTIPLY(d6, FIX_0_541196100); 1097 1098 tmp0 = (d0 + d4) << CONST_BITS; 1099 tmp1 = (d0 - d4) << CONST_BITS; 1100 1101 tmp10 = tmp0 + tmp3; 1102 tmp13 = tmp0 - tmp3; 1103 tmp11 = tmp1 + tmp2; 1104 tmp12 = tmp1 - tmp2; 1105 } 1106 } else { 1107 if (d2) { 1108 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 1109 tmp2 = MULTIPLY(d2, FIX_0_541196100); 1110 tmp3 = MULTIPLY(d2, FIX_1_306562965); 1111 1112 tmp0 = (d0 + d4) << CONST_BITS; 1113 tmp1 = (d0 - d4) << CONST_BITS; 1114 1115 tmp10 = tmp0 + tmp3; 1116 tmp13 = tmp0 - tmp3; 1117 tmp11 = tmp1 + tmp2; 1118 tmp12 = tmp1 - tmp2; 1119 } else { 1120 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 1121 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 1122 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 1123 } 1124 } 1125 1126 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 1127 1128 dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); 1129 dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); 1130 dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); 1131 dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); 1132 1133 dataptr++; /* advance pointer to next column */ 1134 } 1135} 1136 1137void j_rev_dct2(DCTBLOCK data){ 1138 int d00, d01, d10, d11; 1139 1140 data[0] += 4; 1141 d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE]; 1142 d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; 1143 d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; 1144 d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; 1145 1146 data[0+0*DCTSTRIDE]= (d00 + d10)>>3; 1147 data[1+0*DCTSTRIDE]= (d01 + d11)>>3; 1148 data[0+1*DCTSTRIDE]= (d00 - d10)>>3; 1149 data[1+1*DCTSTRIDE]= (d01 - d11)>>3; 1150} 1151 1152void j_rev_dct1(DCTBLOCK data){ 1153 data[0] = (data[0] + 4)>>3; 1154} 1155 1156#undef FIX 1157#undef CONST_BITS 1158