1/* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5/* 6 * jcdctmgr.c 7 * 8 * Copyright (C) 1994-1996, Thomas G. Lane. 9 * This file is part of the Independent JPEG Group's software. 10 * For conditions of distribution and use, see the accompanying README file. 11 * 12 * This file contains the forward-DCT management logic. 13 * This code selects a particular DCT implementation to be used, 14 * and it performs related housekeeping chores including coefficient 15 * quantization. 16 */ 17 18#define JPEG_INTERNALS 19#include "jinclude.h" 20#include "jpeglib.h" 21#include "jdct.h" /* Private declarations for DCT subsystem */ 22 23 24/* Private subobject for this module */ 25 26typedef struct { 27 struct jpeg_forward_dct pub; /* public fields */ 28 29 /* Pointer to the DCT routine actually in use */ 30 forward_DCT_method_ptr do_dct; 31 32 /* The actual post-DCT divisors --- not identical to the quant table 33 * entries, because of scaling (especially for an unnormalized DCT). 34 * Each table is given in normal array order. 35 */ 36 DCTELEM * divisors[NUM_QUANT_TBLS]; 37 38#ifdef DCT_FLOAT_SUPPORTED 39 /* Same as above for the floating-point case. */ 40 float_DCT_method_ptr do_float_dct; 41 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; 42#endif 43} my_fdct_controller; 44 45typedef my_fdct_controller * my_fdct_ptr; 46 47 48/* 49 * Initialize for a processing pass. 50 * Verify that all referenced Q-tables are present, and set up 51 * the divisor table for each one. 52 * In the current implementation, DCT of all components is done during 53 * the first pass, even if only some components will be output in the 54 * first scan. Hence all components should be examined here. 55 */ 56 57METHODDEF(void) 58start_pass_fdctmgr (j_compress_ptr cinfo) 59{ 60 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 61 int ci, qtblno, i; 62 jpeg_component_info *compptr; 63 JQUANT_TBL * qtbl; 64 DCTELEM * dtbl; 65 66 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; 67 ci++, compptr++) { 68 qtblno = compptr->quant_tbl_no; 69 /* Make sure specified quantization table is present */ 70 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || 71 cinfo->quant_tbl_ptrs[qtblno] == NULL) 72 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); 73 qtbl = cinfo->quant_tbl_ptrs[qtblno]; 74 /* Compute divisors for this quant table */ 75 /* We may do this more than once for same table, but it's not a big deal */ 76 switch (cinfo->dct_method) { 77#ifdef DCT_ISLOW_SUPPORTED 78 case JDCT_ISLOW: 79 /* For LL&M IDCT method, divisors are equal to raw quantization 80 * coefficients multiplied by 8 (to counteract scaling). 81 */ 82 if (fdct->divisors[qtblno] == NULL) { 83 fdct->divisors[qtblno] = (DCTELEM *) 84 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 85 DCTSIZE2 * SIZEOF(DCTELEM)); 86 } 87 dtbl = fdct->divisors[qtblno]; 88 for (i = 0; i < DCTSIZE2; i++) { 89 dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3; 90 } 91 break; 92#endif 93#ifdef DCT_IFAST_SUPPORTED 94 case JDCT_IFAST: 95 { 96 /* For AA&N IDCT method, divisors are equal to quantization 97 * coefficients scaled by scalefactor[row]*scalefactor[col], where 98 * scalefactor[0] = 1 99 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 100 * We apply a further scale factor of 8. 101 */ 102#define CONST_BITS 14 103 static const INT16 aanscales[DCTSIZE2] = { 104 /* precomputed values scaled up by 14 bits */ 105 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 106 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, 107 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, 108 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 109 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 110 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 111 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, 112 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 113 }; 114 SHIFT_TEMPS 115 116 if (fdct->divisors[qtblno] == NULL) { 117 fdct->divisors[qtblno] = (DCTELEM *) 118 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 119 DCTSIZE2 * SIZEOF(DCTELEM)); 120 } 121 dtbl = fdct->divisors[qtblno]; 122 for (i = 0; i < DCTSIZE2; i++) { 123 dtbl[i] = (DCTELEM) 124 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], 125 (INT32) aanscales[i]), 126 CONST_BITS-3); 127 } 128 } 129 break; 130#endif 131#ifdef DCT_FLOAT_SUPPORTED 132 case JDCT_FLOAT: 133 { 134 /* For float AA&N IDCT method, divisors are equal to quantization 135 * coefficients scaled by scalefactor[row]*scalefactor[col], where 136 * scalefactor[0] = 1 137 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 138 * We apply a further scale factor of 8. 139 * What's actually stored is 1/divisor so that the inner loop can 140 * use a multiplication rather than a division. 141 */ 142 FAST_FLOAT * fdtbl; 143 int row, col; 144 static const double aanscalefactor[DCTSIZE] = { 145 1.0, 1.387039845, 1.306562965, 1.175875602, 146 1.0, 0.785694958, 0.541196100, 0.275899379 147 }; 148 149 if (fdct->float_divisors[qtblno] == NULL) { 150 fdct->float_divisors[qtblno] = (FAST_FLOAT *) 151 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 152 DCTSIZE2 * SIZEOF(FAST_FLOAT)); 153 } 154 fdtbl = fdct->float_divisors[qtblno]; 155 i = 0; 156 for (row = 0; row < DCTSIZE; row++) { 157 for (col = 0; col < DCTSIZE; col++) { 158 fdtbl[i] = (FAST_FLOAT) 159 (1.0 / (((double) qtbl->quantval[i] * 160 aanscalefactor[row] * aanscalefactor[col] * 8.0))); 161 i++; 162 } 163 } 164 } 165 break; 166#endif 167 default: 168 ERREXIT(cinfo, JERR_NOT_COMPILED); 169 break; 170 } 171 } 172} 173 174 175/* 176 * Perform forward DCT on one or more blocks of a component. 177 * 178 * The input samples are taken from the sample_data[] array starting at 179 * position start_row/start_col, and moving to the right for any additional 180 * blocks. The quantized coefficients are returned in coef_blocks[]. 181 */ 182 183METHODDEF(void) 184forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, 185 JSAMPARRAY sample_data, JBLOCKROW coef_blocks, 186 JDIMENSION start_row, JDIMENSION start_col, 187 JDIMENSION num_blocks) 188/* This version is used for integer DCT implementations. */ 189{ 190 /* This routine is heavily used, so it's worth coding it tightly. */ 191 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 192 forward_DCT_method_ptr do_dct = fdct->do_dct; 193 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; 194 DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */ 195 JDIMENSION bi; 196 197 sample_data += start_row; /* fold in the vertical offset once */ 198 199 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { 200 /* Load data into workspace, applying unsigned->signed conversion */ 201 { register DCTELEM *workspaceptr; 202 register JSAMPROW elemptr; 203 register int elemr; 204 205 workspaceptr = workspace; 206 for (elemr = 0; elemr < DCTSIZE; elemr++) { 207 elemptr = sample_data[elemr] + start_col; 208#if DCTSIZE == 8 /* unroll the inner loop */ 209 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 210 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 211 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 212 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 213 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 214 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 215 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 216 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 217#else 218 { register int elemc; 219 for (elemc = DCTSIZE; elemc > 0; elemc--) { 220 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 221 } 222 } 223#endif 224 } 225 } 226 227 /* Perform the DCT */ 228 (*do_dct) (workspace); 229 230 /* Quantize/descale the coefficients, and store into coef_blocks[] */ 231 { register DCTELEM temp, qval; 232 register int i; 233 register JCOEFPTR output_ptr = coef_blocks[bi]; 234 235 for (i = 0; i < DCTSIZE2; i++) { 236 qval = divisors[i]; 237 temp = workspace[i]; 238 /* Divide the coefficient value by qval, ensuring proper rounding. 239 * Since C does not specify the direction of rounding for negative 240 * quotients, we have to force the dividend positive for portability. 241 * 242 * In most files, at least half of the output values will be zero 243 * (at default quantization settings, more like three-quarters...) 244 * so we should ensure that this case is fast. On many machines, 245 * a comparison is enough cheaper than a divide to make a special test 246 * a win. Since both inputs will be nonnegative, we need only test 247 * for a < b to discover whether a/b is 0. 248 * If your machine's division is fast enough, define FAST_DIVIDE. 249 */ 250#ifdef FAST_DIVIDE 251#define DIVIDE_BY(a,b) a /= b 252#else 253#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0 254#endif 255 if (temp < 0) { 256 temp = -temp; 257 temp += qval>>1; /* for rounding */ 258 DIVIDE_BY(temp, qval); 259 temp = -temp; 260 } else { 261 temp += qval>>1; /* for rounding */ 262 DIVIDE_BY(temp, qval); 263 } 264 output_ptr[i] = (JCOEF) temp; 265 } 266 } 267 } 268} 269 270 271#ifdef DCT_FLOAT_SUPPORTED 272 273METHODDEF(void) 274forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, 275 JSAMPARRAY sample_data, JBLOCKROW coef_blocks, 276 JDIMENSION start_row, JDIMENSION start_col, 277 JDIMENSION num_blocks) 278/* This version is used for floating-point DCT implementations. */ 279{ 280 /* This routine is heavily used, so it's worth coding it tightly. */ 281 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 282 float_DCT_method_ptr do_dct = fdct->do_float_dct; 283 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; 284 FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */ 285 JDIMENSION bi; 286 287 sample_data += start_row; /* fold in the vertical offset once */ 288 289 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { 290 /* Load data into workspace, applying unsigned->signed conversion */ 291 { register FAST_FLOAT *workspaceptr; 292 register JSAMPROW elemptr; 293 register int elemr; 294 295 workspaceptr = workspace; 296 for (elemr = 0; elemr < DCTSIZE; elemr++) { 297 elemptr = sample_data[elemr] + start_col; 298#if DCTSIZE == 8 /* unroll the inner loop */ 299 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 300 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 301 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 302 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 303 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 304 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 305 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 306 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 307#else 308 { register int elemc; 309 for (elemc = DCTSIZE; elemc > 0; elemc--) { 310 *workspaceptr++ = (FAST_FLOAT) 311 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 312 } 313 } 314#endif 315 } 316 } 317 318 /* Perform the DCT */ 319 (*do_dct) (workspace); 320 321 /* Quantize/descale the coefficients, and store into coef_blocks[] */ 322 { register FAST_FLOAT temp; 323 register int i; 324 register JCOEFPTR output_ptr = coef_blocks[bi]; 325 326 for (i = 0; i < DCTSIZE2; i++) { 327 /* Apply the quantization and scaling factor */ 328 temp = workspace[i] * divisors[i]; 329 /* Round to nearest integer. 330 * Since C does not specify the direction of rounding for negative 331 * quotients, we have to force the dividend positive for portability. 332 * The maximum coefficient size is +-16K (for 12-bit data), so this 333 * code should work for either 16-bit or 32-bit ints. 334 */ 335 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384); 336 } 337 } 338 } 339} 340 341#endif /* DCT_FLOAT_SUPPORTED */ 342 343 344/* 345 * Initialize FDCT manager. 346 */ 347 348GLOBAL(void) 349jinit_forward_dct (j_compress_ptr cinfo) 350{ 351 my_fdct_ptr fdct; 352 int i; 353 354 fdct = (my_fdct_ptr) 355 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 356 SIZEOF(my_fdct_controller)); 357 cinfo->fdct = (struct jpeg_forward_dct *) fdct; 358 fdct->pub.start_pass = start_pass_fdctmgr; 359 360 switch (cinfo->dct_method) { 361#ifdef DCT_ISLOW_SUPPORTED 362 case JDCT_ISLOW: 363 fdct->pub.forward_DCT = forward_DCT; 364 fdct->do_dct = jpeg_fdct_islow; 365 break; 366#endif 367#ifdef DCT_IFAST_SUPPORTED 368 case JDCT_IFAST: 369 fdct->pub.forward_DCT = forward_DCT; 370 fdct->do_dct = jpeg_fdct_ifast; 371 break; 372#endif 373#ifdef DCT_FLOAT_SUPPORTED 374 case JDCT_FLOAT: 375 fdct->pub.forward_DCT = forward_DCT_float; 376 fdct->do_float_dct = jpeg_fdct_float; 377 break; 378#endif 379 default: 380 ERREXIT(cinfo, JERR_NOT_COMPILED); 381 break; 382 } 383 384 /* Mark divisor tables unallocated */ 385 for (i = 0; i < NUM_QUANT_TBLS; i++) { 386 fdct->divisors[i] = NULL; 387#ifdef DCT_FLOAT_SUPPORTED 388 fdct->float_divisors[i] = NULL; 389#endif 390 } 391} 392