jcdctmgr.c (12691B)
1 /* 2 * jcdctmgr.c 3 * 4 * Copyright (C) 1994-1995, Thomas G. Lane. 5 * This file is part of the Independent JPEG Group's software. 6 * For conditions of distribution and use, see the accompanying README file. 7 * 8 * This file contains the forward-DCT management logic. 9 * This code selects a particular DCT implementation to be used, 10 * and it performs related housekeeping chores including coefficient 11 * quantization. 12 */ 13 14 #define JPEG_INTERNALS 15 #include "jinclude.h" 16 #include "jpeglib.h" 17 #include "jdct.h" /* Private declarations for DCT subsystem */ 18 19 20 /* Private subobject for this module */ 21 22 typedef struct { 23 struct jpeg_forward_dct pub; /* public fields */ 24 25 /* Pointer to the DCT routine actually in use */ 26 forward_DCT_method_ptr do_dct; 27 28 /* The actual post-DCT divisors --- not identical to the quant table 29 * entries, because of scaling (especially for an unnormalized DCT). 30 * Each table is given in normal array order; note that this must 31 * be converted from the zigzag order of the quantization tables. 32 */ 33 DCTELEM * divisors[NUM_QUANT_TBLS]; 34 35 #ifdef DCT_FLOAT_SUPPORTED 36 /* Same as above for the floating-point case. */ 37 float_DCT_method_ptr do_float_dct; 38 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; 39 #endif 40 } my_fdct_controller; 41 42 typedef my_fdct_controller * my_fdct_ptr; 43 44 45 /* 46 * Initialize for a processing pass. 47 * Verify that all referenced Q-tables are present, and set up 48 * the divisor table for each one. 49 * In the current implementation, DCT of all components is done during 50 * the first pass, even if only some components will be output in the 51 * first scan. Hence all components should be examined here. 52 */ 53 54 METHODDEF void 55 start_pass_fdctmgr (j_compress_ptr cinfo) 56 { 57 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 58 int ci, qtblno, i; 59 jpeg_component_info *compptr; 60 JQUANT_TBL * qtbl; 61 #ifdef DCT_ISLOW_SUPPORTED 62 DCTELEM * dtbl; 63 #endif 64 65 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; 66 ci++, compptr++) { 67 qtblno = compptr->quant_tbl_no; 68 /* Make sure specified quantization table is present */ 69 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || 70 cinfo->quant_tbl_ptrs[qtblno] == NULL) 71 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); 72 qtbl = cinfo->quant_tbl_ptrs[qtblno]; 73 /* Compute divisors for this quant table */ 74 /* We may do this more than once for same table, but it's not a big deal */ 75 switch (cinfo->dct_method) { 76 #ifdef DCT_ISLOW_SUPPORTED 77 case JDCT_ISLOW: 78 /* For LL&M IDCT method, divisors are equal to raw quantization 79 * coefficients multiplied by 8 (to counteract scaling). 80 */ 81 if (fdct->divisors[qtblno] == NULL) { 82 fdct->divisors[qtblno] = (DCTELEM *) 83 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 84 DCTSIZE2 * SIZEOF(DCTELEM)); 85 } 86 dtbl = fdct->divisors[qtblno]; 87 for (i = 0; i < DCTSIZE2; i++) { 88 dtbl[i] = ((DCTELEM) qtbl->quantval[jpeg_zigzag_order[i]]) << 3; 89 } 90 break; 91 #endif 92 #ifdef DCT_IFAST_SUPPORTED 93 case JDCT_IFAST: 94 { 95 /* For AA&N IDCT method, divisors are equal to quantization 96 * coefficients scaled by scalefactor[row]*scalefactor[col], where 97 * scalefactor[0] = 1 98 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 99 * We apply a further scale factor of 8. 100 */ 101 #define CONST_BITS 14 102 static const INT16 aanscales[DCTSIZE2] = { 103 /* precomputed values scaled up by 14 bits: in natural order */ 104 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 105 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, 106 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, 107 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 108 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 109 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 110 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, 111 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 112 }; 113 SHIFT_TEMPS 114 115 if (fdct->divisors[qtblno] == NULL) { 116 fdct->divisors[qtblno] = (DCTELEM *) 117 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 118 DCTSIZE2 * SIZEOF(DCTELEM)); 119 } 120 dtbl = fdct->divisors[qtblno]; 121 for (i = 0; i < DCTSIZE2; i++) { 122 dtbl[i] = (DCTELEM) 123 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[jpeg_zigzag_order[i]], 124 (INT32) aanscales[i]), 125 CONST_BITS-3); 126 } 127 } 128 break; 129 #endif 130 #ifdef DCT_FLOAT_SUPPORTED 131 case JDCT_FLOAT: 132 { 133 /* For float AA&N IDCT method, divisors are equal to quantization 134 * coefficients scaled by scalefactor[row]*scalefactor[col], where 135 * scalefactor[0] = 1 136 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 137 * We apply a further scale factor of 8. 138 * What's actually stored is 1/divisor so that the inner loop can 139 * use a multiplication rather than a division. 140 */ 141 FAST_FLOAT * fdtbl; 142 int row, col; 143 static const double aanscalefactor[DCTSIZE] = { 144 1.0, 1.387039845, 1.306562965, 1.175875602, 145 1.0, 0.785694958, 0.541196100, 0.275899379 146 }; 147 148 if (fdct->float_divisors[qtblno] == NULL) { 149 fdct->float_divisors[qtblno] = (FAST_FLOAT *) 150 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 151 DCTSIZE2 * SIZEOF(FAST_FLOAT)); 152 } 153 fdtbl = fdct->float_divisors[qtblno]; 154 i = 0; 155 for (row = 0; row < DCTSIZE; row++) { 156 for (col = 0; col < DCTSIZE; col++) { 157 fdtbl[i] = (FAST_FLOAT) 158 (1.0 / (((double) qtbl->quantval[jpeg_zigzag_order[i]] * 159 aanscalefactor[row] * aanscalefactor[col] * 8.0))); 160 i++; 161 } 162 } 163 } 164 break; 165 #endif 166 default: 167 ERREXIT(cinfo, JERR_NOT_COMPILED); 168 break; 169 } 170 } 171 } 172 173 174 /* 175 * Perform forward DCT on one or more blocks of a component. 176 * 177 * The input samples are taken from the sample_data[] array starting at 178 * position start_row/start_col, and moving to the right for any additional 179 * blocks. The quantized coefficients are returned in coef_blocks[]. 180 */ 181 182 #if 0 // bk001204 183 METHODDEF void 184 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, 185 JSAMPARRAY sample_data, JBLOCKROW coef_blocks, 186 JDIMENSION start_row, JDIMENSION start_col, 187 JDIMENSION num_blocks) 188 /* This version is used for integer DCT implementations. */ 189 { 190 /* This routine is heavily used, so it's worth coding it tightly. */ 191 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 192 forward_DCT_method_ptr do_dct = fdct->do_dct; 193 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; 194 DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */ 195 JDIMENSION bi; 196 197 sample_data += start_row; /* fold in the vertical offset once */ 198 199 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { 200 /* Load data into workspace, applying unsigned->signed conversion */ 201 { register DCTELEM *workspaceptr; 202 register JSAMPROW elemptr; 203 register int elemr; 204 205 workspaceptr = workspace; 206 for (elemr = 0; elemr < DCTSIZE; elemr++) { 207 elemptr = sample_data[elemr] + start_col; 208 #if DCTSIZE == 8 /* unroll the inner loop */ 209 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 210 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 211 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 212 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 213 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 214 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 215 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 216 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 217 #else 218 { register int elemc; 219 for (elemc = DCTSIZE; elemc > 0; elemc--) { 220 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 221 } 222 } 223 #endif 224 } 225 } 226 227 /* Perform the DCT */ 228 (*do_dct) (workspace); 229 230 /* Quantize/descale the coefficients, and store into coef_blocks[] */ 231 { register DCTELEM temp, qval; 232 register int i; 233 register JCOEFPTR output_ptr = coef_blocks[bi]; 234 235 for (i = 0; i < DCTSIZE2; i++) { 236 qval = divisors[i]; 237 temp = workspace[i]; 238 /* Divide the coefficient value by qval, ensuring proper rounding. 239 * Since C does not specify the direction of rounding for negative 240 * quotients, we have to force the dividend positive for portability. 241 * 242 * In most files, at least half of the output values will be zero 243 * (at default quantization settings, more like three-quarters...) 244 * so we should ensure that this case is fast. On many machines, 245 * a comparison is enough cheaper than a divide to make a special test 246 * a win. Since both inputs will be nonnegative, we need only test 247 * for a < b to discover whether a/b is 0. 248 * If your machine's division is fast enough, define FAST_DIVIDE. 249 */ 250 #ifdef FAST_DIVIDE 251 #define DIVIDE_BY(a,b) a /= b 252 #else 253 #define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0 254 #endif 255 if (temp < 0) { 256 temp = -temp; 257 temp += qval>>1; /* for rounding */ 258 DIVIDE_BY(temp, qval); 259 temp = -temp; 260 } else { 261 temp += qval>>1; /* for rounding */ 262 DIVIDE_BY(temp, qval); 263 } 264 output_ptr[i] = (JCOEF) temp; 265 } 266 } 267 } 268 } 269 #endif // 0 270 271 #ifdef DCT_FLOAT_SUPPORTED 272 273 METHODDEF void 274 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, 275 JSAMPARRAY sample_data, JBLOCKROW coef_blocks, 276 JDIMENSION start_row, JDIMENSION start_col, 277 JDIMENSION num_blocks) 278 /* This version is used for floating-point DCT implementations. */ 279 { 280 /* This routine is heavily used, so it's worth coding it tightly. */ 281 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 282 float_DCT_method_ptr do_dct = fdct->do_float_dct; 283 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; 284 FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */ 285 JDIMENSION bi; 286 287 sample_data += start_row; /* fold in the vertical offset once */ 288 289 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { 290 /* Load data into workspace, applying unsigned->signed conversion */ 291 { register FAST_FLOAT *workspaceptr; 292 register JSAMPROW elemptr; 293 register int elemr; 294 295 workspaceptr = workspace; 296 for (elemr = 0; elemr < DCTSIZE; elemr++) { 297 elemptr = sample_data[elemr] + start_col; 298 #if DCTSIZE == 8 /* unroll the inner loop */ 299 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 300 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 301 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 302 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 303 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 304 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 305 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 306 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 307 #else 308 { register int elemc; 309 for (elemc = DCTSIZE; elemc > 0; elemc--) { 310 *workspaceptr++ = (FAST_FLOAT) 311 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 312 } 313 } 314 #endif 315 } 316 } 317 318 /* Perform the DCT */ 319 (*do_dct) (workspace); 320 321 /* Quantize/descale the coefficients, and store into coef_blocks[] */ 322 { register FAST_FLOAT temp; 323 register int i; 324 register JCOEFPTR output_ptr = coef_blocks[bi]; 325 326 for (i = 0; i < DCTSIZE2; i++) { 327 /* Apply the quantization and scaling factor */ 328 temp = workspace[i] * divisors[i]; 329 /* Round to nearest integer. 330 * Since C does not specify the direction of rounding for negative 331 * quotients, we have to force the dividend positive for portability. 332 * The maximum coefficient size is +-16K (for 12-bit data), so this 333 * code should work for either 16-bit or 32-bit ints. 334 */ 335 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384); 336 } 337 } 338 } 339 } 340 341 #endif /* DCT_FLOAT_SUPPORTED */ 342 343 344 /* 345 * Initialize FDCT manager. 346 */ 347 348 GLOBAL void 349 jinit_forward_dct (j_compress_ptr cinfo) 350 { 351 my_fdct_ptr fdct; 352 int i; 353 354 fdct = (my_fdct_ptr) 355 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 356 SIZEOF(my_fdct_controller)); 357 cinfo->fdct = (struct jpeg_forward_dct *) fdct; 358 fdct->pub.start_pass = start_pass_fdctmgr; 359 360 switch (cinfo->dct_method) { 361 #ifdef DCT_ISLOW_SUPPORTED 362 case JDCT_ISLOW: 363 fdct->pub.forward_DCT = forward_DCT; 364 fdct->do_dct = jpeg_fdct_islow; 365 break; 366 #endif 367 #ifdef DCT_IFAST_SUPPORTED 368 case JDCT_IFAST: 369 fdct->pub.forward_DCT = forward_DCT; 370 fdct->do_dct = jpeg_fdct_ifast; 371 break; 372 #endif 373 #ifdef DCT_FLOAT_SUPPORTED 374 case JDCT_FLOAT: 375 fdct->pub.forward_DCT = forward_DCT_float; 376 fdct->do_float_dct = jpeg_fdct_float; 377 break; 378 #endif 379 default: 380 ERREXIT(cinfo, JERR_NOT_COMPILED); 381 break; 382 } 383 384 /* Mark divisor tables unallocated */ 385 for (i = 0; i < NUM_QUANT_TBLS; i++) { 386 fdct->divisors[i] = NULL; 387 #ifdef DCT_FLOAT_SUPPORTED 388 fdct->float_divisors[i] = NULL; 389 #endif 390 } 391 }