1  /********************************************************************** <BR>


2  This file is part of Crack dot Com's free source code release of


3  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for


4  information about compiling & licensing issues visit this URL</a>


5  <PRE> If that doesn't help, contact Jonathan Clark at


6  golgotha_source@usa.net (Subject should have "GOLG" in it)


7  ***********************************************************************/


8 


9  /*


10  * jidctflt.c


11  *


12  * Copyright (C) 1994-1996, Thomas G. Lane.


13  * This file is part of the Independent JPEG Group's software.


14  * For conditions of distribution and use, see the accompanying README file.


15  *


16  * This file contains a floating-point implementation of the


17  * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine


18  * must also perform dequantization of the input coefficients.


19  *


20  * This implementation should be more accurate than either of the integer


21  * IDCT implementations. However, it may not give the same results on all


22  * machines because of differences in roundoff behavior. Speed will depend


23  * on the hardware's floating point capacity.


24  *


25  * A 2D IDCT can be done by 1D IDCT on each column followed by 1D IDCT


26  * on each row (or vice versa, but it's more convenient to emit a row at


27  * a time). Direct algorithms are also available, but they are much more


28  * complex and seem not to be any faster when reduced to code.


29  *


30  * This implementation is based on Arai, Agui, and Nakajima's algorithm for


31  * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in


32  * Japanese, but the algorithm is described in the Pennebaker & Mitchell


33  * JPEG textbook (see REFERENCES section in file README). The following code


34  * is based directly on figure 4-8 in P&M.


35  * While an 8-point DCT cannot be done in less than 11 multiplies, it is


36  * possible to arrange the computation so that many of the multiplies are


37  * simple scalings of the final outputs. These multiplies can then be


38  * folded into the multiplications or divisions by the JPEG quantization


39  * table entries. The AA&N method leaves only 5 multiplies and 29 adds


40  * to be done in the DCT itself.


41  * The primary disadvantage of this method is that with a fixed-point


42  * implementation, accuracy is lost due to imprecise representation of the


43  * scaled quantization values. However, that problem does not arise if


44  * we use floating point arithmetic.


45  */


46 


47  #define JPEG_INTERNALS


48  #include "loaders/jpg/jinclude.h"


49  #include "loaders/jpg/jpeglib.h"


50  #include "loaders/jpg/jdct.h" /* Private declarations for DCT subsystem */


51 


52  #ifdef DCT_FLOAT_SUPPORTED


53 


54 


55  /*


56  * This module is specialized to the case DCTSIZE = 8.


57  */


58 


59  #if DCTSIZE != 8


60  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */


61  #endif


62 


63 


64  /* Dequantize a coefficient by multiplying it by the multiplier-table


65  * entry; produce a float result.


66  */


67 


68  #define DEQUANTIZE(coef,quantval) (((FAST_FLOAT) (coef)) * (quantval))


69 


70 


71  /*


72  * Perform dequantization and inverse DCT on one block of coefficients.


73  */


74 


75  GLOBAL(void)


76  jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,


77  JCOEFPTR coef_block,


78  JSAMPARRAY output_buf, JDIMENSION output_col)


79  {


80  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;


81  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;


82  FAST_FLOAT z5, z10, z11, z12, z13;


83  JCOEFPTR inptr;


84  FLOAT_MULT_TYPE * quantptr;


85  FAST_FLOAT * wsptr;


86  JSAMPROW outptr;


87  JSAMPLE *range_limit = IDCT_range_limit(cinfo);


88  int ctr;


89  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */


90  SHIFT_TEMPS


91 


92  /* Pass 1: process columns from input, store into work array. */


93 


94  inptr = coef_block;


95  quantptr = (FLOAT_MULT_TYPE *) compptr>dct_table;


96  wsptr = workspace;


97  for (ctr = DCTSIZE; ctr > 0; ctr) {


98  /* Due to quantization, we will usually find that many of the input


99  * coefficients are zero, especially the AC terms. We can exploit this


100  * by shortcircuiting the IDCT calculation for any column in which all


101  * the AC terms are zero. In that case each output is equal to the


102  * DC coefficient (with scale factor as needed).


103  * With typical images and quantization tables, half or more of the


104  * column DCT calculations can be simplified this way.


105  */


106 


107  if ((inptr[DCTSIZE*1]  inptr[DCTSIZE*2]  inptr[DCTSIZE*3] 


108  inptr[DCTSIZE*4]  inptr[DCTSIZE*5]  inptr[DCTSIZE*6] 


109  inptr[DCTSIZE*7]) == 0) {


110  /* AC terms all zero */


111  FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);


112 


113  wsptr[DCTSIZE*0] = dcval;


114  wsptr[DCTSIZE*1] = dcval;


115  wsptr[DCTSIZE*2] = dcval;


116  wsptr[DCTSIZE*3] = dcval;


117  wsptr[DCTSIZE*4] = dcval;


118  wsptr[DCTSIZE*5] = dcval;


119  wsptr[DCTSIZE*6] = dcval;


120  wsptr[DCTSIZE*7] = dcval;


121 


122  inptr++; /* advance pointers to next column */


123  quantptr++;


124  wsptr++;


125  continue;


126  }


127 


128  /* Even part */


129 


130  tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);


131  tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);


132  tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);


133  tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);


134 


135  tmp10 = tmp0 + tmp2; /* phase 3 */


136  tmp11 = tmp0  tmp2;


137 


138  tmp13 = tmp1 + tmp3; /* phases 53 */


139  tmp12 = (tmp1  tmp3) * ((FAST_FLOAT) 1.414213562)  tmp13; /* 2*c4 */


140 


141  tmp0 = tmp10 + tmp13; /* phase 2 */


142  tmp3 = tmp10  tmp13;


143  tmp1 = tmp11 + tmp12;


144  tmp2 = tmp11  tmp12;


145 


146  /* Odd part */


147 


148  tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);


149  tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);


150  tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);


151  tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);


152 


153  z13 = tmp6 + tmp5; /* phase 6 */


154  z10 = tmp6  tmp5;


155  z11 = tmp4 + tmp7;


156  z12 = tmp4  tmp7;


157 


158  tmp7 = z11 + z13; /* phase 5 */


159  tmp11 = (z11  z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */


160 


161  z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */


162  tmp10 = ((FAST_FLOAT) 1.082392200) * z12  z5; /* 2*(c2c6) */


163  tmp12 = ((FAST_FLOAT) 2.613125930) * z10 + z5; /* 2*(c2+c6) */


164 


165  tmp6 = tmp12  tmp7; /* phase 2 */


166  tmp5 = tmp11  tmp6;


167  tmp4 = tmp10 + tmp5;


168 


169  wsptr[DCTSIZE*0] = tmp0 + tmp7;


170  wsptr[DCTSIZE*7] = tmp0  tmp7;


171  wsptr[DCTSIZE*1] = tmp1 + tmp6;


172  wsptr[DCTSIZE*6] = tmp1  tmp6;


173  wsptr[DCTSIZE*2] = tmp2 + tmp5;


174  wsptr[DCTSIZE*5] = tmp2  tmp5;


175  wsptr[DCTSIZE*4] = tmp3 + tmp4;


176  wsptr[DCTSIZE*3] = tmp3  tmp4;


177 


178  inptr++; /* advance pointers to next column */


179  quantptr++;


180  wsptr++;


181  }


182 


183  /* Pass 2: process rows from work array, store into output array. */


184  /* Note that we must descale the results by a factor of 8 == 2**3. */


185 


186  wsptr = workspace;


187  for (ctr = 0; ctr < DCTSIZE; ctr++) {


188  outptr = output_buf[ctr] + output_col;


189  /* Rows of zeroes can be exploited in the same way as we did with columns.


190  * However, the column calculation has created many nonzero AC terms, so


191  * the simplification applies less often (typically 5% to 10% of the time).


192  * And testing floats for zero is relatively expensive, so we don't bother.


193  */


194 


195  /* Even part */


196 


197  tmp10 = wsptr[0] + wsptr[4];


198  tmp11 = wsptr[0]  wsptr[4];


199 


200  tmp13 = wsptr[2] + wsptr[6];


201  tmp12 = (wsptr[2]  wsptr[6]) * ((FAST_FLOAT) 1.414213562)  tmp13;


202 


203  tmp0 = tmp10 + tmp13;


204  tmp3 = tmp10  tmp13;


205  tmp1 = tmp11 + tmp12;


206  tmp2 = tmp11  tmp12;


207 


208  /* Odd part */


209 


210  z13 = wsptr[5] + wsptr[3];


211  z10 = wsptr[5]  wsptr[3];


212  z11 = wsptr[1] + wsptr[7];


213  z12 = wsptr[1]  wsptr[7];


214 


215  tmp7 = z11 + z13;


216  tmp11 = (z11  z13) * ((FAST_FLOAT) 1.414213562);


217 


218  z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */


219  tmp10 = ((FAST_FLOAT) 1.082392200) * z12  z5; /* 2*(c2c6) */


220  tmp12 = ((FAST_FLOAT) 2.613125930) * z10 + z5; /* 2*(c2+c6) */


221 


222  tmp6 = tmp12  tmp7;


223  tmp5 = tmp11  tmp6;


224  tmp4 = tmp10 + tmp5;


225 


226  /* Final output stage: scale down by a factor of 8 and rangelimit */


227 


228  outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)


229  & RANGE_MASK];


230  outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0  tmp7), 3)


231  & RANGE_MASK];


232  outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)


233  & RANGE_MASK];


234  outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1  tmp6), 3)


235  & RANGE_MASK];


236  outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)


237  & RANGE_MASK];


238  outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2  tmp5), 3)


239  & RANGE_MASK];


240  outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)


241  & RANGE_MASK];


242  outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3  tmp4), 3)


243  & RANGE_MASK];


244 


245  wsptr += DCTSIZE; /* advance pointer to next row */


246  }


247  }


248 


249  #endif /* DCT_FLOAT_SUPPORTED */

