1  /********************************************************************** <BR>


2  This file is part of Crack dot Com's free source code release of


3  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for


4  information about compiling & licensing issues visit this URL</a>


5  <PRE> If that doesn't help, contact Jonathan Clark at


6  golgotha_source@usa.net (Subject should have "GOLG" in it)


7  ***********************************************************************/


8 


/*
 * jfdctfst.c
 *
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a fast, not so accurate integer implementation of the
 * forward DCT (Discrete Cosine Transform).
 *
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
 * on each column.  Direct algorithms are also available, but they are
 * much more complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
 * JPEG textbook (see REFERENCES section in file README).  The following code
 * is based directly on figure 4-8 in P&M.
 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
 * possible to arrange the computation so that many of the multiplies are
 * simple scalings of the final outputs.  These multiplies can then be
 * folded into the multiplications or divisions by the JPEG quantization
 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
 * to be done in the DCT itself.
 * The primary disadvantage of this method is that with fixed-point math,
 * accuracy is lost due to imprecise representation of the scaled
 * quantization values.  The smaller the quantization table entry, the less
 * precise the scaled value, so this implementation does worse with high-
 * quality-setting files than with low-quality ones.
 */


40 


41  #define JPEG_INTERNALS


42  #include "loaders/jpg/jinclude.h"


43  #include "loaders/jpg/jpeglib.h"


44  #include "loaders/jpg/jdct.h" /* Private declarations for DCT subsystem */


45 


46  #ifdef DCT_IFAST_SUPPORTED


47 


48 


49  /*


50  * This module is specialized to the case DCTSIZE = 8.


51  */


52 


53  #if DCTSIZE != 8


54  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */


55  #endif


56 


57 


/* Scaling decisions are generally the same as in the LL&M algorithm;
 * see jfdctint.c for more details.  However, we choose to descale
 * (right shift) multiplication products as soon as they are formed,
 * rather than carrying additional fractional bits into subsequent additions.
 * This compromises accuracy slightly, but it lets us save a few shifts.
 * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
 * everywhere except in the multiplications proper; this saves a good deal
 * of work on 16-bit-int machines.
 *
 * Again to save a few shifts, the intermediate results between pass 1 and
 * pass 2 are not upscaled, but are represented only to integral precision.
 *
 * A final compromise is to represent the multiplicative constants to only
 * 8 fractional bits, rather than 13.  This saves some shifting work on some
 * machines, and may also reduce the cost of multiplication (since there
 * are fewer one-bits in the constants).
 */

/* Number of fractional bits carried by the fixed-point multiplier constants. */
#define CONST_BITS  8


77 


78 


/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 */

#if CONST_BITS == 8
/* Literal values of FIX(x) for 8 fractional bits. */
#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
#else
/* Any other precision: let FIX() compute the scaled constant. */
#define FIX_0_382683433  FIX(0.382683433)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_707106781  FIX(0.707106781)
#define FIX_1_306562965  FIX(1.306562965)
#endif


97 


98 


/* We can gain a little more speed, with a further compromise in accuracy,
 * by omitting the addition in a descaling shift.  This yields an incorrectly
 * rounded result half the time...
 *
 * Unless USE_ACCURATE_ROUNDING is defined, any previously defined DESCALE
 * is discarded and replaced by a plain right shift with no rounding term.
 */

#ifndef USE_ACCURATE_ROUNDING
#undef DESCALE
#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
#endif


108 


109 


/* Multiply a DCTELEM variable by an INT32 constant, and immediately
 * descale to yield a DCTELEM result.
 *
 * var  - the DCTELEM operand (may be an expression; it is parenthesized).
 * coef - the fixed-point constant, scaled by CONST_BITS fractional bits.
 *
 * The product is shifted right by CONST_BITS via DESCALE and then cast
 * back to DCTELEM.  The second parameter is named "coef" rather than the
 * keyword "const"; the expansion is unchanged.
 */

#define MULTIPLY(var,coef)  ((DCTELEM) DESCALE((var) * (coef), CONST_BITS))


115 


116 


117  /*


118  * Perform the forward DCT on one block of samples.


119  */


120 


121  GLOBAL(void)


122  jpeg_fdct_ifast (DCTELEM * data)


123  {


124  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;


125  DCTELEM tmp10, tmp11, tmp12, tmp13;


126  DCTELEM z1, z2, z3, z4, z5, z11, z13;


127  DCTELEM *dataptr;


128  int ctr;


129  SHIFT_TEMPS


130 


131  /* Pass 1: process rows. */


132 


133  dataptr = data;


134  for (ctr = DCTSIZE1; ctr >= 0; ctr) {


135  tmp0 = dataptr[0] + dataptr[7];


136  tmp7 = dataptr[0]  dataptr[7];


137  tmp1 = dataptr[1] + dataptr[6];


138  tmp6 = dataptr[1]  dataptr[6];


139  tmp2 = dataptr[2] + dataptr[5];


140  tmp5 = dataptr[2]  dataptr[5];


141  tmp3 = dataptr[3] + dataptr[4];


142  tmp4 = dataptr[3]  dataptr[4];


143 


144  /* Even part */


145 


146  tmp10 = tmp0 + tmp3; /* phase 2 */


147  tmp13 = tmp0  tmp3;


148  tmp11 = tmp1 + tmp2;


149  tmp12 = tmp1  tmp2;


150 


151  dataptr[0] = tmp10 + tmp11; /* phase 3 */


152  dataptr[4] = tmp10  tmp11;


153 


154  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */


155  dataptr[2] = tmp13 + z1; /* phase 5 */


156  dataptr[6] = tmp13  z1;


157 


158  /* Odd part */


159 


160  tmp10 = tmp4 + tmp5; /* phase 2 */


161  tmp11 = tmp5 + tmp6;


162  tmp12 = tmp6 + tmp7;


163 


164  /* The rotator is modified from fig 48 to avoid extra negations. */


165  z5 = MULTIPLY(tmp10  tmp12, FIX_0_382683433); /* c6 */


166  z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2c6 */


167  z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */


168  z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */


169 


170  z11 = tmp7 + z3; /* phase 5 */


171  z13 = tmp7  z3;


172 


173  dataptr[5] = z13 + z2; /* phase 6 */


174  dataptr[3] = z13  z2;


175  dataptr[1] = z11 + z4;


176  dataptr[7] = z11  z4;


177 


178  dataptr += DCTSIZE; /* advance pointer to next row */


179  }


180 


181  /* Pass 2: process columns. */


182 


183  dataptr = data;


184  for (ctr = DCTSIZE1; ctr >= 0; ctr) {


185  tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];


186  tmp7 = dataptr[DCTSIZE*0]  dataptr[DCTSIZE*7];


187  tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];


188  tmp6 = dataptr[DCTSIZE*1]  dataptr[DCTSIZE*6];


189  tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];


190  tmp5 = dataptr[DCTSIZE*2]  dataptr[DCTSIZE*5];


191  tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];


192  tmp4 = dataptr[DCTSIZE*3]  dataptr[DCTSIZE*4];


193 


194  /* Even part */


195 


196  tmp10 = tmp0 + tmp3; /* phase 2 */


197  tmp13 = tmp0  tmp3;


198  tmp11 = tmp1 + tmp2;


199  tmp12 = tmp1  tmp2;


200 


201  dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */


202  dataptr[DCTSIZE*4] = tmp10  tmp11;


203 


204  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */


205  dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */


206  dataptr[DCTSIZE*6] = tmp13  z1;


207 


208  /* Odd part */


209 


210  tmp10 = tmp4 + tmp5; /* phase 2 */


211  tmp11 = tmp5 + tmp6;


212  tmp12 = tmp6 + tmp7;


213 


214  /* The rotator is modified from fig 48 to avoid extra negations. */


215  z5 = MULTIPLY(tmp10  tmp12, FIX_0_382683433); /* c6 */


216  z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2c6 */


217  z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */


218  z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */


219 


220  z11 = tmp7 + z3; /* phase 5 */


221  z13 = tmp7  z3;


222 


223  dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */


224  dataptr[DCTSIZE*3] = z13  z2;


225  dataptr[DCTSIZE*1] = z11 + z4;


226  dataptr[DCTSIZE*7] = z11  z4;


227 


228  dataptr++; /* advance pointer to next column */


229  }


230  }


231 


232  #endif /* DCT_IFAST_SUPPORTED */

