1  /********************************************************************** <BR>


2  This file is part of Crack dot Com's free source code release of


3  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for


4  information about compiling & licensing issues visit this URL</a>


5  <PRE> If that doesn't help, contact Jonathan Clark at


6  golgotha_source@usa.net (Subject should have "GOLG" in it)


7  ***********************************************************************/


8 


9  /*


10  * jfdctflt.c


11  *


12  * Copyright (C) 1994-1996, Thomas G. Lane.


13  * This file is part of the Independent JPEG Group's software.


14  * For conditions of distribution and use, see the accompanying README file.


15  *


16  * This file contains a floating-point implementation of the


17  * forward DCT (Discrete Cosine Transform).


18  *


19  * This implementation should be more accurate than either of the integer


20  * DCT implementations. However, it may not give the same results on all


21  * machines because of differences in roundoff behavior. Speed will depend


22  * on the hardware's floating point capacity.


23  *


24  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT


25  * on each column. Direct algorithms are also available, but they are


26  * much more complex and seem not to be any faster when reduced to code.


27  *


28  * This implementation is based on Arai, Agui, and Nakajima's algorithm for


29  * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in


30  * Japanese, but the algorithm is described in the Pennebaker & Mitchell


31  * JPEG textbook (see REFERENCES section in file README). The following code


32  * is based directly on figure 4-8 in P&M.


33  * While an 8-point DCT cannot be done in less than 11 multiplies, it is


34  * possible to arrange the computation so that many of the multiplies are


35  * simple scalings of the final outputs. These multiplies can then be


36  * folded into the multiplications or divisions by the JPEG quantization


37  * table entries. The AA&N method leaves only 5 multiplies and 29 adds


38  * to be done in the DCT itself.


39  * The primary disadvantage of this method is that with a fixed-point


40  * implementation, accuracy is lost due to imprecise representation of the


41  * scaled quantization values. However, that problem does not arise if


42  * we use floating point arithmetic.


43  */


44 


45  #define JPEG_INTERNALS


46  #include "loaders/jpg/jinclude.h"


47  #include "loaders/jpg/jpeglib.h"


48  #include "loaders/jpg/jdct.h" /* Private declarations for DCT subsystem */


49 


50  #ifdef DCT_FLOAT_SUPPORTED


51 


52 


53  /*


54  * This module is specialized to the case DCTSIZE = 8.


55  */


56 


57  #if DCTSIZE != 8


58  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */


59  #endif


60 


61 


62  /*


63  * Perform the forward DCT on one block of samples.


64  */


65 


66  GLOBAL(void)


67  jpeg_fdct_float (FAST_FLOAT * data)


68  {


69  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;


70  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;


71  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;


72  FAST_FLOAT *dataptr;


73  int ctr;


74 


75  /* Pass 1: process rows. */


76 


77  dataptr = data;


78  for (ctr = DCTSIZE1; ctr >= 0; ctr)


79  {


80  tmp0 = dataptr[0] + dataptr[7];


81  tmp7 = dataptr[0]  dataptr[7];


82  tmp1 = dataptr[1] + dataptr[6];


83  tmp6 = dataptr[1]  dataptr[6];


84  tmp2 = dataptr[2] + dataptr[5];


85  tmp5 = dataptr[2]  dataptr[5];


86  tmp3 = dataptr[3] + dataptr[4];


87  tmp4 = dataptr[3]  dataptr[4];


88 


89  /* Even part */


90 


91  tmp10 = tmp0 + tmp3; /* phase 2 */


92  tmp13 = tmp0  tmp3;


93  tmp11 = tmp1 + tmp2;


94  tmp12 = tmp1  tmp2;


95 


96  dataptr[0] = tmp10 + tmp11; /* phase 3 */


97  dataptr[4] = tmp10  tmp11;


98 


99  z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */


100  dataptr[2] = tmp13 + z1; /* phase 5 */


101  dataptr[6] = tmp13  z1;


102 


103  /* Odd part */


104 


105  tmp10 = tmp4 + tmp5; /* phase 2 */


106  tmp11 = tmp5 + tmp6;


107  tmp12 = tmp6 + tmp7;


108 


109  /* The rotator is modified from fig 48 to avoid extra negations. */


110  z5 = (tmp10  tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */


111  z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2c6 */


112  z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */


113  z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */


114 


115  z11 = tmp7 + z3; /* phase 5 */


116  z13 = tmp7  z3;


117 


118  dataptr[5] = z13 + z2; /* phase 6 */


119  dataptr[3] = z13  z2;


120  dataptr[1] = z11 + z4;


121  dataptr[7] = z11  z4;


122 


123  dataptr += DCTSIZE; /* advance pointer to next row */


124  }


125 


126  /* Pass 2: process columns. */


127 


128  dataptr = data;


129  for (ctr = DCTSIZE1; ctr >= 0; ctr) {


130  tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];


131  tmp7 = dataptr[DCTSIZE*0]  dataptr[DCTSIZE*7];


132  tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];


133  tmp6 = dataptr[DCTSIZE*1]  dataptr[DCTSIZE*6];


134  tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];


135  tmp5 = dataptr[DCTSIZE*2]  dataptr[DCTSIZE*5];


136  tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];


137  tmp4 = dataptr[DCTSIZE*3]  dataptr[DCTSIZE*4];


138 


139  /* Even part */


140 


141  tmp10 = tmp0 + tmp3; /* phase 2 */


142  tmp13 = tmp0  tmp3;


143  tmp11 = tmp1 + tmp2;


144  tmp12 = tmp1  tmp2;


145 


146  dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */


147  dataptr[DCTSIZE*4] = tmp10  tmp11;


148 


149  z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */


150  dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */


151  dataptr[DCTSIZE*6] = tmp13  z1;


152 


153  /* Odd part */


154 


155  tmp10 = tmp4 + tmp5; /* phase 2 */


156  tmp11 = tmp5 + tmp6;


157  tmp12 = tmp6 + tmp7;


158 


159  /* The rotator is modified from fig 48 to avoid extra negations. */


160  z5 = (tmp10  tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */


161  z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2c6 */


162  z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */


163  z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */


164 


165  z11 = tmp7 + z3; /* phase 5 */


166  z13 = tmp7  z3;


167 


168  dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */


169  dataptr[DCTSIZE*3] = z13  z2;


170  dataptr[DCTSIZE*1] = z11 + z4;


171  dataptr[DCTSIZE*7] = z11  z4;


172 


173  dataptr++; /* advance pointer to next column */


174  }


175  }


176 


177  #endif /* DCT_FLOAT_SUPPORTED */

