[80]  1  /********************************************************************** <BR>


 2  This file is part of Crack dot Com's free source code release of


 3  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for


 4  information about compiling & licensing issues visit this URL</a>


 5  <PRE> If that doesn't help, contact Jonathan Clark at


 6  golgotha_source@usa.net (Subject should have "GOLG" in it)


 7  ***********************************************************************/


 8 


 9  /*


 10  * jfdctflt.c


 11  *


 12  * Copyright (C) 1994-1996, Thomas G. Lane.


 13  * This file is part of the Independent JPEG Group's software.


 14  * For conditions of distribution and use, see the accompanying README file.


 15  *


 16  * This file contains a floating-point implementation of the


 17  * forward DCT (Discrete Cosine Transform).


 18  *


 19  * This implementation should be more accurate than either of the integer


 20  * DCT implementations. However, it may not give the same results on all


 21  * machines because of differences in roundoff behavior. Speed will depend


 22  * on the hardware's floating point capacity.


 23  *


 24  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT


 25  * on each column. Direct algorithms are also available, but they are


 26  * much more complex and seem not to be any faster when reduced to code.


 27  *


 28  * This implementation is based on Arai, Agui, and Nakajima's algorithm for


 29  * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in


 30  * Japanese, but the algorithm is described in the Pennebaker & Mitchell


 31  * JPEG textbook (see REFERENCES section in file README). The following code


 32  * is based directly on figure 4-8 in P&M.


 33  * While an 8-point DCT cannot be done in less than 11 multiplies, it is


 34  * possible to arrange the computation so that many of the multiplies are


 35  * simple scalings of the final outputs. These multiplies can then be


 36  * folded into the multiplications or divisions by the JPEG quantization


 37  * table entries. The AA&N method leaves only 5 multiplies and 29 adds


 38  * to be done in the DCT itself.


 39  * The primary disadvantage of this method is that with a fixed-point


 40  * implementation, accuracy is lost due to imprecise representation of the


 41  * scaled quantization values. However, that problem does not arise if


 42  * we use floating point arithmetic.


 43  */


 44 


 45  #define JPEG_INTERNALS


 46  #include "loaders/jpg/jinclude.h"


 47  #include "loaders/jpg/jpeglib.h"


 48  #include "loaders/jpg/jdct.h" /* Private declarations for DCT subsystem */


 49 


 50  #ifdef DCT_FLOAT_SUPPORTED


 51 


 52 


 53  /*


 54  * This module is specialized to the case DCTSIZE = 8.


 55  */


 56 


 57  #if DCTSIZE != 8


 58  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */


 59  #endif


 60 


 61 


 62  /*


 63  * Perform the forward DCT on one block of samples.


 64  */


 65 


 66  GLOBAL(void)


 67  jpeg_fdct_float (FAST_FLOAT * data)


 68  {


 69  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;


 70  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;


 71  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;


 72  FAST_FLOAT *dataptr;


 73  int ctr;


 74 


 75  /* Pass 1: process rows. */


 76 


 77  dataptr = data;


 78  for (ctr = DCTSIZE1; ctr >= 0; ctr)


 79  {


 80  tmp0 = dataptr[0] + dataptr[7];


 81  tmp7 = dataptr[0]  dataptr[7];


 82  tmp1 = dataptr[1] + dataptr[6];


 83  tmp6 = dataptr[1]  dataptr[6];


 84  tmp2 = dataptr[2] + dataptr[5];


 85  tmp5 = dataptr[2]  dataptr[5];


 86  tmp3 = dataptr[3] + dataptr[4];


 87  tmp4 = dataptr[3]  dataptr[4];


 88 


 89  /* Even part */


 90 


 91  tmp10 = tmp0 + tmp3; /* phase 2 */


 92  tmp13 = tmp0  tmp3;


 93  tmp11 = tmp1 + tmp2;


 94  tmp12 = tmp1  tmp2;


 95 


 96  dataptr[0] = tmp10 + tmp11; /* phase 3 */


 97  dataptr[4] = tmp10  tmp11;


 98 


 99  z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */


 100  dataptr[2] = tmp13 + z1; /* phase 5 */


 101  dataptr[6] = tmp13  z1;


 102 


 103  /* Odd part */


 104 


 105  tmp10 = tmp4 + tmp5; /* phase 2 */


 106  tmp11 = tmp5 + tmp6;


 107  tmp12 = tmp6 + tmp7;


 108 


 109  /* The rotator is modified from fig 48 to avoid extra negations. */


 110  z5 = (tmp10  tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */


 111  z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2c6 */


 112  z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */


 113  z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */


 114 


 115  z11 = tmp7 + z3; /* phase 5 */


 116  z13 = tmp7  z3;


 117 


 118  dataptr[5] = z13 + z2; /* phase 6 */


 119  dataptr[3] = z13  z2;


 120  dataptr[1] = z11 + z4;


 121  dataptr[7] = z11  z4;


 122 


 123  dataptr += DCTSIZE; /* advance pointer to next row */


 124  }


 125 


 126  /* Pass 2: process columns. */


 127 


 128  dataptr = data;


 129  for (ctr = DCTSIZE1; ctr >= 0; ctr) {


 130  tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];


 131  tmp7 = dataptr[DCTSIZE*0]  dataptr[DCTSIZE*7];


 132  tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];


 133  tmp6 = dataptr[DCTSIZE*1]  dataptr[DCTSIZE*6];


 134  tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];


 135  tmp5 = dataptr[DCTSIZE*2]  dataptr[DCTSIZE*5];


 136  tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];


 137  tmp4 = dataptr[DCTSIZE*3]  dataptr[DCTSIZE*4];


 138 


 139  /* Even part */


 140 


 141  tmp10 = tmp0 + tmp3; /* phase 2 */


 142  tmp13 = tmp0  tmp3;


 143  tmp11 = tmp1 + tmp2;


 144  tmp12 = tmp1  tmp2;


 145 


 146  dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */


 147  dataptr[DCTSIZE*4] = tmp10  tmp11;


 148 


 149  z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */


 150  dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */


 151  dataptr[DCTSIZE*6] = tmp13  z1;


 152 


 153  /* Odd part */


 154 


 155  tmp10 = tmp4 + tmp5; /* phase 2 */


 156  tmp11 = tmp5 + tmp6;


 157  tmp12 = tmp6 + tmp7;


 158 


 159  /* The rotator is modified from fig 48 to avoid extra negations. */


 160  z5 = (tmp10  tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */


 161  z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2c6 */


 162  z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */


 163  z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */


 164 


 165  z11 = tmp7 + z3; /* phase 5 */


 166  z13 = tmp7  z3;


 167 


 168  dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */


 169  dataptr[DCTSIZE*3] = z13  z2;


 170  dataptr[DCTSIZE*1] = z11 + z4;


 171  dataptr[DCTSIZE*7] = z11  z4;


 172 


 173  dataptr++; /* advance pointer to next column */


 174  }


 175  }


 176 


 177  #endif /* DCT_FLOAT_SUPPORTED */

