source: golgotha/src/i4/loaders/jpg/jfdctflt.cc

Last change on this file was 80, checked in by Sam Hocevar, 15 years ago
  • Adding the Golgotha source code. Not sure what's going to be interesting in there, but since it's all public domain, there's certainly stuff to pick up.
File size: 6.0 KB
RevLine 
[80]1/********************************************************************** <BR>
2  This file is part of Crack dot Com's free source code release of
3  Golgotha. <a href="http://www.crack.com/golgotha_release"> <BR> for
4  information about compiling & licensing issues visit this URL</a>
5  <PRE> If that doesn't help, contact Jonathan Clark at
6  golgotha_source@usa.net (Subject should have "GOLG" in it)
7***********************************************************************/
8
9/*
10 * jfdctflt.c
11 *
12 * Copyright (C) 1994-1996, Thomas G. Lane.
13 * This file is part of the Independent JPEG Group's software.
14 * For conditions of distribution and use, see the accompanying README file.
15 *
16 * This file contains a floating-point implementation of the
17 * forward DCT (Discrete Cosine Transform).
18 *
19 * This implementation should be more accurate than either of the integer
20 * DCT implementations.  However, it may not give the same results on all
21 * machines because of differences in roundoff behavior.  Speed will depend
22 * on the hardware's floating point capacity.
23 *
24 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
25 * on each column.  Direct algorithms are also available, but they are
26 * much more complex and seem not to be any faster when reduced to code.
27 *
28 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
29 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
30 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
31 * JPEG textbook (see REFERENCES section in file README).  The following code
32 * is based directly on figure 4-8 in P&M.
33 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
34 * possible to arrange the computation so that many of the multiplies are
35 * simple scalings of the final outputs.  These multiplies can then be
36 * folded into the multiplications or divisions by the JPEG quantization
37 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
38 * to be done in the DCT itself.
39 * The primary disadvantage of this method is that with a fixed-point
40 * implementation, accuracy is lost due to imprecise representation of the
41 * scaled quantization values.  However, that problem does not arise if
42 * we use floating point arithmetic.
43 */
44
45#define JPEG_INTERNALS
46#include "loaders/jpg/jinclude.h"
47#include "loaders/jpg/jpeglib.h"
48#include "loaders/jpg/jdct.h"           /* Private declarations for DCT subsystem */
49
50#ifdef DCT_FLOAT_SUPPORTED
51
52
53/*
54 * This module is specialized to the case DCTSIZE = 8.
55 */
56
57#if DCTSIZE != 8
58  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
59#endif
60
61
62/*
63 * Perform the forward DCT on one block of samples.
64 */
65
66GLOBAL(void)
67jpeg_fdct_float (FAST_FLOAT * data)
68{
69  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
70  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
71  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
72  FAST_FLOAT *dataptr;
73  int ctr;
74
75  /* Pass 1: process rows. */
76
77  dataptr = data;
78  for (ctr = DCTSIZE-1; ctr >= 0; ctr--)
79  {
80    tmp0 = dataptr[0] + dataptr[7];
81    tmp7 = dataptr[0] - dataptr[7];
82    tmp1 = dataptr[1] + dataptr[6];
83    tmp6 = dataptr[1] - dataptr[6];
84    tmp2 = dataptr[2] + dataptr[5];
85    tmp5 = dataptr[2] - dataptr[5];
86    tmp3 = dataptr[3] + dataptr[4];
87    tmp4 = dataptr[3] - dataptr[4];
88   
89    /* Even part */
90   
91    tmp10 = tmp0 + tmp3;        /* phase 2 */
92    tmp13 = tmp0 - tmp3;
93    tmp11 = tmp1 + tmp2;
94    tmp12 = tmp1 - tmp2;
95   
96    dataptr[0] = tmp10 + tmp11; /* phase 3 */
97    dataptr[4] = tmp10 - tmp11;
98   
99    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
100    dataptr[2] = tmp13 + z1;    /* phase 5 */
101    dataptr[6] = tmp13 - z1;
102   
103    /* Odd part */
104
105    tmp10 = tmp4 + tmp5;        /* phase 2 */
106    tmp11 = tmp5 + tmp6;
107    tmp12 = tmp6 + tmp7;
108
109    /* The rotator is modified from fig 4-8 to avoid extra negations. */
110    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
111    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
112    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
113    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
114
115    z11 = tmp7 + z3;            /* phase 5 */
116    z13 = tmp7 - z3;
117
118    dataptr[5] = z13 + z2;      /* phase 6 */
119    dataptr[3] = z13 - z2;
120    dataptr[1] = z11 + z4;
121    dataptr[7] = z11 - z4;
122
123    dataptr += DCTSIZE;         /* advance pointer to next row */
124  }
125
126  /* Pass 2: process columns. */
127
128  dataptr = data;
129  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
130    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
131    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
132    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
133    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
134    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
135    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
136    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
137    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
138   
139    /* Even part */
140   
141    tmp10 = tmp0 + tmp3;        /* phase 2 */
142    tmp13 = tmp0 - tmp3;
143    tmp11 = tmp1 + tmp2;
144    tmp12 = tmp1 - tmp2;
145   
146    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
147    dataptr[DCTSIZE*4] = tmp10 - tmp11;
148   
149    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
150    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
151    dataptr[DCTSIZE*6] = tmp13 - z1;
152   
153    /* Odd part */
154
155    tmp10 = tmp4 + tmp5;        /* phase 2 */
156    tmp11 = tmp5 + tmp6;
157    tmp12 = tmp6 + tmp7;
158
159    /* The rotator is modified from fig 4-8 to avoid extra negations. */
160    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
161    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
162    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
163    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
164
165    z11 = tmp7 + z3;            /* phase 5 */
166    z13 = tmp7 - z3;
167
168    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
169    dataptr[DCTSIZE*3] = z13 - z2;
170    dataptr[DCTSIZE*1] = z11 + z4;
171    dataptr[DCTSIZE*7] = z11 - z4;
172
173    dataptr++;                  /* advance pointer to next column */
174  }
175}
176
177#endif /* DCT_FLOAT_SUPPORTED */
Note: See TracBrowser for help on using the repository browser.