pax_global_header00006660000000000000000000000064124672577550014536gustar00rootroot0000000000000052 comment=c6fd2dd6d21397baa6653139d31d84540d5449a2 OouraFFT-1.0/000077500000000000000000000000001246725775500130235ustar00rootroot00000000000000OouraFFT-1.0/.gitignore000066400000000000000000000003671246725775500150210ustar00rootroot00000000000000*.o *.Po *.a *.la *.lai *.lo *.slo *.Plo *.so *.dylib *~ *.log *.swp *.cache/* *Makefile config.h config.status compile libtool ffts.pc stamp-h1 tests/test java/android/local.properties java/android/gen/* java/android/obj/* java/android/bin/* OouraFFT-1.0/alloc.c000066400000000000000000000050601246725775500142620ustar00rootroot00000000000000/* ---- memory allocation ---- */ #include "alloc.h" #define alloc_error_check(p) { \ if ((p) == NULL) { \ fprintf(stderr, "Allocation Failure!\n"); \ exit(1); \ } \ } int *alloc_1d_int(int n1) { int *i; i = (int *) malloc(sizeof(int) * n1); alloc_error_check(i); return i; } void free_1d_int(int *i) { free(i); } double *alloc_1d_double(int n1) { double *d; d = (double *) malloc(sizeof(double) * n1); alloc_error_check(d); return d; } void free_1d_double(double *d) { free(d); } int **alloc_2d_int(int n1, int n2) { int **ii, *i; int j; ii = (int **) malloc(sizeof(int *) * n1); alloc_error_check(ii); i = (int *) malloc(sizeof(int) * n1 * n2); alloc_error_check(i); ii[0] = i; for (j = 1; j < n1; j++) { ii[j] = ii[j - 1] + n2; } return ii; } void free_2d_int(int **ii) { free(ii[0]); free(ii); } double **alloc_2d_double(int n1, int n2) { double **dd, *d; int j; dd = (double **) malloc(sizeof(double *) * n1); alloc_error_check(dd); d = (double *) malloc(sizeof(double) * n1 * n2); alloc_error_check(d); dd[0] = d; for (j = 1; j < n1; j++) { dd[j] = dd[j - 1] + n2; } return dd; } void free_2d_double(double **dd) { free(dd[0]); free(dd); } int ***alloc_3d_int(int n1, int n2, int n3) { int ***iii, **ii, *i; int j; iii = (int ***) malloc(sizeof(int **) * n1); alloc_error_check(iii); ii = (int **) malloc(sizeof(int *) * n1 * n2); alloc_error_check(ii); iii[0] = ii; for (j = 1; j < n1; j++) { iii[j] = iii[j - 1] + n2; } i = (int *) malloc(sizeof(int) * n1 * n2 * n3); alloc_error_check(i); ii[0] = i; for (j = 1; j < n1 * n2; j++) { ii[j] = ii[j - 1] + n3; } return iii; } void free_3d_int(int ***iii) { free(iii[0][0]); free(iii[0]); free(iii); } double ***alloc_3d_double(int n1, int n2, int n3) { double ***ddd, **dd, *d; int j; ddd = (double ***) malloc(sizeof(double **) * n1); alloc_error_check(ddd); dd = (double **) malloc(sizeof(double *) * n1 * n2); alloc_error_check(dd); ddd[0] = dd; for (j = 1; j < n1; j++) { ddd[j] = ddd[j - 1] + n2; } d = (double *) malloc(sizeof(double) * n1 * n2 * n3); alloc_error_check(d); dd[0] = d; for (j = 1; j < n1 * n2; j++) { dd[j] = dd[j - 1] + n3; } return ddd; } void free_3d_double(double ***ddd) { free(ddd[0][0]); free(ddd[0]); free(ddd); } OouraFFT-1.0/alloc.h000066400000000000000000000007621246725775500142730ustar00rootroot00000000000000/* ---- memory allocation ---- */ #include #include int *alloc_1d_int(int n1); void free_1d_int(int *i); double *alloc_1d_double(int n1); void free_1d_double(double *d); int **alloc_2d_int(int n1, int n2); void free_2d_int(int **ii); double **alloc_2d_double(int n1, int n2); void free_2d_double(double **dd); int ***alloc_3d_int(int n1, int n2, int n3); void free_3d_int(int ***iii); double ***alloc_3d_double(int n1, int n2, int n3); void free_3d_double(double ***ddd); OouraFFT-1.0/fft4f2d.c000066400000000000000000001560141246725775500144350ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :two data length :power of 2 decimation :frequency radix :4, 2, row-column data :inplace table :use functions cdft2d: Complex Discrete Fourier Transform rdft2d: Real Discrete Fourier Transform ddct2d: Discrete Cosine Transform ddst2d: Discrete Sine Transform function prototypes void cdft2d(int, int, int, double **, int *, double *); void rdft2d(int, int, int, double **, int *, double *); void ddct2d(int, int, int, double **, double **, int *, double *); void ddst2d(int, int, int, double **, double **, int *, double *); -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * exp(2*pi*i*j1*k1/n1) * exp(2*pi*i*j2*k2/n2), 0<=k1 X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * exp(-2*pi*i*j1*k1/n1) * exp(-2*pi*i*j2*k2/n2), 0<=k1 ip[0] = 0; // first time only cdft2d(n1, 2*n2, 1, a, ip, w); ip[0] = 0; // first time only cdft2d(n1, 2*n2, -1, a, ip, w); [parameters] n1 :data length (int) n1 >= 1, n1 = power of 2 2*n2 :data length (int) n2 >= 1, n2 = power of 2 a[0...n1-1][0...2*n2-1] :input/output data (double **) input data a[j1][2*j2] = Re(x[j1][j2]), a[j1][2*j2+1] = Im(x[j1][j2]), 0<=j1= 2+sqrt(n) (n = max(n1, n2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1/2, n2/2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of cdft2d(n1, 2*n2, -1, a, ip, w); is cdft2d(n1, 2*n2, 1, a, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= 2 * n2 - 1; j2++) { a[j1][j2] *= 1.0 / (n1 * n2); } } . -------- Real DFT / Inverse of Real DFT -------- [definition] RDFT R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), 0<=k1 IRDFT (excluding scale) a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 (R[j1][j2] * cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + I[j1][j2] * sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), 0<=k1 ip[0] = 0; // first time only rdft2d(n1, n2, 1, a, ip, w); ip[0] = 0; // first time only rdft2d(n1, n2, -1, a, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 a[0...n1-1][0...n2-1] :input/output data (double **) output data a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2], a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2], 0 input data a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2], a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2], 0= 2+sqrt(n) (n = max(n1, n2/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1/2, n2/4) + n2/4 w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of rdft2d(n1, n2, 1, a, ip, w); is rdft2d(n1, n2, -1, a, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] *= 2.0 / (n1 * n2); } } . -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] IDCT (excluding scale) C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * cos(pi*j1*(k1+1/2)/n1) * cos(pi*j2*(k2+1/2)/n2), 0<=k1 DCT C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * cos(pi*(j1+1/2)*k1/n1) * cos(pi*(j2+1/2)*k2/n2), 0<=k1 ip[0] = 0; // first time only ddct2d(n1, n2, 1, a, t, ip, w); ip[0] = 0; // first time only ddct2d(n1, n2, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 a[0...n1-1][0...n2-1] :input/output data (double **) output data a[k1][k2] = C[k1][k2], 0<=k1= 2+sqrt(n) (n = max(n1, n2/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1/2, n2/4) + max(n1, n2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddct2d(n1, n2, -1, a, t, ip, w); is for (j1 = 0; j1 <= n1 - 1; j1++) { a[j1][0] *= 0.5; } for (j2 = 0; j2 <= n2 - 1; j2++) { a[0][j2] *= 0.5; } ddct2d(n1, n2, 1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] *= 4.0 / (n1 * n2); } } . -------- DST (Discrete Sine Transform) / Inverse of DST -------- [definition] IDST (excluding scale) S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] * sin(pi*j1*(k1+1/2)/n1) * sin(pi*j2*(k2+1/2)/n2), 0<=k1 DST S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * sin(pi*(j1+1/2)*k1/n1) * sin(pi*(j2+1/2)*k2/n2), 0 ip[0] = 0; // first time only ddst2d(n1, n2, 1, a, t, ip, w); ip[0] = 0; // first time only ddst2d(n1, n2, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 a[0...n1-1][0...n2-1] :input/output data (double **) input data a[j1][j2] = A[j1][j2], 0 output data a[k1][k2] = S[k1][k2], 0= 2+sqrt(n) (n = max(n1, n2/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1/2, n2/4) + max(n1, n2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddst2d(n1, n2, -1, a, t, ip, w); is for (j1 = 0; j1 <= n1 - 1; j1++) { a[j1][0] *= 0.5; } for (j2 = 0; j2 <= n2 - 1; j2++) { a[0][j2] *= 0.5; } ddst2d(n1, n2, 1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] *= 4.0 / (n1 * n2); } } . */ void cdft2d(int n1, int n2, int isgn, double **a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void bitrv2col(int n1, int n, int *ip, double **a); void bitrv2row(int n, int n2, int *ip, double **a); void cftbcol(int n1, int n, double **a, double *w); void cftbrow(int n, int n2, double **a, double *w); void cftfcol(int n1, int n, double **a, double *w); void cftfrow(int n, int n2, double **a, double *w); int n; n = n1 << 1; if (n < n2) { n = n2; } if (n > (ip[0] << 2)) { makewt(n >> 2, ip, w); } if (n2 > 4) { bitrv2col(n1, n2, ip + 2, a); } if (n1 > 2) { bitrv2row(n1, n2, ip + 2, a); } if (isgn < 0) { cftfcol(n1, n2, a, w); cftfrow(n1, n2, a, w); } else { cftbcol(n1, n2, a, w); cftbrow(n1, n2, a, w); } } void rdft2d(int n1, int n2, int isgn, double **a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2col(int n1, int n, int *ip, double **a); void bitrv2row(int n, int n2, int *ip, double **a); void cftbcol(int n1, int n, double **a, double *w); void cftbrow(int n, int n2, double **a, double *w); void cftfcol(int n1, int n, double **a, double *w); void cftfrow(int n, int n2, double **a, double *w); void rftbcol(int n1, int n, double **a, int nc, double *c); void rftfcol(int n1, int n, double **a, int nc, double *c); int n, nw, nc, n1h, i, j; double xi; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, ip, w + nw); } n1h = n1 >> 1; if (isgn < 0) { for (i = 1; i <= n1h - 1; i++) { j = n1 - i; xi = a[i][0] - a[j][0]; a[i][0] += a[j][0]; a[j][0] = xi; xi = a[j][1] - a[i][1]; a[i][1] += a[j][1]; a[j][1] = xi; } if (n1 > 2) { bitrv2row(n1, n2, ip + 2, a); } cftfrow(n1, n2, a, w); for (i = 0; i <= n1 - 1; i++) { a[i][1] = 0.5 * (a[i][0] - a[i][1]); a[i][0] -= a[i][1]; } if (n2 > 4) { rftfcol(n1, n2, a, nc, w + nw); bitrv2col(n1, n2, ip + 2, a); } cftfcol(n1, n2, a, w); } else { if (n2 > 4) { bitrv2col(n1, n2, ip + 2, a); } cftbcol(n1, n2, a, w); if (n2 > 4) { rftbcol(n1, n2, a, nc, w + nw); } for (i = 0; i <= n1 - 1; i++) { xi = a[i][0] - a[i][1]; a[i][0] += a[i][1]; a[i][1] = xi; } if (n1 > 2) { bitrv2row(n1, n2, ip + 2, a); } cftbrow(n1, n2, a, w); for (i = 1; i <= n1h - 1; i++) { j = n1 - i; a[j][0] = 0.5 * (a[i][0] - a[j][0]); a[i][0] -= a[j][0]; a[j][1] = 0.5 * (a[i][1] + a[j][1]); a[i][1] -= a[j][1]; } } } void ddct2d(int n1, int n2, int isgn, double **a, double **t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2col(int n1, int n, int *ip, double **a); void bitrv2row(int n, int n2, int *ip, double **a); void cftbcol(int n1, int n, double **a, double *w); void cftbrow(int n, int n2, double **a, double *w); void cftfcol(int n1, int n, double **a, double *w); void cftfrow(int n, int n2, double **a, double *w); void rftbcol(int n1, int n, double **a, int nc, double *c); void rftfcol(int n1, int n, double **a, int nc, double *c); void dctbsub(int n1, int n2, double **a, int nc, double *c); void dctfsub(int n1, int n2, double **a, int nc, double *c); int n, nw, nc, n1h, n2h, i, ix, ic, j, jx, jc; double xi; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n1 > nc || n2 > nc) { if (n1 > n2) { nc = n1; } else { nc = n2; } makect(nc, ip, w + nw); } n1h = n1 >> 1; n2h = n2 >> 1; if (isgn >= 0) { for (i = 0; i <= n1 - 1; i++) { for (j = 1; j <= n2h - 1; j++) { jx = j << 1; t[i][jx] = a[i][j]; t[i][jx + 1] = a[i][n2 - j]; } } t[0][0] = a[0][0]; t[0][1] = a[0][n2h]; t[n1h][0] = a[n1h][0]; t[n1h][1] = a[n1h][n2h]; for (i = 1; i <= n1h - 1; i++) { ic = n1 - i; t[i][0] = a[i][0]; t[ic][1] = a[i][n2h]; t[i][1] = a[ic][0]; t[ic][0] = a[ic][n2h]; } dctfsub(n1, n2, t, nc, w + nw); if (n1 > 2) { bitrv2row(n1, n2, ip + 2, t); } cftfrow(n1, n2, t, w); for (i = 0; i <= n1 - 1; i++) { t[i][1] = 0.5 * (t[i][0] - t[i][1]); t[i][0] -= t[i][1]; } if (n2 > 4) { rftfcol(n1, n2, t, nc, w + nw); bitrv2col(n1, n2, ip + 2, t); } cftfcol(n1, n2, t, w); for (i = 0; i <= n1h - 1; i++) { ix = i << 1; ic = n1 - 1 - i; for (j = 0; j <= n2h - 1; j++) { jx = j << 1; jc = n2 - 1 - j; a[ix][jx] = t[i][j]; a[ix][jx + 1] = t[i][jc]; a[ix + 1][jx] = t[ic][j]; a[ix + 1][jx + 1] = t[ic][jc]; } } } else { for (i = 0; i <= n1h - 1; i++) { ix = i << 1; ic = n1 - 1 - i; for (j = 0; j <= n2h - 1; j++) { jx = j << 1; jc = n2 - 1 - j; t[i][j] = a[ix][jx]; t[i][jc] = a[ix][jx + 1]; t[ic][j] = a[ix + 1][jx]; t[ic][jc] = a[ix + 1][jx + 1]; } } if (n2 > 4) { bitrv2col(n1, n2, ip + 2, t); } cftbcol(n1, n2, t, w); if (n2 > 4) { rftbcol(n1, n2, t, nc, w + nw); } for (i = 0; i <= n1 - 1; i++) { xi = t[i][0] - t[i][1]; t[i][0] += t[i][1]; t[i][1] = xi; } if (n1 > 2) { bitrv2row(n1, n2, ip + 2, t); } cftbrow(n1, n2, t, w); dctbsub(n1, n2, t, nc, w + nw); for (i = 0; i <= n1 - 1; i++) { for (j = 1; j <= n2h - 1; j++) { jx = j << 1; a[i][j] = t[i][jx]; a[i][n2 - j] = t[i][jx + 1]; } } a[0][0] = t[0][0]; a[0][n2h] = t[0][1]; a[n1h][0] = t[n1h][0]; a[n1h][n2h] = t[n1h][1]; for (i = 1; i <= n1h - 1; i++) { ic = n1 - i; a[i][0] = t[i][0]; a[i][n2h] = t[ic][1]; a[ic][0] = t[i][1]; a[ic][n2h] = t[ic][0]; } } } void ddst2d(int n1, int n2, int isgn, double **a, double **t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2col(int n1, int n, int *ip, double **a); void bitrv2row(int n, int n2, int *ip, double **a); void cftbcol(int n1, int n, double **a, double *w); void cftbrow(int n, int n2, double **a, double *w); void cftfcol(int n1, int n, double **a, double *w); void cftfrow(int n, int n2, double **a, double *w); void rftbcol(int n1, int n, double **a, int nc, double *c); void rftfcol(int n1, int n, double **a, int nc, double *c); void dstbsub(int n1, int n2, double **a, int nc, double *c); void dstfsub(int n1, int n2, double **a, int nc, double *c); int n, nw, nc, n1h, n2h, i, ix, ic, j, jx, jc; double xi; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n1 > nc || n2 > nc) { if (n1 > n2) { nc = n1; } else { nc = n2; } makect(nc, ip, w + nw); } n1h = n1 >> 1; n2h = n2 >> 1; if (isgn >= 0) { for (i = 0; i <= n1 - 1; i++) { for (j = 1; j <= n2h - 1; j++) { jx = j << 1; t[i][jx] = a[i][j]; t[i][jx + 1] = a[i][n2 - j]; } } t[0][0] = a[0][0]; t[0][1] = a[0][n2h]; t[n1h][0] = a[n1h][0]; t[n1h][1] = a[n1h][n2h]; for (i = 1; i <= n1h - 1; i++) { ic = n1 - i; t[i][0] = a[i][0]; t[ic][1] = a[i][n2h]; t[i][1] = a[ic][0]; t[ic][0] = a[ic][n2h]; } dstfsub(n1, n2, t, nc, w + nw); if (n1 > 2) { bitrv2row(n1, n2, ip + 2, t); } cftfrow(n1, n2, t, w); for (i = 0; i <= n1 - 1; i++) { t[i][1] = 0.5 * (t[i][0] - t[i][1]); t[i][0] -= t[i][1]; } if (n2 > 4) { rftfcol(n1, n2, t, nc, w + nw); bitrv2col(n1, n2, ip + 2, t); } cftfcol(n1, n2, t, w); for (i = 0; i <= n1h - 1; i++) { ix = i << 1; ic = n1 - 1 - i; for (j = 0; j <= n2h - 1; j++) { jx = j << 1; jc = n2 - 1 - j; a[ix][jx] = t[i][j]; a[ix][jx + 1] = -t[i][jc]; a[ix + 1][jx] = -t[ic][j]; a[ix + 1][jx + 1] = t[ic][jc]; } } } else { for (i = 0; i <= n1h - 1; i++) { ix = i << 1; ic = n1 - 1 - i; for (j = 0; j <= n2h - 1; j++) { jx = j << 1; jc = n2 - 1 - j; t[i][j] = a[ix][jx]; t[i][jc] = -a[ix][jx + 1]; t[ic][j] = -a[ix + 1][jx]; t[ic][jc] = a[ix + 1][jx + 1]; } } if (n2 > 4) { bitrv2col(n1, n2, ip + 2, t); } cftbcol(n1, n2, t, w); if (n2 > 4) { rftbcol(n1, n2, t, nc, w + nw); } for (i = 0; i <= n1 - 1; i++) { xi = t[i][0] - t[i][1]; t[i][0] += t[i][1]; t[i][1] = xi; } if (n1 > 2) { bitrv2row(n1, n2, ip + 2, t); } cftbrow(n1, n2, t, w); dstbsub(n1, n2, t, nc, w + nw); for (i = 0; i <= n1 - 1; i++) { for (j = 1; j <= n2h - 1; j++) { jx = j << 1; a[i][j] = t[i][jx]; a[i][n2 - j] = t[i][jx + 1]; } } a[0][0] = t[0][0]; a[0][n2h] = t[0][1]; a[n1h][0] = t[n1h][0]; a[n1h][n2h] = t[n1h][1]; for (i = 1; i <= n1h - 1; i++) { ic = n1 - i; a[i][0] = t[i][0]; a[i][n2h] = t[ic][1]; a[ic][0] = t[i][1]; a[ic][n2h] = t[ic][0]; } } } /* -------- initializing routines -------- */ #include void makewt(int nw, int *ip, double *w) { void bitrv2(int n, int *ip, double *a); int nwh, j; double delta, x, y; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; delta = atan(1.0) / nwh; w[0] = 1; w[1] = 0; w[nwh] = cos(delta * nwh); w[nwh + 1] = w[nwh]; for (j = 2; j <= nwh - 2; j += 2) { x = cos(delta * j); y = sin(delta * j); w[j] = x; w[j + 1] = y; w[nw - j] = y; w[nw - j + 1] = x; } bitrv2(nw, ip + 2, w); } } void makect(int nc, int *ip, double *c) { int nch, j; double delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; delta = atan(1.0) / nch; c[0] = 0.5; c[nch] = 0.5 * cos(delta * nch); for (j = 1; j <= nch - 1; j++) { c[j] = 0.5 * cos(delta * j); c[nc - j] = 0.5 * sin(delta * j); } } } /* -------- child routines -------- */ void bitrv2(int n, int *ip, double *a) { int j, j1, k, k1, l, m, m2; double xr, xi; ip[0] = 0; l = n; m = 1; while ((m << 2) < l) { l >>= 1; for (j = 0; j <= m - 1; j++) { ip[m + j] = ip[j] + l; } m <<= 1; } if ((m << 2) > l) { for (k = 1; k <= m - 1; k++) { for (j = 0; j <= k - 1; j++) { j1 = (j << 1) + ip[k]; k1 = (k << 1) + ip[j]; xr = a[j1]; xi = a[j1 + 1]; a[j1] = a[k1]; a[j1 + 1] = a[k1 + 1]; a[k1] = xr; a[k1 + 1] = xi; } } } else { m2 = m << 1; for (k = 1; k <= m - 1; k++) { for (j = 0; j <= k - 1; j++) { j1 = (j << 1) + ip[k]; k1 = (k << 1) + ip[j]; xr = a[j1]; xi = a[j1 + 1]; a[j1] = a[k1]; a[j1 + 1] = a[k1 + 1]; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += m2; xr = a[j1]; xi = a[j1 + 1]; a[j1] = a[k1]; a[j1 + 1] = a[k1 + 1]; a[k1] = xr; a[k1 + 1] = xi; } } } } void bitrv2col(int n1, int n, int *ip, double **a) { int i, j, j1, k, k1, l, m, m2; double xr, xi; ip[0] = 0; l = n; m = 1; while ((m << 2) < l) { l >>= 1; for (j = 0; j <= m - 1; j++) { ip[m + j] = ip[j] + l; } m <<= 1; } if ((m << 2) > l) { for (i = 0; i <= n1 - 1; i++) { for (k = 1; k <= m - 1; k++) { for (j = 0; j <= k - 1; j++) { j1 = (j << 1) + ip[k]; k1 = (k << 1) + ip[j]; xr = a[i][j1]; xi = a[i][j1 + 1]; a[i][j1] = a[i][k1]; a[i][j1 + 1] = a[i][k1 + 1]; a[i][k1] = xr; a[i][k1 + 1] = xi; } } } } else { m2 = m << 1; for (i = 0; i <= n1 - 1; i++) { for (k = 1; k <= m - 1; k++) { for (j = 0; j <= k - 1; j++) { j1 = (j << 1) + ip[k]; k1 = (k << 1) + ip[j]; xr = a[i][j1]; xi = a[i][j1 + 1]; a[i][j1] = a[i][k1]; a[i][j1 + 1] = a[i][k1 + 1]; a[i][k1] = xr; a[i][k1 + 1] = xi; j1 += m2; k1 += m2; xr = a[i][j1]; xi = a[i][j1 + 1]; a[i][j1] = a[i][k1]; a[i][j1 + 1] = a[i][k1 + 1]; a[i][k1] = xr; a[i][k1 + 1] = xi; } } } } } void bitrv2row(int n, int n2, int *ip, double **a) { int i, j, j1, k, k1, l, m; double xr, xi; ip[0] = 0; l = n; m = 1; while ((m << 1) < l) { l >>= 1; for (j = 0; j <= m - 1; j++) { ip[m + j] = ip[j] + l; } m <<= 1; } if ((m << 1) > l) { for (k = 1; k <= m - 1; k++) { for (j = 0; j <= k - 1; j++) { j1 = j + ip[k]; k1 = k + ip[j]; for (i = 0; i <= n2 - 2; i += 2) { xr = a[j1][i]; xi = a[j1][i + 1]; a[j1][i] = a[k1][i]; a[j1][i + 1] = a[k1][i + 1]; a[k1][i] = xr; a[k1][i + 1] = xi; } } } } else { for (k = 1; k <= m - 1; k++) { for (j = 0; j <= k - 1; j++) { j1 = j + ip[k]; k1 = k + ip[j]; for (i = 0; i <= n2 - 2; i += 2) { xr = a[j1][i]; xi = a[j1][i + 1]; a[j1][i] = a[k1][i]; a[j1][i + 1] = a[k1][i + 1]; a[k1][i] = xr; a[k1][i + 1] = xi; } j1 += m; k1 += m; for (i = 0; i <= n2 - 2; i += 2) { xr = a[j1][i]; xi = a[j1][i + 1]; a[j1][i] = a[k1][i]; a[j1][i + 1] = a[k1][i + 1]; a[k1][i] = xr; a[k1][i + 1] = xi; } } } } } void cftbcol(int n1, int n, double **a, double *w) { int i, j, j1, j2, j3, k, k1, ks, l, m; double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; for (i = 0; i <= n1 - 1; i++) { l = 2; while ((l << 1) < n) { m = l << 2; for (j = 0; j <= l - 2; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[i][j] + a[i][j1]; x0i = a[i][j + 1] + a[i][j1 + 1]; x1r = a[i][j] - a[i][j1]; x1i = a[i][j + 1] - a[i][j1 + 1]; x2r = a[i][j2] + a[i][j3]; x2i = a[i][j2 + 1] + a[i][j3 + 1]; x3r = a[i][j2] - a[i][j3]; x3i = a[i][j2 + 1] - a[i][j3 + 1]; a[i][j] = x0r + x2r; a[i][j + 1] = x0i + x2i; a[i][j2] = x0r - x2r; a[i][j2 + 1] = x0i - x2i; a[i][j1] = x1r - x3i; a[i][j1 + 1] = x1i + x3r; a[i][j3] = x1r + x3i; a[i][j3 + 1] = x1i - x3r; } if (m < n) { wk1r = w[2]; for (j = m; j <= l + m - 2; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[i][j] + a[i][j1]; x0i = a[i][j + 1] + a[i][j1 + 1]; x1r = a[i][j] - a[i][j1]; x1i = a[i][j + 1] - a[i][j1 + 1]; x2r = a[i][j2] + a[i][j3]; x2i = a[i][j2 + 1] + a[i][j3 + 1]; x3r = a[i][j2] - a[i][j3]; x3i = a[i][j2 + 1] - a[i][j3 + 1]; a[i][j] = x0r + x2r; a[i][j + 1] = x0i + x2i; a[i][j2] = x2i - x0i; a[i][j2 + 1] = x0r - x2r; x0r = x1r - x3i; x0i = x1i + x3r; a[i][j1] = wk1r * (x0r - x0i); a[i][j1 + 1] = wk1r * (x0r + x0i); x0r = x3i + x1r; x0i = x3r - x1i; a[i][j3] = wk1r * (x0i - x0r); a[i][j3 + 1] = wk1r * (x0i + x0r); } k1 = 1; ks = -1; for (k = (m << 1); k <= n - m; k += m) { k1++; ks = -ks; wk1r = w[k1 << 1]; wk1i = w[(k1 << 1) + 1]; wk2r = ks * w[k1]; wk2i = w[k1 + ks]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; for (j = k; j <= l + k - 2; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[i][j] + a[i][j1]; x0i = a[i][j + 1] + a[i][j1 + 1]; x1r = a[i][j] - a[i][j1]; x1i = a[i][j + 1] - a[i][j1 + 1]; x2r = a[i][j2] + a[i][j3]; x2i = a[i][j2 + 1] + a[i][j3 + 1]; x3r = a[i][j2] - a[i][j3]; x3i = a[i][j2 + 1] - a[i][j3 + 1]; a[i][j] = x0r + x2r; a[i][j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[i][j2] = wk2r * x0r - wk2i * x0i; a[i][j2 + 1] = wk2r * x0i + wk2i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[i][j1] = wk1r * x0r - wk1i * x0i; a[i][j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[i][j3] = wk3r * x0r - wk3i * x0i; a[i][j3 + 1] = wk3r * x0i + wk3i * x0r; } } } l = m; } if (l < n) { for (j = 0; j <= l - 2; j += 2) { j1 = j + l; x0r = a[i][j] - a[i][j1]; x0i = a[i][j + 1] - a[i][j1 + 1]; a[i][j] += a[i][j1]; a[i][j + 1] += a[i][j1 + 1]; a[i][j1] = x0r; a[i][j1 + 1] = x0i; } } } } void cftbrow(int n, int n2, double **a, double *w) { int i, j, j1, j2, j3, k, k1, ks, l, m; double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 1; while ((l << 1) < n) { m = l << 2; for (j = 0; j <= l - 1; j++) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; for (i = 0; i <= n2 - 2; i += 2) { x0r = a[j][i] + a[j1][i]; x0i = a[j][i + 1] + a[j1][i + 1]; x1r = a[j][i] - a[j1][i]; x1i = a[j][i + 1] - a[j1][i + 1]; x2r = a[j2][i] + a[j3][i]; x2i = a[j2][i + 1] + a[j3][i + 1]; x3r = a[j2][i] - a[j3][i]; x3i = a[j2][i + 1] - a[j3][i + 1]; a[j][i] = x0r + x2r; a[j][i + 1] = x0i + x2i; a[j2][i] = x0r - x2r; a[j2][i + 1] = x0i - x2i; a[j1][i] = x1r - x3i; a[j1][i + 1] = x1i + x3r; a[j3][i] = x1r + x3i; a[j3][i + 1] = x1i - x3r; } } if (m < n) { wk1r = w[2]; for (j = m; j <= l + m - 1; j++) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; for (i = 0; i <= n2 - 2; i += 2) { x0r = a[j][i] + a[j1][i]; x0i = a[j][i + 1] + a[j1][i + 1]; x1r = a[j][i] - a[j1][i]; x1i = a[j][i + 1] - a[j1][i + 1]; x2r = a[j2][i] + a[j3][i]; x2i = a[j2][i + 1] + a[j3][i + 1]; x3r = a[j2][i] - a[j3][i]; x3i = a[j2][i + 1] - a[j3][i + 1]; a[j][i] = x0r + x2r; a[j][i + 1] = x0i + x2i; a[j2][i] = x2i - x0i; a[j2][i + 1] = x0r - x2r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1][i] = wk1r * (x0r - x0i); a[j1][i + 1] = wk1r * (x0r + x0i); x0r = x3i + x1r; x0i = x3r - x1i; a[j3][i] = wk1r * (x0i - x0r); a[j3][i + 1] = wk1r * (x0i + x0r); } } k1 = 1; ks = -1; for (k = (m << 1); k <= n - m; k += m) { k1++; ks = -ks; wk1r = w[k1 << 1]; wk1i = w[(k1 << 1) + 1]; wk2r = ks * w[k1]; wk2i = w[k1 + ks]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; for (j = k; j <= l + k - 1; j++) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; for (i = 0; i <= n2 - 2; i += 2) { x0r = a[j][i] + a[j1][i]; x0i = a[j][i + 1] + a[j1][i + 1]; x1r = a[j][i] - a[j1][i]; x1i = a[j][i + 1] - a[j1][i + 1]; x2r = a[j2][i] + a[j3][i]; x2i = a[j2][i + 1] + a[j3][i + 1]; x3r = a[j2][i] - a[j3][i]; x3i = a[j2][i + 1] - a[j3][i + 1]; a[j][i] = x0r + x2r; a[j][i + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j2][i] = wk2r * x0r - wk2i * x0i; a[j2][i + 1] = wk2r * x0i + wk2i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1][i] = wk1r * x0r - wk1i * x0i; a[j1][i + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3][i] = wk3r * x0r - wk3i * x0i; a[j3][i + 1] = wk3r * x0i + wk3i * x0r; } } } } l = m; } if (l < n) { for (j = 0; j <= l - 1; j++) { j1 = j + l; for (i = 0; i <= n2 - 2; i += 2) { x0r = a[j][i] - a[j1][i]; x0i = a[j][i + 1] - a[j1][i + 1]; a[j][i] += a[j1][i]; a[j][i + 1] += a[j1][i + 1]; a[j1][i] = x0r; a[j1][i + 1] = x0i; } } } } void cftfcol(int n1, int n, double **a, double *w) { int i, j, j1, j2, j3, k, k1, ks, l, m; double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; for (i = 0; i <= n1 - 1; i++) { l = 2; while ((l << 1) < n) { m = l << 2; for (j = 0; j <= l - 2; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[i][j] + a[i][j1]; x0i = a[i][j + 1] + a[i][j1 + 1]; x1r = a[i][j] - a[i][j1]; x1i = a[i][j + 1] - a[i][j1 + 1]; x2r = a[i][j2] + a[i][j3]; x2i = a[i][j2 + 1] + a[i][j3 + 1]; x3r = a[i][j2] - a[i][j3]; x3i = a[i][j2 + 1] - a[i][j3 + 1]; a[i][j] = x0r + x2r; a[i][j + 1] = x0i + x2i; a[i][j2] = x0r - x2r; a[i][j2 + 1] = x0i - x2i; a[i][j1] = x1r + x3i; a[i][j1 + 1] = x1i - x3r; a[i][j3] = x1r - x3i; a[i][j3 + 1] = x1i + x3r; } if (m < n) { wk1r = w[2]; for (j = m; j <= l + m - 2; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[i][j] + a[i][j1]; x0i = a[i][j + 1] + a[i][j1 + 1]; x1r = a[i][j] - a[i][j1]; x1i = a[i][j + 1] - a[i][j1 + 1]; x2r = a[i][j2] + a[i][j3]; x2i = a[i][j2 + 1] + a[i][j3 + 1]; x3r = a[i][j2] - a[i][j3]; x3i = a[i][j2 + 1] - a[i][j3 + 1]; a[i][j] = x0r + x2r; a[i][j + 1] = x0i + x2i; a[i][j2] = x0i - x2i; a[i][j2 + 1] = x2r - x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[i][j1] = wk1r * (x0i + x0r); a[i][j1 + 1] = wk1r * (x0i - x0r); x0r = x3i - x1r; x0i = x3r + x1i; a[i][j3] = wk1r * (x0r + x0i); a[i][j3 + 1] = wk1r * (x0r - x0i); } k1 = 1; ks = -1; for (k = (m << 1); k <= n - m; k += m) { k1++; ks = -ks; wk1r = w[k1 << 1]; wk1i = w[(k1 << 1) + 1]; wk2r = ks * w[k1]; wk2i = w[k1 + ks]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; for (j = k; j <= l + k - 2; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[i][j] + a[i][j1]; x0i = a[i][j + 1] + a[i][j1 + 1]; x1r = a[i][j] - a[i][j1]; x1i = a[i][j + 1] - a[i][j1 + 1]; x2r = a[i][j2] + a[i][j3]; x2i = a[i][j2 + 1] + a[i][j3 + 1]; x3r = a[i][j2] - a[i][j3]; x3i = a[i][j2 + 1] - a[i][j3 + 1]; a[i][j] = x0r + x2r; a[i][j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[i][j2] = wk2r * x0r + wk2i * x0i; a[i][j2 + 1] = wk2r * x0i - wk2i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[i][j1] = wk1r * x0r + wk1i * x0i; a[i][j1 + 1] = wk1r * x0i - wk1i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[i][j3] = wk3r * x0r + wk3i * x0i; a[i][j3 + 1] = wk3r * x0i - wk3i * x0r; } } } l = m; } if (l < n) { for (j = 0; j <= l - 2; j += 2) { j1 = j + l; x0r = a[i][j] - a[i][j1]; x0i = a[i][j + 1] - a[i][j1 + 1]; a[i][j] += a[i][j1]; a[i][j + 1] += a[i][j1 + 1]; a[i][j1] = x0r; a[i][j1 + 1] = x0i; } } } } void cftfrow(int n, int n2, double **a, double *w) { int i, j, j1, j2, j3, k, k1, ks, l, m; double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 1; while ((l << 1) < n) { m = l << 2; for (j = 0; j <= l - 1; j++) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; for (i = 0; i <= n2 - 2; i += 2) { x0r = a[j][i] + a[j1][i]; x0i = a[j][i + 1] + a[j1][i + 1]; x1r = a[j][i] - a[j1][i]; x1i = a[j][i + 1] - a[j1][i + 1]; x2r = a[j2][i] + a[j3][i]; x2i = a[j2][i + 1] + a[j3][i + 1]; x3r = a[j2][i] - a[j3][i]; x3i = a[j2][i + 1] - a[j3][i + 1]; a[j][i] = x0r + x2r; a[j][i + 1] = x0i + x2i; a[j2][i] = x0r - x2r; a[j2][i + 1] = x0i - x2i; a[j1][i] = x1r + x3i; a[j1][i + 1] = x1i - x3r; a[j3][i] = x1r - x3i; a[j3][i + 1] = x1i + x3r; } } if (m < n) { wk1r = w[2]; for (j = m; j <= l + m - 1; j++) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; for (i = 0; i <= n2 - 2; i += 2) { x0r = a[j][i] + a[j1][i]; x0i = a[j][i + 1] + a[j1][i + 1]; x1r = a[j][i] - a[j1][i]; x1i = a[j][i + 1] - a[j1][i + 1]; x2r = a[j2][i] + a[j3][i]; x2i = a[j2][i + 1] + a[j3][i + 1]; x3r = a[j2][i] - a[j3][i]; x3i = a[j2][i + 1] - a[j3][i + 1]; a[j][i] = x0r + x2r; a[j][i + 1] = x0i + x2i; a[j2][i] = x0i - x2i; a[j2][i + 1] = x2r - x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j1][i] = wk1r * (x0i + x0r); a[j1][i + 1] = wk1r * (x0i - x0r); x0r = x3i - x1r; x0i = x3r + x1i; a[j3][i] = wk1r * (x0r + x0i); a[j3][i + 1] = wk1r * (x0r - x0i); } } k1 = 1; ks = -1; for (k = (m << 1); k <= n - m; k += m) { k1++; ks = -ks; wk1r = w[k1 << 1]; wk1i = w[(k1 << 1) + 1]; wk2r = ks * w[k1]; wk2i = w[k1 + ks]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; for (j = k; j <= l + k - 1; j++) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; for (i = 0; i <= n2 - 2; i += 2) { x0r = a[j][i] + a[j1][i]; x0i = a[j][i + 1] + a[j1][i + 1]; x1r = a[j][i] - a[j1][i]; x1i = a[j][i + 1] - a[j1][i + 1]; x2r = a[j2][i] + a[j3][i]; x2i = a[j2][i + 1] + a[j3][i + 1]; x3r = a[j2][i] - a[j3][i]; x3i = a[j2][i + 1] - a[j3][i + 1]; a[j][i] = x0r + x2r; a[j][i + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j2][i] = wk2r * x0r + wk2i * x0i; a[j2][i + 1] = wk2r * x0i - wk2i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j1][i] = wk1r * x0r + wk1i * x0i; a[j1][i + 1] = wk1r * x0i - wk1i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j3][i] = wk3r * x0r + wk3i * x0i; a[j3][i + 1] = wk3r * x0i - wk3i * x0r; } } } } l = m; } if (l < n) { for (j = 0; j <= l - 1; j++) { j1 = j + l; for (i = 0; i <= n2 - 2; i += 2) { x0r = a[j][i] - a[j1][i]; x0i = a[j][i + 1] - a[j1][i + 1]; a[j][i] += a[j1][i]; a[j][i + 1] += a[j1][i + 1]; a[j1][i] = x0r; a[j1][i + 1] = x0i; } } } } void rftbcol(int n1, int n, double **a, int nc, double *c) { int i, j, k, kk, ks; double wkr, wki, xr, xi, yr, yi; ks = (nc << 2) / n; for (i = 0; i <= n1 - 1; i++) { kk = 0; for (k = (n >> 1) - 2; k >= 2; k -= 2) { j = n - k; kk += ks; wkr = 0.5 - c[kk]; wki = c[nc - kk]; xr = a[i][k] - a[i][j]; xi = a[i][k + 1] + a[i][j + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[i][k] -= yr; a[i][k + 1] -= yi; a[i][j] += yr; a[i][j + 1] -= yi; } } } void rftfcol(int n1, int n, double **a, int nc, double *c) { int i, j, k, kk, ks; double wkr, wki, xr, xi, yr, yi; ks = (nc << 2) / n; for (i = 0; i <= n1 - 1; i++) { kk = 0; for (k = (n >> 1) - 2; k >= 2; k -= 2) { j = n - k; kk += ks; wkr = 0.5 - c[kk]; wki = c[nc - kk]; xr = a[i][k] - a[i][j]; xi = a[i][k + 1] + a[i][j + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[i][k] -= yr; a[i][k + 1] -= yi; a[i][j] += yr; a[i][j + 1] -= yi; } } } void dctbsub(int n1, int n2, double **a, int nc, double *c) { int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; ks1 = nc / n1; ks2 = nc / n2; n1h = n1 >> 1; kk1 = ks1; for (k1 = 1; k1 <= n1h - 1; k1++) { j1 = n1 - k1; w1r = 2 * c[kk1]; w1i = 2 * c[nc - kk1]; kk1 += ks1; kk2 = ks2; for (k2 = 2; k2 <= n2 - 2; k2 += 2) { x0r = w1r * c[kk2]; x0i = w1i * c[kk2]; x1r = w1r * c[nc - kk2]; x1i = w1i * c[nc - kk2]; wkr = x0r - x1i; wki = x0i + x1r; wji = x0r + x1i; wjr = x0i - x1r; kk2 += ks2; x0r = wkr * a[k1][k2] - wki * a[k1][k2 + 1]; x0i = wkr * a[k1][k2 + 1] + wki * a[k1][k2]; x1r = wjr * a[j1][k2] - wji * a[j1][k2 + 1]; x1i = wjr * a[j1][k2 + 1] + wji * a[j1][k2]; a[k1][k2] = x0r + x1i; a[k1][k2 + 1] = x0i - x1r; a[j1][k2] = x1r + x0i; a[j1][k2 + 1] = x1i - x0r; } wkr = w1r * 0.5; wki = w1i * 0.5; wjr = w1r * c[kk2]; wji = w1i * c[kk2]; x0r = a[k1][0] + a[j1][0]; x0i = a[k1][1] - a[j1][1]; x1r = a[k1][0] - a[j1][0]; x1i = a[k1][1] + a[j1][1]; a[k1][0] = wkr * x0r - wki * x0i; a[k1][1] = wkr * x0i + wki * x0r; a[j1][0] = -wjr * x1r + wji * x1i; a[j1][1] = wjr * x1i + wji * x1r; } w1r = 2 * c[kk1]; kk2 = ks2; for (k2 = 2; k2 <= n2 - 2; k2 += 2) { wkr = 2 * c[kk2]; wki = 2 * c[nc - kk2]; wjr = w1r * wkr; wji = w1r * wki; kk2 += ks2; x0i = wkr * a[0][k2 + 1] + wki * a[0][k2]; a[0][k2] = wkr * a[0][k2] - wki * a[0][k2 + 1]; a[0][k2 + 1] = x0i; x0i = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; a[n1h][k2] = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; a[n1h][k2 + 1] = x0i; } a[0][1] *= w1r; a[n1h][0] *= w1r; a[n1h][1] *= 0.5; } void dctfsub(int n1, int n2, double **a, int nc, double *c) { int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; ks1 = nc / n1; ks2 = nc / n2; n1h = n1 >> 1; kk1 = ks1; for (k1 = 1; k1 <= n1h - 1; k1++) { j1 = n1 - k1; w1r = 2 * c[kk1]; w1i = 2 * c[nc - kk1]; kk1 += ks1; kk2 = ks2; for (k2 = 2; k2 <= n2 - 2; k2 += 2) { x0r = w1r * c[kk2]; x0i = w1i * c[kk2]; x1r = w1r * c[nc - kk2]; x1i = w1i * c[nc - kk2]; wkr = x0r - x1i; wki = x0i + x1r; wji = x0r + x1i; wjr = x0i - x1r; kk2 += ks2; x0r = a[k1][k2] - a[j1][k2 + 1]; x0i = a[j1][k2] + a[k1][k2 + 1]; x1r = a[j1][k2] - a[k1][k2 + 1]; x1i = a[k1][k2] + a[j1][k2 + 1]; a[k1][k2] = wkr * x0r + wki * x0i; a[k1][k2 + 1] = wkr * x0i - wki * x0r; a[j1][k2] = wjr * x1r + wji * x1i; a[j1][k2 + 1] = wjr * x1i - wji * x1r; } x0r = 2 * c[kk2]; wjr = x0r * w1r; wji = x0r * w1i; x0r = w1r * a[k1][0] + w1i * a[k1][1]; x0i = w1r * a[k1][1] - w1i * a[k1][0]; x1r = -wjr * a[j1][0] + wji * a[j1][1]; x1i = wjr * a[j1][1] + wji * a[j1][0]; a[k1][0] = x0r + x1r; a[k1][1] = x1i + x0i; a[j1][0] = x0r - x1r; a[j1][1] = x1i - x0i; } w1r = 2 * c[kk1]; kk2 = ks2; for (k2 = 2; k2 <= n2 - 2; k2 += 2) { wkr = 2 * c[kk2]; wki = 2 * c[nc - kk2]; wjr = w1r * wkr; wji = w1r * wki; kk2 += ks2; x0i = wkr * a[0][k2 + 1] - wki * a[0][k2]; a[0][k2] = wkr * a[0][k2] + wki * a[0][k2 + 1]; a[0][k2 + 1] = x0i; x0i = wjr * a[n1h][k2 + 1] - wji * a[n1h][k2]; a[n1h][k2] = wjr * a[n1h][k2] + wji * a[n1h][k2 + 1]; a[n1h][k2 + 1] = x0i; } w1r *= 2; a[0][0] *= 2; a[0][1] *= w1r; a[n1h][0] *= w1r; } void dstbsub(int n1, int n2, double **a, int nc, double *c) { int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; ks1 = nc / n1; ks2 = nc / n2; n1h = n1 >> 1; kk1 = ks1; for (k1 = 1; k1 <= n1h - 1; k1++) { j1 = n1 - k1; w1r = 2 * c[kk1]; w1i = 2 * c[nc - kk1]; kk1 += ks1; kk2 = ks2; for (k2 = 2; k2 <= n2 - 2; k2 += 2) { x0r = w1r * c[kk2]; x0i = w1i * c[kk2]; x1r = w1r * c[nc - kk2]; x1i = w1i * c[nc - kk2]; wkr = x0r - x1i; wki = x0i + x1r; wji = x0r + x1i; wjr = x0i - x1r; kk2 += ks2; x0r = wkr * a[k1][k2] - wki * a[k1][k2 + 1]; x0i = wkr * a[k1][k2 + 1] + wki * a[k1][k2]; x1r = wjr * a[j1][k2] - wji * a[j1][k2 + 1]; x1i = wjr * a[j1][k2 + 1] + wji * a[j1][k2]; a[k1][k2] = x1i - x0r; a[k1][k2 + 1] = x1r + x0i; a[j1][k2] = x0i - x1r; a[j1][k2 + 1] = x0r + x1i; } wkr = w1r * 0.5; wki = w1i * 0.5; wjr = w1r * c[kk2]; wji = w1i * c[kk2]; x0r = a[k1][0] + a[j1][0]; x0i = a[k1][1] - a[j1][1]; x1r = a[k1][0] - a[j1][0]; x1i = a[k1][1] + a[j1][1]; a[k1][1] = wkr * x0r - wki * x0i; a[k1][0] = wkr * x0i + wki * x0r; a[j1][1] = -wjr * x1r + wji * x1i; a[j1][0] = wjr * x1i + wji * x1r; } w1r = 2 * c[kk1]; kk2 = ks2; for (k2 = 2; k2 <= n2 - 2; k2 += 2) { wkr = 2 * c[kk2]; wki = 2 * c[nc - kk2]; wjr = w1r * wkr; wji = w1r * wki; kk2 += ks2; x0i = wkr * a[0][k2 + 1] + wki * a[0][k2]; a[0][k2 + 1] = wkr * a[0][k2] - wki * a[0][k2 + 1]; a[0][k2] = x0i; x0i = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; a[n1h][k2 + 1] = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; a[n1h][k2] = x0i; } a[0][1] *= w1r; a[n1h][0] *= w1r; a[n1h][1] *= 0.5; } void dstfsub(int n1, int n2, double **a, int nc, double *c) { int kk1, kk2, ks1, ks2, n1h, j1, k1, k2; double w1r, w1i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i; ks1 = nc / n1; ks2 = nc / n2; n1h = n1 >> 1; kk1 = ks1; for (k1 = 1; k1 <= n1h - 1; k1++) { j1 = n1 - k1; w1r = 2 * c[kk1]; w1i = 2 * c[nc - kk1]; kk1 += ks1; kk2 = ks2; for (k2 = 2; k2 <= n2 - 2; k2 += 2) { x0r = w1r * c[kk2]; x0i = w1i * c[kk2]; x1r = w1r * c[nc - kk2]; x1i = w1i * c[nc - kk2]; wkr = x0r - x1i; wki = x0i + x1r; wji = x0r + x1i; wjr = x0i - x1r; kk2 += ks2; x0r = a[j1][k2 + 1] - a[k1][k2]; x0i = a[k1][k2 + 1] + a[j1][k2]; x1r = a[k1][k2 + 1] - a[j1][k2]; x1i = a[j1][k2 + 1] + a[k1][k2]; a[k1][k2] = wkr * x0r + wki * x0i; a[k1][k2 + 1] = wkr * x0i - wki * x0r; a[j1][k2] = wjr * x1r + wji * x1i; a[j1][k2 + 1] = wjr * x1i - wji * x1r; } x0r = 2 * c[kk2]; wjr = x0r * w1r; wji = x0r * w1i; x0r = w1r * a[k1][1] + w1i * a[k1][0]; x0i = w1r * a[k1][0] - w1i * a[k1][1]; x1r = -wjr * a[j1][1] + wji * a[j1][0]; x1i = wjr * a[j1][0] + wji * a[j1][1]; a[k1][0] = x0r + x1r; a[k1][1] = x1i + x0i; a[j1][0] = x0r - x1r; a[j1][1] = x1i - x0i; } w1r = 2 * c[kk1]; kk2 = ks2; for (k2 = 2; k2 <= n2 - 2; k2 += 2) { wkr = 2 * c[kk2]; wki = 2 * c[nc - kk2]; wjr = w1r * wkr; wji = w1r * wki; kk2 += ks2; x0i = wkr * a[0][k2] - wki * a[0][k2 + 1]; a[0][k2] = wkr * a[0][k2 + 1] + wki * a[0][k2]; a[0][k2 + 1] = x0i; x0i = wjr * a[n1h][k2] - wji * a[n1h][k2 + 1]; a[n1h][k2] = wjr * a[n1h][k2 + 1] + wji * a[n1h][k2]; a[n1h][k2 + 1] = x0i; } w1r *= 2; a[0][0] *= 2; a[0][1] *= w1r; a[n1h][0] *= w1r; } OouraFFT-1.0/fft4f2d.f000066400000000000000000001577331246725775500144510ustar00rootroot00000000000000! Fast Fourier/Cosine/Sine Transform ! dimension :two ! data length :power of 2 ! decimation :frequency ! radix :4, 2, row-column ! data :inplace ! table :use ! subroutines ! cdft2d: Complex Discrete Fourier Transform ! rdft2d: Real Discrete Fourier Transform ! ddct2d: Discrete Cosine Transform ! ddst2d: Discrete Sine Transform ! ! ! -------- Complex DFT (Discrete Fourier Transform) -------- ! [definition] ! ! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * ! exp(2*pi*i*j1*k1/n1) * ! exp(2*pi*i*j2*k2/n2), ! 0<=k1 ! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * ! exp(-2*pi*i*j1*k1/n1) * ! exp(-2*pi*i*j2*k2/n2), ! 0<=k1 ! ip(0) = 0 ! first time only ! call cdft2d(n1max, 2*n1, n2, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call cdft2d(n1max, 2*n1, n2, -1, a, ip, w) ! [parameters] ! n1max :row size of the 2D array (integer) ! 2*n1 :data length (integer) ! n1 >= 1, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 1, n2 = power of 2 ! a(0:2*n1-1,0:n2-1) ! :input/output data (real*8) ! input data ! a(2*j1,j2) = Re(x(j1,j2)), ! a(2*j1+1,j2) = Im(x(j1,j2)), ! 0<=j1= 2+sqrt(n) ! (n = max(n1, n2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1/2, n2/2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call cdft2d(n1max, 2*n1, n2, -1, a, ip, w) ! is ! call cdft2d(n1max, 2*n1, n2, 1, a, ip, w) ! do j2 = 0, n2 - 1 ! do j1 = 0, 2 * n1 - 1 ! a(j1, j2) = a(j1, j2) * (1.0d0 / (n1 * n2)) ! end do ! end do ! . ! ! ! -------- Real DFT / Inverse of Real DFT -------- ! [definition] ! RDFT ! R(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * ! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), ! 0<=k1 IRDFT (excluding scale) ! a(k1,k2) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 ! (R(j1,j2) * ! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + ! I(j1,j2) * ! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), ! 0<=k1 ! ip(0) = 0 ! first time only ! call rdft2d(n1max, n1, n2, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call rdft2d(n1max, n1, n2, -1, a, ip, w) ! [parameters] ! n1max :row size of the 2D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! a(0:n1-1,0:n2-1) ! :input/output data (real*8) ! ! output data ! a(2*k1,k2) = R(k1,k2) = R(n1-k1,n2-k2), ! a(2*k1+1,k2) = I(k1,k2) = -I(n1-k1,n2-k2), ! 0 ! input data ! a(2*j1,j2) = R(j1,j2) = R(n1-j1,n2-j2), ! a(2*j1+1,j2) = I(j1,j2) = -I(n1-j1,n2-j2), ! 0= 2+sqrt(n) ! (n = max(n1/2, n2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1/4, n2/2) + n1/4 ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call rdft2d(n1max, n1, n2, 1, a, ip, w) ! is ! call rdft2d(n1max, n1, n2, -1, a, ip, w) ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1, j2) = a(j1, j2) * (2.0d0 / (n1 * n2)) ! end do ! end do ! . ! ! ! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- ! [definition] ! IDCT (excluding scale) ! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * ! cos(pi*j1*(k1+1/2)/n1) * ! cos(pi*j2*(k2+1/2)/n2), ! 0<=k1 DCT ! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * ! cos(pi*(j1+1/2)*k1/n1) * ! cos(pi*(j2+1/2)*k2/n2), ! 0<=k1 ! ip(0) = 0 ! first time only ! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) ! [parameters] ! n1max :row size of the 2D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! a(0:n1-1,0:n2-1) ! :input/output data (real*8) ! output data ! a(k1,k2) = C(k1,k2), 0<=k1= 2+sqrt(n) ! (n = max(n1/2, n2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1/4, n2/2) + max(n1, n2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) ! is ! do j1 = 0, n1 - 1 ! a(j1, 0) = a(j1, 0) * 0.5d0 ! end do ! do j2 = 0, n2 - 1 ! a(0, j2) = a(0, j2) * 0.5d0 ! end do ! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1, j2) = a(j1, j2) * (4.0d0 / (n1 * n2)) ! end do ! end do ! . ! ! ! -------- DST (Discrete Sine Transform) / Inverse of DST -------- ! [definition] ! IDST (excluding scale) ! S(k1,k2) = sum_j1=1^n1 sum_j2=1^n2 A(j1,j2) * ! sin(pi*j1*(k1+1/2)/n1) * ! sin(pi*j2*(k2+1/2)/n2), ! 0<=k1 DST ! S(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * ! sin(pi*(j1+1/2)*k1/n1) * ! sin(pi*(j2+1/2)*k2/n2), ! 0 ! ip(0) = 0 ! first time only ! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) ! [parameters] ! n1max :row size of the 2D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! a(0:n1-1,0:n2-1) ! :input/output data (real*8) ! ! input data ! a(j1,j2) = A(j1,j2), 0 ! output data ! a(k1,k2) = S(k1,k2), 0= 2+sqrt(n) ! (n = max(n1/2, n2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1/4, n2/2) + max(n1, n2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) ! is ! do j1 = 0, n1 - 1 ! a(j1, 0) = a(j1, 0) * 0.5d0 ! end do ! do j2 = 0, n2 - 1 ! a(0, j2) = a(0, j2) * 0.5d0 ! end do ! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1, j2) = a(j1, j2) * (4.0d0 / (n1 * n2)) ! end do ! end do ! . ! ! subroutine cdft2d(n1max, n1, n2, isgn, a, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), n real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) n = max(n1, 2 * n2) if (n .gt. 4 * ip(0)) then call makewt(n / 4, ip, w) end if if (n1 .gt. 4) then call bitrv2row(n1max, n1, n2, ip(2), a) end if if (n2 .gt. 2) then call bitrv2col(n1max, n1, n2, ip(2), a) end if if (isgn .lt. 0) then call cftfrow(n1max, n1, n2, a, w) call cftfcol(n1max, n1, n2, a, w) else call cftbrow(n1max, n1, n2, a, w) call cftbcol(n1max, n1, n2, a, w) end if end ! subroutine rdft2d(n1max, n1, n2, isgn, a, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n2h, i, j real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi n = max(n1, 2 * n2) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n1 .gt. 4 * nc) then nc = n1 / 4 call makect(nc, ip, w(nw)) end if n2h = n2 / 2 if (isgn .lt. 0) then do i = 1, n2h - 1 j = n2 - i xi = a(0, i) - a(0, j) a(0, i) = a(0, i) + a(0, j) a(0, j) = xi xi = a(1, j) - a(1, i) a(1, i) = a(1, i) + a(1, j) a(1, j) = xi end do if (n2 .gt. 2) then call bitrv2col(n1max, n1, n2, ip(2), a) end if call cftfcol(n1max, n1, n2, a, w) do i = 0, n2 - 1 a(1, i) = 0.5d0 * (a(0, i) - a(1, i)) a(0, i) = a(0, i) - a(1, i) end do if (n1 .gt. 4) then call rftfrow(n1max, n1, n2, a, nc, w(nw)) call bitrv2row(n1max, n1, n2, ip(2), a) end if call cftfrow(n1max, n1, n2, a, w) else if (n1 .gt. 4) then call bitrv2row(n1max, n1, n2, ip(2), a) end if call cftbrow(n1max, n1, n2, a, w) if (n1 .gt. 4) then call rftbrow(n1max, n1, n2, a, nc, w(nw)) end if do i = 0, n2 - 1 xi = a(0, i) - a(1, i) a(0, i) = a(0, i) + a(1, i) a(1, i) = xi end do if (n2 .gt. 2) then call bitrv2col(n1max, n1, n2, ip(2), a) end if call cftbcol(n1max, n1, n2, a, w) do i = 1, n2h - 1 j = n2 - i a(0, j) = 0.5d0 * (a(0, i) - a(0, j)) a(0, i) = a(0, i) - a(0, j) a(1, j) = 0.5d0 * (a(1, i) + a(1, j)) a(1, i) = a(1, i) - a(1, j) end do end if end ! subroutine ddct2d(n1max, n1, n2, isgn, a, t, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n1h, n2h, & i, ix, ic, j, jx, jc real*8 a(0 : n1max - 1, 0 : n2 - 1), & t(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi n = max(n1, 2 * n2) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n1 .gt. nc .or. n2 .gt. nc) then nc = max(n1, n2) call makect(nc, ip, w(nw)) end if n1h = n1 / 2 n2h = n2 / 2 if (isgn .ge. 0) then do i = 0, n2 - 1 do j = 1, n1h - 1 jx = 2 * j t(jx, i) = a(j, i) t(jx + 1, i) = a(n1 - j, i) end do end do t(0, 0) = a(0, 0) t(1, 0) = a(n1h, 0) t(0, n2h) = a(0, n2h) t(1, n2h) = a(n1h, n2h) do i = 1, n2h - 1 ic = n2 - i t(0, i) = a(0, i) t(1, ic) = a(n1h, i) t(1, i) = a(0, ic) t(0, ic) = a(n1h, ic) end do call dctfsub(n1max, n1, n2, t, nc, w(nw)) if (n2 .gt. 2) then call bitrv2col(n1max, n1, n2, ip(2), t) end if call cftfcol(n1max, n1, n2, t, w) do i = 0, n2 - 1 t(1, i) = 0.5d0 * (t(0, i) - t(1, i)) t(0, i) = t(0, i) - t(1, i) end do if (n1 .gt. 4) then call rftfrow(n1max, n1, n2, t, nc, w(nw)) call bitrv2row(n1max, n1, n2, ip(2), t) end if call cftfrow(n1max, n1, n2, t, w) do i = 0, n2h - 1 ix = 2 * i ic = n2 - 1 - i do j = 0, n1h - 1 jx = 2 * j jc = n1 - 1 - j a(jx, ix) = t(j, i) a(jx + 1, ix) = t(jc, i) a(jx, ix + 1) = t(j, ic) a(jx + 1, ix + 1) = t(jc, ic) end do end do else do i = 0, n2h - 1 ix = 2 * i ic = n2 - 1 - i do j = 0, n1h - 1 jx = 2 * j jc = n1 - 1 - j t(j, i) = a(jx, ix) t(jc, i) = a(jx + 1, ix) t(j, ic) = a(jx, ix + 1) t(jc, ic) = a(jx + 1, ix + 1) end do end do if (n1 .gt. 4) then call bitrv2row(n1max, n1, n2, ip(2), t) end if call cftbrow(n1max, n1, n2, t, w) if (n1 .gt. 4) then call rftbrow(n1max, n1, n2, t, nc, w(nw)) end if do i = 0, n2 - 1 xi = t(0, i) - t(1, i) t(0, i) = t(0, i) + t(1, i) t(1, i) = xi end do if (n2 .gt. 2) then call bitrv2col(n1max, n1, n2, ip(2), t) end if call cftbcol(n1max, n1, n2, t, w) call dctbsub(n1max, n1, n2, t, nc, w(nw)) do i = 0, n2 - 1 do j = 1, n1h - 1 jx = 2 * j a(j, i) = t(jx, i) a(n1 - j, i) = t(jx + 1, i) end do end do a(0, 0) = t(0, 0) a(n1h, 0) = t(1, 0) a(0, n2h) = t(0, n2h) a(n1h, n2h) = t(1, n2h) do i = 1, n2h - 1 ic = n2 - i a(0, i) = t(0, i) a(n1h, i) = t(1, ic) a(0, ic) = t(1, i) a(n1h, ic) = t(0, ic) end do end if end ! subroutine ddst2d(n1max, n1, n2, isgn, a, t, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, n1h, n2h, & i, ix, ic, j, jx, jc real*8 a(0 : n1max - 1, 0 : n2 - 1), & t(0 : n1max - 1, 0 : n2 - 1), w(0 : *), xi n = max(n1, 2 * n2) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n1 .gt. nc .or. n2 .gt. nc) then nc = max(n1, n2) call makect(nc, ip, w(nw)) end if n1h = n1 / 2 n2h = n2 / 2 if (isgn .ge. 0) then do i = 0, n2 - 1 do j = 1, n1h - 1 jx = 2 * j t(jx, i) = a(j, i) t(jx + 1, i) = a(n1 - j, i) end do end do t(0, 0) = a(0, 0) t(1, 0) = a(n1h, 0) t(0, n2h) = a(0, n2h) t(1, n2h) = a(n1h, n2h) do i = 1, n2h - 1 ic = n2 - i t(0, i) = a(0, i) t(1, ic) = a(n1h, i) t(1, i) = a(0, ic) t(0, ic) = a(n1h, ic) end do call dstfsub(n1max, n1, n2, t, nc, w(nw)) if (n2 .gt. 2) then call bitrv2col(n1max, n1, n2, ip(2), t) end if call cftfcol(n1max, n1, n2, t, w) do i = 0, n2 - 1 t(1, i) = 0.5d0 * (t(0, i) - t(1, i)) t(0, i) = t(0, i) - t(1, i) end do if (n1 .gt. 4) then call rftfrow(n1max, n1, n2, t, nc, w(nw)) call bitrv2row(n1max, n1, n2, ip(2), t) end if call cftfrow(n1max, n1, n2, t, w) do i = 0, n2h - 1 ix = 2 * i ic = n2 - 1 - i do j = 0, n1h - 1 jx = 2 * j jc = n1 - 1 - j a(jx, ix) = t(j, i) a(jx + 1, ix) = -t(jc, i) a(jx, ix + 1) = -t(j, ic) a(jx + 1, ix + 1) = t(jc, ic) end do end do else do i = 0, n2h - 1 ix = 2 * i ic = n2 - 1 - i do j = 0, n1h - 1 jx = 2 * j jc = n1 - 1 - j t(j, i) = a(jx, ix) t(jc, i) = -a(jx + 1, ix) t(j, ic) = -a(jx, ix + 1) t(jc, ic) = a(jx + 1, ix + 1) end do end do if (n1 .gt. 4) then call bitrv2row(n1max, n1, n2, ip(2), t) end if call cftbrow(n1max, n1, n2, t, w) if (n1 .gt. 4) then call rftbrow(n1max, n1, n2, t, nc, w(nw)) end if do i = 0, n2 - 1 xi = t(0, i) - t(1, i) t(0, i) = t(0, i) + t(1, i) t(1, i) = xi end do if (n2 .gt. 2) then call bitrv2col(n1max, n1, n2, ip(2), t) end if call cftbcol(n1max, n1, n2, t, w) call dstbsub(n1max, n1, n2, t, nc, w(nw)) do i = 0, n2 - 1 do j = 1, n1h - 1 jx = 2 * j a(j, i) = t(jx, i) a(n1 - j, i) = t(jx + 1, i) end do end do a(0, 0) = t(0, 0) a(n1h, 0) = t(1, 0) a(0, n2h) = t(0, n2h) a(n1h, n2h) = t(1, n2h) do i = 1, n2h - 1 ic = n2 - i a(0, i) = t(0, i) a(n1h, i) = t(1, ic) a(0, ic) = t(1, i) a(n1h, ic) = t(0, ic) end do end if end ! ! -------- initializing routines -------- ! subroutine makewt(nw, ip, w) integer nw, ip(0 : *), nwh, j real*8 w(0 : nw - 1), delta, x, y ip(0) = nw ip(1) = 1 if (nw .gt. 2) then nwh = nw / 2 delta = atan(1.0d0) / nwh w(0) = 1 w(1) = 0 w(nwh) = cos(delta * nwh) w(nwh + 1) = w(nwh) do j = 2, nwh - 2, 2 x = cos(delta * j) y = sin(delta * j) w(j) = x w(j + 1) = y w(nw - j) = y w(nw - j + 1) = x end do call bitrv2(nw, ip(2), w) end if end ! subroutine makect(nc, ip, c) integer nc, ip(0 : *), nch, j real*8 c(0 : nc - 1), delta ip(1) = nc if (nc .gt. 1) then nch = nc / 2 delta = atan(1.0d0) / nch c(0) = 0.5d0 c(nch) = 0.5d0 * cos(delta * nch) do j = 1, nch - 1 c(j) = 0.5d0 * cos(delta * j) c(nc - j) = 0.5d0 * sin(delta * j) end do end if end ! ! -------- child routines -------- ! subroutine bitrv2(n, ip, a) integer n, ip(0 : *), j, j1, k, k1, l, m, m2 real*8 a(0 : n - 1), xr, xi ip(0) = 0 l = n m = 1 do while (4 * m .lt. l) l = l / 2 do j = 0, m - 1 ip(m + j) = ip(j) + l end do m = m * 2 end do if (4 * m .gt. l) then do k = 1, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = a(j1 + 1) a(j1) = a(k1) a(j1 + 1) = a(k1 + 1) a(k1) = xr a(k1 + 1) = xi end do end do else m2 = 2 * m do k = 1, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = a(j1 + 1) a(j1) = a(k1) a(j1 + 1) = a(k1 + 1) a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + m2 xr = a(j1) xi = a(j1 + 1) a(j1) = a(k1) a(j1 + 1) = a(k1 + 1) a(k1) = xr a(k1 + 1) = xi end do end do end if end ! subroutine bitrv2row(n1max, n, n2, ip, a) integer n1max, n, n2, ip(0 : *), i, j, j1, k, k1, l, m, m2 real*8 a(0 : n1max - 1, 0 : n2 - 1), xr, xi ip(0) = 0 l = n m = 1 do while (4 * m .lt. l) l = l / 2 do j = 0, m - 1 ip(m + j) = ip(j) + l end do m = m * 2 end do if (4 * m .gt. l) then do i = 0, n2 - 1 do k = 1, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1, i) xi = a(j1 + 1, i) a(j1, i) = a(k1, i) a(j1 + 1, i) = a(k1 + 1, i) a(k1, i) = xr a(k1 + 1, i) = xi end do end do end do else m2 = 2 * m do i = 0, n2 - 1 do k = 1, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1, i) xi = a(j1 + 1, i) a(j1, i) = a(k1, i) a(j1 + 1, i) = a(k1 + 1, i) a(k1, i) = xr a(k1 + 1, i) = xi j1 = j1 + m2 k1 = k1 + m2 xr = a(j1, i) xi = a(j1 + 1, i) a(j1, i) = a(k1, i) a(j1 + 1, i) = a(k1 + 1, i) a(k1, i) = xr a(k1 + 1, i) = xi end do end do end do end if end ! subroutine bitrv2col(n1max, n1, n, ip, a) integer n1max, n1, n, ip(0 : *), i, j, j1, k, k1, l, m real*8 a(0 : n1max - 1, 0 : n - 1), xr, xi ip(0) = 0 l = n m = 1 do while (2 * m .lt. l) l = l / 2 do j = 0, m - 1 ip(m + j) = ip(j) + l end do m = m * 2 end do if (2 * m .gt. l) then do k = 1, m - 1 do j = 0, k - 1 j1 = j + ip(k) k1 = k + ip(j) do i = 0, n1 - 2, 2 xr = a(i, j1) xi = a(i + 1, j1) a(i, j1) = a(i, k1) a(i + 1, j1) = a(i + 1, k1) a(i, k1) = xr a(i + 1, k1) = xi end do end do end do else do k = 1, m - 1 do j = 0, k - 1 j1 = j + ip(k) k1 = k + ip(j) do i = 0, n1 - 2, 2 xr = a(i, j1) xi = a(i + 1, j1) a(i, j1) = a(i, k1) a(i + 1, j1) = a(i + 1, k1) a(i, k1) = xr a(i + 1, k1) = xi end do j1 = j1 + m k1 = k1 + m do i = 0, n1 - 2, 2 xr = a(i, j1) xi = a(i + 1, j1) a(i, j1) = a(i, k1) a(i + 1, j1) = a(i + 1, k1) a(i, k1) = xr a(i + 1, k1) = xi end do end do end do end if end ! subroutine cftbrow(n1max, n, n2, a, w) integer n1max, n, n2, i, j, j1, j2, j3, k, k1, ks, l, m real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i do i = 0, n2 - 1 l = 2 do while (2 * l .lt. n) m = 4 * l do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j, i) + a(j1, i) x0i = a(j + 1, i) + a(j1 + 1, i) x1r = a(j, i) - a(j1, i) x1i = a(j + 1, i) - a(j1 + 1, i) x2r = a(j2, i) + a(j3, i) x2i = a(j2 + 1, i) + a(j3 + 1, i) x3r = a(j2, i) - a(j3, i) x3i = a(j2 + 1, i) - a(j3 + 1, i) a(j, i) = x0r + x2r a(j + 1, i) = x0i + x2i a(j2, i) = x0r - x2r a(j2 + 1, i) = x0i - x2i a(j1, i) = x1r - x3i a(j1 + 1, i) = x1i + x3r a(j3, i) = x1r + x3i a(j3 + 1, i) = x1i - x3r end do if (m .lt. n) then wk1r = w(2) do j = m, l + m - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j, i) + a(j1, i) x0i = a(j + 1, i) + a(j1 + 1, i) x1r = a(j, i) - a(j1, i) x1i = a(j + 1, i) - a(j1 + 1, i) x2r = a(j2, i) + a(j3, i) x2i = a(j2 + 1, i) + a(j3 + 1, i) x3r = a(j2, i) - a(j3, i) x3i = a(j2 + 1, i) - a(j3 + 1, i) a(j, i) = x0r + x2r a(j + 1, i) = x0i + x2i a(j2, i) = x2i - x0i a(j2 + 1, i) = x0r - x2r x0r = x1r - x3i x0i = x1i + x3r a(j1, i) = wk1r * (x0r - x0i) a(j1 + 1, i) = wk1r * (x0r + x0i) x0r = x3i + x1r x0i = x3r - x1i a(j3, i) = wk1r * (x0i - x0r) a(j3 + 1, i) = wk1r * (x0i + x0r) end do k1 = 1 ks = -1 do k = 2 * m, n - m, m k1 = k1 + 1 ks = -ks wk1r = w(2 * k1) wk1i = w(2 * k1 + 1) wk2r = ks * w(k1) wk2i = w(k1 + ks) wk3r = wk1r - 2 * wk2i * wk1i wk3i = 2 * wk2i * wk1r - wk1i do j = k, l + k - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j, i) + a(j1, i) x0i = a(j + 1, i) + a(j1 + 1, i) x1r = a(j, i) - a(j1, i) x1i = a(j + 1, i) - a(j1 + 1, i) x2r = a(j2, i) + a(j3, i) x2i = a(j2 + 1, i) + a(j3 + 1, i) x3r = a(j2, i) - a(j3, i) x3i = a(j2 + 1, i) - a(j3 + 1, i) a(j, i) = x0r + x2r a(j + 1, i) = x0i + x2i x0r = x0r - x2r x0i = x0i - x2i a(j2, i) = wk2r * x0r - wk2i * x0i a(j2 + 1, i) = wk2r * x0i + wk2i * x0r x0r = x1r - x3i x0i = x1i + x3r a(j1, i) = wk1r * x0r - wk1i * x0i a(j1 + 1, i) = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3, i) = wk3r * x0r - wk3i * x0i a(j3 + 1, i) = wk3r * x0i + wk3i * x0r end do end do end if l = m end do if (l .lt. n) then do j = 0, l - 2, 2 j1 = j + l x0r = a(j, i) - a(j1, i) x0i = a(j + 1, i) - a(j1 + 1, i) a(j, i) = a(j, i) + a(j1, i) a(j + 1, i) = a(j + 1, i) + a(j1 + 1, i) a(j1, i) = x0r a(j1 + 1, i) = x0i end do end if end do end ! subroutine cftbcol(n1max, n1, n, a, w) integer n1max, n1, n, i, j, j1, j2, j3, k, k1, ks, l, m real*8 a(0 : n1max - 1, 0 : n - 1), w(0 : *) real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i l = 1 do while (2 * l .lt. n) m = 4 * l do j = 0, l - 1 j1 = j + l j2 = j1 + l j3 = j2 + l do i = 0, n1 - 2, 2 x0r = a(i, j) + a(i, j1) x0i = a(i + 1, j) + a(i + 1, j1) x1r = a(i, j) - a(i, j1) x1i = a(i + 1, j) - a(i + 1, j1) x2r = a(i, j2) + a(i, j3) x2i = a(i + 1, j2) + a(i + 1, j3) x3r = a(i, j2) - a(i, j3) x3i = a(i + 1, j2) - a(i + 1, j3) a(i, j) = x0r + x2r a(i + 1, j) = x0i + x2i a(i, j2) = x0r - x2r a(i + 1, j2) = x0i - x2i a(i, j1) = x1r - x3i a(i + 1, j1) = x1i + x3r a(i, j3) = x1r + x3i a(i + 1, j3) = x1i - x3r end do end do if (m .lt. n) then wk1r = w(2) do j = m, l + m - 1 j1 = j + l j2 = j1 + l j3 = j2 + l do i = 0, n1 - 2, 2 x0r = a(i, j) + a(i, j1) x0i = a(i + 1, j) + a(i + 1, j1) x1r = a(i, j) - a(i, j1) x1i = a(i + 1, j) - a(i + 1, j1) x2r = a(i, j2) + a(i, j3) x2i = a(i + 1, j2) + a(i + 1, j3) x3r = a(i, j2) - a(i, j3) x3i = a(i + 1, j2) - a(i + 1, j3) a(i, j) = x0r + x2r a(i + 1, j) = x0i + x2i a(i, j2) = x2i - x0i a(i + 1, j2) = x0r - x2r x0r = x1r - x3i x0i = x1i + x3r a(i, j1) = wk1r * (x0r - x0i) a(i + 1, j1) = wk1r * (x0r + x0i) x0r = x3i + x1r x0i = x3r - x1i a(i, j3) = wk1r * (x0i - x0r) a(i + 1, j3) = wk1r * (x0i + x0r) end do end do k1 = 1 ks = -1 do k = 2 * m, n - m, m k1 = k1 + 1 ks = -ks wk1r = w(2 * k1) wk1i = w(2 * k1 + 1) wk2r = ks * w(k1) wk2i = w(k1 + ks) wk3r = wk1r - 2 * wk2i * wk1i wk3i = 2 * wk2i * wk1r - wk1i do j = k, l + k - 1 j1 = j + l j2 = j1 + l j3 = j2 + l do i = 0, n1 - 2, 2 x0r = a(i, j) + a(i, j1) x0i = a(i + 1, j) + a(i + 1, j1) x1r = a(i, j) - a(i, j1) x1i = a(i + 1, j) - a(i + 1, j1) x2r = a(i, j2) + a(i, j3) x2i = a(i + 1, j2) + a(i + 1, j3) x3r = a(i, j2) - a(i, j3) x3i = a(i + 1, j2) - a(i + 1, j3) a(i, j) = x0r + x2r a(i + 1, j) = x0i + x2i x0r = x0r - x2r x0i = x0i - x2i a(i, j2) = wk2r * x0r - wk2i * x0i a(i + 1, j2) = wk2r * x0i + wk2i * x0r x0r = x1r - x3i x0i = x1i + x3r a(i, j1) = wk1r * x0r - wk1i * x0i a(i + 1, j1) = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(i, j3) = wk3r * x0r - wk3i * x0i a(i + 1, j3) = wk3r * x0i + wk3i * x0r end do end do end do end if l = m end do if (l .lt. n) then do j = 0, l - 1 j1 = j + l do i = 0, n1 - 2, 2 x0r = a(i, j) - a(i, j1) x0i = a(i + 1, j) - a(i + 1, j1) a(i, j) = a(i, j) + a(i, j1) a(i + 1, j) = a(i + 1, j) + a(i + 1, j1) a(i, j1) = x0r a(i + 1, j1) = x0i end do end do end if end ! subroutine cftfrow(n1max, n, n2, a, w) integer n1max, n, n2, i, j, j1, j2, j3, k, k1, ks, l, m real*8 a(0 : n1max - 1, 0 : n2 - 1), w(0 : *) real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i do i = 0, n2 - 1 l = 2 do while (2 * l .lt. n) m = 4 * l do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j, i) + a(j1, i) x0i = a(j + 1, i) + a(j1 + 1, i) x1r = a(j, i) - a(j1, i) x1i = a(j + 1, i) - a(j1 + 1, i) x2r = a(j2, i) + a(j3, i) x2i = a(j2 + 1, i) + a(j3 + 1, i) x3r = a(j2, i) - a(j3, i) x3i = a(j2 + 1, i) - a(j3 + 1, i) a(j, i) = x0r + x2r a(j + 1, i) = x0i + x2i a(j2, i) = x0r - x2r a(j2 + 1, i) = x0i - x2i a(j1, i) = x1r + x3i a(j1 + 1, i) = x1i - x3r a(j3, i) = x1r - x3i a(j3 + 1, i) = x1i + x3r end do if (m .lt. n) then wk1r = w(2) do j = m, l + m - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j, i) + a(j1, i) x0i = a(j + 1, i) + a(j1 + 1, i) x1r = a(j, i) - a(j1, i) x1i = a(j + 1, i) - a(j1 + 1, i) x2r = a(j2, i) + a(j3, i) x2i = a(j2 + 1, i) + a(j3 + 1, i) x3r = a(j2, i) - a(j3, i) x3i = a(j2 + 1, i) - a(j3 + 1, i) a(j, i) = x0r + x2r a(j + 1, i) = x0i + x2i a(j2, i) = x0i - x2i a(j2 + 1, i) = x2r - x0r x0r = x1r + x3i x0i = x1i - x3r a(j1, i) = wk1r * (x0i + x0r) a(j1 + 1, i) = wk1r * (x0i - x0r) x0r = x3i - x1r x0i = x3r + x1i a(j3, i) = wk1r * (x0r + x0i) a(j3 + 1, i) = wk1r * (x0r - x0i) end do k1 = 1 ks = -1 do k = 2 * m, n - m, m k1 = k1 + 1 ks = -ks wk1r = w(2 * k1) wk1i = w(2 * k1 + 1) wk2r = ks * w(k1) wk2i = w(k1 + ks) wk3r = wk1r - 2 * wk2i * wk1i wk3i = 2 * wk2i * wk1r - wk1i do j = k, l + k - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j, i) + a(j1, i) x0i = a(j + 1, i) + a(j1 + 1, i) x1r = a(j, i) - a(j1, i) x1i = a(j + 1, i) - a(j1 + 1, i) x2r = a(j2, i) + a(j3, i) x2i = a(j2 + 1, i) + a(j3 + 1, i) x3r = a(j2, i) - a(j3, i) x3i = a(j2 + 1, i) - a(j3 + 1, i) a(j, i) = x0r + x2r a(j + 1, i) = x0i + x2i x0r = x0r - x2r x0i = x0i - x2i a(j2, i) = wk2r * x0r + wk2i * x0i a(j2 + 1, i) = wk2r * x0i - wk2i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j1, i) = wk1r * x0r + wk1i * x0i a(j1 + 1, i) = wk1r * x0i - wk1i * x0r x0r = x1r - x3i x0i = x1i + x3r a(j3, i) = wk3r * x0r + wk3i * x0i a(j3 + 1, i) = wk3r * x0i - wk3i * x0r end do end do end if l = m end do if (l .lt. n) then do j = 0, l - 2, 2 j1 = j + l x0r = a(j, i) - a(j1, i) x0i = a(j + 1, i) - a(j1 + 1, i) a(j, i) = a(j, i) + a(j1, i) a(j + 1, i) = a(j + 1, i) + a(j1 + 1, i) a(j1, i) = x0r a(j1 + 1, i) = x0i end do end if end do end ! subroutine cftfcol(n1max, n1, n, a, w) integer n1max, n1, n, i, j, j1, j2, j3, k, k1, ks, l, m real*8 a(0 : n1max - 1, 0 : n - 1), w(0 : *) real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i l = 1 do while (2 * l .lt. n) m = 4 * l do j = 0, l - 1 j1 = j + l j2 = j1 + l j3 = j2 + l do i = 0, n1 - 2, 2 x0r = a(i, j) + a(i, j1) x0i = a(i + 1, j) + a(i + 1, j1) x1r = a(i, j) - a(i, j1) x1i = a(i + 1, j) - a(i + 1, j1) x2r = a(i, j2) + a(i, j3) x2i = a(i + 1, j2) + a(i + 1, j3) x3r = a(i, j2) - a(i, j3) x3i = a(i + 1, j2) - a(i + 1, j3) a(i, j) = x0r + x2r a(i + 1, j) = x0i + x2i a(i, j2) = x0r - x2r a(i + 1, j2) = x0i - x2i a(i, j1) = x1r + x3i a(i + 1, j1) = x1i - x3r a(i, j3) = x1r - x3i a(i + 1, j3) = x1i + x3r end do end do if (m .lt. n) then wk1r = w(2) do j = m, l + m - 1 j1 = j + l j2 = j1 + l j3 = j2 + l do i = 0, n1 - 2, 2 x0r = a(i, j) + a(i, j1) x0i = a(i + 1, j) + a(i + 1, j1) x1r = a(i, j) - a(i, j1) x1i = a(i + 1, j) - a(i + 1, j1) x2r = a(i, j2) + a(i, j3) x2i = a(i + 1, j2) + a(i + 1, j3) x3r = a(i, j2) - a(i, j3) x3i = a(i + 1, j2) - a(i + 1, j3) a(i, j) = x0r + x2r a(i + 1, j) = x0i + x2i a(i, j2) = x0i - x2i a(i + 1, j2) = x2r - x0r x0r = x1r + x3i x0i = x1i - x3r a(i, j1) = wk1r * (x0i + x0r) a(i + 1, j1) = wk1r * (x0i - x0r) x0r = x3i - x1r x0i = x3r + x1i a(i, j3) = wk1r * (x0r + x0i) a(i + 1, j3) = wk1r * (x0r - x0i) end do end do k1 = 1 ks = -1 do k = 2 * m, n - m, m k1 = k1 + 1 ks = -ks wk1r = w(2 * k1) wk1i = w(2 * k1 + 1) wk2r = ks * w(k1) wk2i = w(k1 + ks) wk3r = wk1r - 2 * wk2i * wk1i wk3i = 2 * wk2i * wk1r - wk1i do j = k, l + k - 1 j1 = j + l j2 = j1 + l j3 = j2 + l do i = 0, n1 - 2, 2 x0r = a(i, j) + a(i, j1) x0i = a(i + 1, j) + a(i + 1, j1) x1r = a(i, j) - a(i, j1) x1i = a(i + 1, j) - a(i + 1, j1) x2r = a(i, j2) + a(i, j3) x2i = a(i + 1, j2) + a(i + 1, j3) x3r = a(i, j2) - a(i, j3) x3i = a(i + 1, j2) - a(i + 1, j3) a(i, j) = x0r + x2r a(i + 1, j) = x0i + x2i x0r = x0r - x2r x0i = x0i - x2i a(i, j2) = wk2r * x0r + wk2i * x0i a(i + 1, j2) = wk2r * x0i - wk2i * x0r x0r = x1r + x3i x0i = x1i - x3r a(i, j1) = wk1r * x0r + wk1i * x0i a(i + 1, j1) = wk1r * x0i - wk1i * x0r x0r = x1r - x3i x0i = x1i + x3r a(i, j3) = wk3r * x0r + wk3i * x0i a(i + 1, j3) = wk3r * x0i - wk3i * x0r end do end do end do end if l = m end do if (l .lt. n) then do j = 0, l - 1 j1 = j + l do i = 0, n1 - 2, 2 x0r = a(i, j) - a(i, j1) x0i = a(i + 1, j) - a(i + 1, j1) a(i, j) = a(i, j) + a(i, j1) a(i + 1, j) = a(i + 1, j) + a(i + 1, j1) a(i, j1) = x0r a(i + 1, j1) = x0i end do end do end if end ! subroutine rftbrow(n1max, n, n2, a, nc, c) integer n1max, n, n2, nc, i, j, k, kk, ks real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), & wkr, wki, xr, xi, yr, yi ks = 4 * nc / n do i = 0, n2 - 1 kk = 0 do k = n / 2 - 2, 2, -2 j = n - k kk = kk + ks wkr = 0.5d0 - c(kk) wki = c(nc - kk) xr = a(k, i) - a(j, i) xi = a(k + 1, i) + a(j + 1, i) yr = wkr * xr - wki * xi yi = wkr * xi + wki * xr a(k, i) = a(k, i) - yr a(k + 1, i) = a(k + 1, i) - yi a(j, i) = a(j, i) + yr a(j + 1, i) = a(j + 1, i) - yi end do end do end ! subroutine rftfrow(n1max, n, n2, a, nc, c) integer n1max, n, n2, nc, i, j, k, kk, ks real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), & wkr, wki, xr, xi, yr, yi ks = 4 * nc / n do i = 0, n2 - 1 kk = 0 do k = n / 2 - 2, 2, -2 j = n - k kk = kk + ks wkr = 0.5d0 - c(kk) wki = c(nc - kk) xr = a(k, i) - a(j, i) xi = a(k + 1, i) + a(j + 1, i) yr = wkr * xr + wki * xi yi = wkr * xi - wki * xr a(k, i) = a(k, i) - yr a(k + 1, i) = a(k + 1, i) - yi a(j, i) = a(j, i) + yr a(j + 1, i) = a(j + 1, i) - yi end do end do end ! subroutine dctbsub(n1max, n1, n2, a, nc, c) integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, & k1, k2 real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i ks1 = nc / n1 ks2 = nc / n2 n2h = n2 / 2 kk2 = ks2 do k2 = 1, n2h - 1 j2 = n2 - k2 w2r = 2 * c(kk2) w2i = 2 * c(nc - kk2) kk2 = kk2 + ks2 kk1 = ks1 do k1 = 2, n1 - 2, 2 x0r = w2r * c(kk1) x0i = w2i * c(kk1) x1r = w2r * c(nc - kk1) x1i = w2i * c(nc - kk1) wkr = x0r - x1i wki = x0i + x1r wji = x0r + x1i wjr = x0i - x1r kk1 = kk1 + ks1 x0r = wkr * a(k1, k2) - wki * a(k1 + 1, k2) x0i = wkr * a(k1 + 1, k2) + wki * a(k1, k2) x1r = wjr * a(k1, j2) - wji * a(k1 + 1, j2) x1i = wjr * a(k1 + 1, j2) + wji * a(k1, j2) a(k1, k2) = x0r + x1i a(k1 + 1, k2) = x0i - x1r a(k1, j2) = x1r + x0i a(k1 + 1, j2) = x1i - x0r end do wkr = w2r * 0.5d0 wki = w2i * 0.5d0 wjr = w2r * c(kk1) wji = w2i * c(kk1) x0r = a(0, k2) + a(0, j2) x0i = a(1, k2) - a(1, j2) x1r = a(0, k2) - a(0, j2) x1i = a(1, k2) + a(1, j2) a(0, k2) = wkr * x0r - wki * x0i a(1, k2) = wkr * x0i + wki * x0r a(0, j2) = -wjr * x1r + wji * x1i a(1, j2) = wjr * x1i + wji * x1r end do w2r = 2 * c(kk2) kk1 = ks1 do k1 = 2, n1 - 2, 2 wkr = 2 * c(kk1) wki = 2 * c(nc - kk1) wjr = w2r * wkr wji = w2r * wki kk1 = kk1 + ks1 x0i = wkr * a(k1 + 1, 0) + wki * a(k1, 0) a(k1, 0) = wkr * a(k1, 0) - wki * a(k1 + 1, 0) a(k1 + 1, 0) = x0i x0i = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) a(k1, n2h) = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) a(k1 + 1, n2h) = x0i end do a(1, 0) = a(1, 0) * w2r a(0, n2h) = a(0, n2h) * w2r a(1, n2h) = a(1, n2h) * 0.5d0 end ! subroutine dctfsub(n1max, n1, n2, a, nc, c) integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, & k1, k2 real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i ks1 = nc / n1 ks2 = nc / n2 n2h = n2 / 2 kk2 = ks2 do k2 = 1, n2h - 1 j2 = n2 - k2 w2r = 2 * c(kk2) w2i = 2 * c(nc - kk2) kk2 = kk2 + ks2 kk1 = ks1 do k1 = 2, n1 - 2, 2 x0r = w2r * c(kk1) x0i = w2i * c(kk1) x1r = w2r * c(nc - kk1) x1i = w2i * c(nc - kk1) wkr = x0r - x1i wki = x0i + x1r wji = x0r + x1i wjr = x0i - x1r kk1 = kk1 + ks1 x0r = a(k1, k2) - a(k1 + 1, j2) x0i = a(k1, j2) + a(k1 + 1, k2) x1r = a(k1, j2) - a(k1 + 1, k2) x1i = a(k1, k2) + a(k1 + 1, j2) a(k1, k2) = wkr * x0r + wki * x0i a(k1 + 1, k2) = wkr * x0i - wki * x0r a(k1, j2) = wjr * x1r + wji * x1i a(k1 + 1, j2) = wjr * x1i - wji * x1r end do x0r = 2 * c(kk1) wjr = x0r * w2r wji = x0r * w2i x0r = w2r * a(0, k2) + w2i * a(1, k2) x0i = w2r * a(1, k2) - w2i * a(0, k2) x1r = -wjr * a(0, j2) + wji * a(1, j2) x1i = wjr * a(1, j2) + wji * a(0, j2) a(0, k2) = x0r + x1r a(1, k2) = x1i + x0i a(0, j2) = x0r - x1r a(1, j2) = x1i - x0i end do w2r = 2 * c(kk2) kk1 = ks1 do k1 = 2, n1 - 2, 2 wkr = 2 * c(kk1) wki = 2 * c(nc - kk1) wjr = w2r * wkr wji = w2r * wki kk1 = kk1 + ks1 x0i = wkr * a(k1 + 1, 0) - wki * a(k1, 0) a(k1, 0) = wkr * a(k1, 0) + wki * a(k1 + 1, 0) a(k1 + 1, 0) = x0i x0i = wjr * a(k1 + 1, n2h) - wji * a(k1, n2h) a(k1, n2h) = wjr * a(k1, n2h) + wji * a(k1 + 1, n2h) a(k1 + 1, n2h) = x0i end do w2r = w2r * 2 a(0, 0) = a(0, 0) * 2 a(1, 0) = a(1, 0) * w2r a(0, n2h) = a(0, n2h) * w2r end ! subroutine dstbsub(n1max, n1, n2, a, nc, c) integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, & k1, k2 real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i ks1 = nc / n1 ks2 = nc / n2 n2h = n2 / 2 kk2 = ks2 do k2 = 1, n2h - 1 j2 = n2 - k2 w2r = 2 * c(kk2) w2i = 2 * c(nc - kk2) kk2 = kk2 + ks2 kk1 = ks1 do k1 = 2, n1 - 2, 2 x0r = w2r * c(kk1) x0i = w2i * c(kk1) x1r = w2r * c(nc - kk1) x1i = w2i * c(nc - kk1) wkr = x0r - x1i wki = x0i + x1r wji = x0r + x1i wjr = x0i - x1r kk1 = kk1 + ks1 x0r = wkr * a(k1, k2) - wki * a(k1 + 1, k2) x0i = wkr * a(k1 + 1, k2) + wki * a(k1, k2) x1r = wjr * a(k1, j2) - wji * a(k1 + 1, j2) x1i = wjr * a(k1 + 1, j2) + wji * a(k1, j2) a(k1, k2) = x1i - x0r a(k1 + 1, k2) = x1r + x0i a(k1, j2) = x0i - x1r a(k1 + 1, j2) = x0r + x1i end do wkr = w2r * 0.5d0 wki = w2i * 0.5d0 wjr = w2r * c(kk1) wji = w2i * c(kk1) x0r = a(0, k2) + a(0, j2) x0i = a(1, k2) - a(1, j2) x1r = a(0, k2) - a(0, j2) x1i = a(1, k2) + a(1, j2) a(1, k2) = wkr * x0r - wki * x0i a(0, k2) = wkr * x0i + wki * x0r a(1, j2) = -wjr * x1r + wji * x1i a(0, j2) = wjr * x1i + wji * x1r end do w2r = 2 * c(kk2) kk1 = ks1 do k1 = 2, n1 - 2, 2 wkr = 2 * c(kk1) wki = 2 * c(nc - kk1) wjr = w2r * wkr wji = w2r * wki kk1 = kk1 + ks1 x0i = wkr * a(k1 + 1, 0) + wki * a(k1, 0) a(k1 + 1, 0) = wkr * a(k1, 0) - wki * a(k1 + 1, 0) a(k1, 0) = x0i x0i = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) a(k1 + 1, n2h) = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) a(k1, n2h) = x0i end do a(1, 0) = a(1, 0) * w2r a(0, n2h) = a(0, n2h) * w2r a(1, n2h) = a(1, n2h) * 0.5d0 end ! subroutine dstfsub(n1max, n1, n2, a, nc, c) integer n1max, n1, n2, nc, kk1, kk2, ks1, ks2, n2h, j2, & k1, k2 real*8 a(0 : n1max - 1, 0 : n2 - 1), c(0 : nc - 1), & w2r, w2i, wkr, wki, wjr, wji, x0r, x0i, x1r, x1i ks1 = nc / n1 ks2 = nc / n2 n2h = n2 / 2 kk2 = ks2 do k2 = 1, n2h - 1 j2 = n2 - k2 w2r = 2 * c(kk2) w2i = 2 * c(nc - kk2) kk2 = kk2 + ks2 kk1 = ks1 do k1 = 2, n1 - 2, 2 x0r = w2r * c(kk1) x0i = w2i * c(kk1) x1r = w2r * c(nc - kk1) x1i = w2i * c(nc - kk1) wkr = x0r - x1i wki = x0i + x1r wji = x0r + x1i wjr = x0i - x1r kk1 = kk1 + ks1 x0r = a(k1 + 1, j2) - a(k1, k2) x0i = a(k1 + 1, k2) + a(k1, j2) x1r = a(k1 + 1, k2) - a(k1, j2) x1i = a(k1 + 1, j2) + a(k1, k2) a(k1, k2) = wkr * x0r + wki * x0i a(k1 + 1, k2) = wkr * x0i - wki * x0r a(k1, j2) = wjr * x1r + wji * x1i a(k1 + 1, j2) = wjr * x1i - wji * x1r end do x0r = 2 * c(kk1) wjr = x0r * w2r wji = x0r * w2i x0r = w2r * a(1, k2) + w2i * a(0, k2) x0i = w2r * a(0, k2) - w2i * a(1, k2) x1r = -wjr * a(1, j2) + wji * a(0, j2) x1i = wjr * a(0, j2) + wji * a(1, j2) a(0, k2) = x0r + x1r a(1, k2) = x1i + x0i a(0, j2) = x0r - x1r a(1, j2) = x1i - x0i end do w2r = 2 * c(kk2) kk1 = ks1 do k1 = 2, n1 - 2, 2 wkr = 2 * c(kk1) wki = 2 * c(nc - kk1) wjr = w2r * wkr wji = w2r * wki kk1 = kk1 + ks1 x0i = wkr * a(k1, 0) - wki * a(k1 + 1, 0) a(k1, 0) = wkr * a(k1 + 1, 0) + wki * a(k1, 0) a(k1 + 1, 0) = x0i x0i = wjr * a(k1, n2h) - wji * a(k1 + 1, n2h) a(k1, n2h) = wjr * a(k1 + 1, n2h) + wji * a(k1, n2h) a(k1 + 1, n2h) = x0i end do w2r = w2r * 2 a(0, 0) = a(0, 0) * 2 a(1, 0) = a(1, 0) * w2r a(0, n2h) = a(0, n2h) * w2r end ! OouraFFT-1.0/fft4g.c000066400000000000000000001120611246725775500142020ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :one data length :power of 2 decimation :frequency radix :4, 2 data :inplace table :use functions cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, double *, int *, double *); void rdft(int, int, double *, int *, double *); void ddct(int, int, double *, int *, double *); void ddst(int, int, double *, int *, double *); void dfct(int, double *, double *, int *, double *); void dfst(int, double *, double *, int *, double *); -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k ip[0] = 0; // first time only cdft(2*n, 1, a, ip, w); ip[0] = 0; // first time only cdft(2*n, -1, a, ip, w); [parameters] 2*n :data length (int) n >= 1, n = power of 2 a[0...2*n-1] :input/output data (double *) input data a[2*j] = Re(x[j]), a[2*j+1] = Im(x[j]), 0<=j= 2+sqrt(n) strictly, length of ip >= 2+(1<<(int)(log(n+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of cdft(2*n, -1, a, ip, w); is cdft(2*n, 1, a, ip, w); for (j = 0; j <= 2 * n - 1; j++) { a[j] *= 1.0 / n; } . -------- Real DFT / Inverse of Real DFT -------- [definition] RDFT R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k ip[0] = 0; // first time only rdft(n, 1, a, ip, w); ip[0] = 0; // first time only rdft(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[2*k] = R[k], 0<=k input data a[2*j] = R[j], 0<=j= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of rdft(n, 1, a, ip, w); is rdft(n, -1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] IDCT (excluding scale) C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k ip[0] = 0; // first time only ddct(n, 1, a, ip, w); ip[0] = 0; // first time only ddct(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = C[k], 0<=k= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddct(n, -1, a, ip, w); is a[0] *= 0.5; ddct(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DST (Discrete Sine Transform) / Inverse of DST -------- [definition] IDST (excluding scale) S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 ip[0] = 0; // first time only ddst(n, 1, a, ip, w); ip[0] = 0; // first time only ddst(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) input data a[j] = A[j], 0 output data a[k] = S[k], 0= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddst(n, -1, a, ip, w); is a[0] *= 0.5; ddst(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- [definition] C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n [usage] ip[0] = 0; // first time only dfct(n, a, t, ip, w); [parameters] n :data length - 1 (int) n >= 2, n = power of 2 a[0...n] :input/output data (double *) output data a[k] = C[k], 0<=k<=n t[0...n/2] :work area (double *) ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); is a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); for (j = 0; j <= n; j++) { a[j] *= 2.0 / n; } . -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- [definition] S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = S[k], 0= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of dfst(n, a, t, ip, w); is dfst(n, a, t, ip, w); for (j = 1; j <= n - 1; j++) { a[j] *= 2.0 / n; } . Appendix : The cos/sin table is recalculated when the larger table required. w[] and ip[] are compatible with all routines. */ void cdft(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void bitrv2(int n, int *ip, double *a); void bitrv2conj(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void cftbsub(int n, double *a, double *w); if (n > (ip[0] << 2)) { makewt(n >> 2, ip, w); } if (n > 4) { if (isgn >= 0) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); } else { bitrv2conj(n, ip + 2, a); cftbsub(n, a, w); } } else if (n == 4) { cftfsub(n, a, w); } } void rdft(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void cftbsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); int nw, nc; double xi; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 2)) { nc = n >> 2; makect(nc, ip, w + nw); } if (isgn >= 0) { if (n > 4) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, w); } xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a, nc, w + nw); bitrv2(n, ip + 2, a); cftbsub(n, a, w); } else if (n == 4) { cftfsub(n, a, w); } } } void ddct(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void cftbsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); void dctsub(int n, double *a, int nc, double *c); int j, nw, nc; double xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = a[j] - a[j - 1]; a[j] += a[j - 1]; } a[1] = a[0] - xr; a[0] += xr; if (n > 4) { rftbsub(n, a, nc, w + nw); bitrv2(n, ip + 2, a); cftbsub(n, a, w); } else if (n == 4) { cftfsub(n, a, w); } } dctsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = a[j] - a[j + 1]; a[j] += a[j + 1]; } a[n - 1] = xr; } } void ddst(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void cftbsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); void dstsub(int n, double *a, int nc, double *c); int j, nw, nc; double xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = -a[j] - a[j - 1]; a[j] -= a[j - 1]; } a[1] = a[0] + xr; a[0] -= xr; if (n > 4) { rftbsub(n, a, nc, w + nw); bitrv2(n, ip + 2, a); cftbsub(n, a, w); } else if (n == 4) { cftfsub(n, a, w); } } dstsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = -a[j] - a[j + 1]; a[j] -= a[j + 1]; } a[n - 1] = -xr; } } void dfct(int n, double *a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void dctsub(int n, double *a, int nc, double *c); int j, k, l, m, mh, nw, nc; double xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } m = n >> 1; yi = a[m]; xi = a[0] + a[n]; a[0] -= a[n]; t[0] = xi - yi; t[m] = xi + yi; if (n > 2) { mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] - a[n - j]; xi = a[j] + a[n - j]; yr = a[k] - a[n - k]; yi = a[k] + a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi - yi; t[k] = xi + yi; } t[mh] = a[mh] + a[n - mh]; a[mh] -= a[n - mh]; dctsub(m, a, nc, w + nw); if (m > 4) { bitrv2(m, ip + 2, a); cftfsub(m, a, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, w); } a[n - 1] = a[0] - a[1]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] + a[j + 1]; a[2 * j - 1] = a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dctsub(m, t, nc, w + nw); if (m > 4) { bitrv2(m, ip + 2, t); cftfsub(m, t, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, w); } a[n - l] = t[0] - t[1]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = t[j] - t[j + 1]; a[k + l] = t[j] + t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 0; j < mh; j++) { k = m - j; t[j] = t[m + k] - t[m + j]; t[k] = t[m + k] + t[m + j]; } t[mh] = t[m + mh]; m = mh; } a[l] = t[0]; a[n] = t[2] - t[1]; a[0] = t[2] + t[1]; } else { a[1] = a[0]; a[2] = t[0]; a[0] = t[1]; } } void dfst(int n, double *a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void dstsub(int n, double *a, int nc, double *c); int j, k, l, m, mh, nw, nc; double xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } if (n > 2) { m = n >> 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] + a[n - j]; xi = a[j] - a[n - j]; yr = a[k] + a[n - k]; yi = a[k] - a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi + yi; t[k] = xi - yi; } t[0] = a[mh] - a[n - mh]; a[mh] += a[n - mh]; a[0] = a[m]; dstsub(m, a, nc, w + nw); if (m > 4) { bitrv2(m, ip + 2, a); cftfsub(m, a, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, w); } a[n - 1] = a[1] - a[0]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] - a[j + 1]; a[2 * j - 1] = -a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dstsub(m, t, nc, w + nw); if (m > 4) { bitrv2(m, ip + 2, t); cftfsub(m, t, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, w); } a[n - l] = t[1] - t[0]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = -t[j] - t[j + 1]; a[k + l] = t[j] - t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; t[j] = t[m + k] + t[m + j]; t[k] = t[m + k] - t[m + j]; } t[0] = t[m + mh]; m = mh; } a[l] = t[0]; } a[0] = 0; } /* -------- initializing routines -------- */ #include void makewt(int nw, int *ip, double *w) { void bitrv2(int n, int *ip, double *a); int j, nwh; double delta, x, y; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; delta = atan(1.0) / nwh; w[0] = 1; w[1] = 0; w[nwh] = cos(delta * nwh); w[nwh + 1] = w[nwh]; if (nwh > 2) { for (j = 2; j < nwh; j += 2) { x = cos(delta * j); y = sin(delta * j); w[j] = x; w[j + 1] = y; w[nw - j] = y; w[nw - j + 1] = x; } bitrv2(nw, ip + 2, w); } } } void makect(int nc, int *ip, double *c) { int j, nch; double delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; delta = atan(1.0) / nch; c[0] = cos(delta * nch); c[nch] = 0.5 * c[0]; for (j = 1; j < nch; j++) { c[j] = 0.5 * cos(delta * j); c[nc - j] = 0.5 * sin(delta * j); } } } /* -------- child routines -------- */ void bitrv2(int n, int *ip, double *a) { int j, j1, k, k1, l, m, m2; double xr, xi, yr, yi; ip[0] = 0; l = n; m = 1; while ((m << 3) < l) { l >>= 1; for (j = 0; j < m; j++) { ip[m + j] = ip[j] + l; } m <<= 1; } m2 = 2 * m; if ((m << 3) == l) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 -= m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } j1 = 2 * k + m2 + ip[k]; k1 = j1 + m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } else { for (k = 1; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } } } void bitrv2conj(int n, int *ip, double *a) { int j, j1, k, k1, l, m, m2; double xr, xi, yr, yi; ip[0] = 0; l = n; m = 1; while ((m << 3) < l) { l >>= 1; for (j = 0; j < m; j++) { ip[m + j] = ip[j] + l; } m <<= 1; } m2 = 2 * m; if ((m << 3) == l) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 -= m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 2 * k + ip[k]; a[k1 + 1] = -a[k1 + 1]; j1 = k1 + m2; k1 = j1 + m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; k1 += m2; a[k1 + 1] = -a[k1 + 1]; } } else { a[1] = -a[1]; a[m2 + 1] = -a[m2 + 1]; for (k = 1; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 2 * k + ip[k]; a[k1 + 1] = -a[k1 + 1]; a[k1 + m2 + 1] = -a[k1 + m2 + 1]; } } } void cftfsub(int n, double *a, double *w) { void cft1st(int n, double *a, double *w); void cftmdl(int n, int l, double *a, double *w); int j, j1, j2, j3, l; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; if (n > 8) { cft1st(n, a, w); l = 8; while ((l << 2) < n) { cftmdl(n, l, a, w); l <<= 2; } } if ((l << 2) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i - x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } } else { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = a[j + 1] - a[j1 + 1]; a[j] += a[j1]; a[j + 1] += a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cftbsub(int n, double *a, double *w) { void cft1st(int n, double *a, double *w); void cftmdl(int n, int l, double *a, double *w); int j, j1, j2, j3, l; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; if (n > 8) { cft1st(n, a, w); l = 8; while ((l << 2) < n) { cftmdl(n, l, a, w); l <<= 2; } } if ((l << 2) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = -a[j + 1] - a[j1 + 1]; x1r = a[j] - a[j1]; x1i = -a[j + 1] + a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i + x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i - x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i + x3r; } } else { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = -a[j + 1] + a[j1 + 1]; a[j] += a[j1]; a[j + 1] = -a[j + 1] - a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cft1st(int n, double *a, double *w) { int j, k1, k2; double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[2]; x0i = a[1] + a[3]; x1r = a[0] - a[2]; x1i = a[1] - a[3]; x2r = a[4] + a[6]; x2i = a[5] + a[7]; x3r = a[4] - a[6]; x3i = a[5] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[4] = x0r - x2r; a[5] = x0i - x2i; a[2] = x1r - x3i; a[3] = x1i + x3r; a[6] = x1r + x3i; a[7] = x1i - x3r; wk1r = w[2]; x0r = a[8] + a[10]; x0i = a[9] + a[11]; x1r = a[8] - a[10]; x1i = a[9] - a[11]; x2r = a[12] + a[14]; x2i = a[13] + a[15]; x3r = a[12] - a[14]; x3i = a[13] - a[15]; a[8] = x0r + x2r; a[9] = x0i + x2i; a[12] = x2i - x0i; a[13] = x0r - x2r; x0r = x1r - x3i; x0i = x1i + x3r; a[10] = wk1r * (x0r - x0i); a[11] = wk1r * (x0r + x0i); x0r = x3i + x1r; x0i = x3r - x1i; a[14] = wk1r * (x0i - x0r); a[15] = wk1r * (x0i + x0r); k1 = 0; for (j = 16; j < n; j += 16) { k1 += 2; k2 = 2 * k1; wk2r = w[k1]; wk2i = w[k1 + 1]; wk1r = w[k2]; wk1i = w[k2 + 1]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; x0r = a[j] + a[j + 2]; x0i = a[j + 1] + a[j + 3]; x1r = a[j] - a[j + 2]; x1i = a[j + 1] - a[j + 3]; x2r = a[j + 4] + a[j + 6]; x2i = a[j + 5] + a[j + 7]; x3r = a[j + 4] - a[j + 6]; x3i = a[j + 5] - a[j + 7]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j + 4] = wk2r * x0r - wk2i * x0i; a[j + 5] = wk2r * x0i + wk2i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j + 2] = wk1r * x0r - wk1i * x0i; a[j + 3] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j + 6] = wk3r * x0r - wk3i * x0i; a[j + 7] = wk3r * x0i + wk3i * x0r; wk1r = w[k2 + 2]; wk1i = w[k2 + 3]; wk3r = wk1r - 2 * wk2r * wk1i; wk3i = 2 * wk2r * wk1r - wk1i; x0r = a[j + 8] + a[j + 10]; x0i = a[j + 9] + a[j + 11]; x1r = a[j + 8] - a[j + 10]; x1i = a[j + 9] - a[j + 11]; x2r = a[j + 12] + a[j + 14]; x2i = a[j + 13] + a[j + 15]; x3r = a[j + 12] - a[j + 14]; x3i = a[j + 13] - a[j + 15]; a[j + 8] = x0r + x2r; a[j + 9] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j + 12] = -wk2i * x0r - wk2r * x0i; a[j + 13] = -wk2i * x0i + wk2r * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j + 10] = wk1r * x0r - wk1i * x0i; a[j + 11] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j + 14] = wk3r * x0r - wk3i * x0i; a[j + 15] = wk3r * x0i + wk3i * x0r; } } void cftmdl(int n, int l, double *a, double *w) { int j, j1, j2, j3, k, k1, k2, m, m2; double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; m = l << 2; for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i - x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } wk1r = w[2]; for (j = m; j < l + m; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x2i - x0i; a[j2 + 1] = x0r - x2r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wk1r * (x0r - x0i); a[j1 + 1] = wk1r * (x0r + x0i); x0r = x3i + x1r; x0i = x3r - x1i; a[j3] = wk1r * (x0i - x0r); a[j3 + 1] = wk1r * (x0i + x0r); } k1 = 0; m2 = 2 * m; for (k = m2; k < n; k += m2) { k1 += 2; k2 = 2 * k1; wk2r = w[k1]; wk2i = w[k1 + 1]; wk1r = w[k2]; wk1i = w[k2 + 1]; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; for (j = k; j < l + k; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j2] = wk2r * x0r - wk2i * x0i; a[j2 + 1] = wk2r * x0i + wk2i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wk1r * x0r - wk1i * x0i; a[j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; } wk1r = w[k2 + 2]; wk1i = w[k2 + 3]; wk3r = wk1r - 2 * wk2r * wk1i; wk3i = 2 * wk2r * wk1r - wk1i; for (j = k + m; j < l + (k + m); j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j2] = -wk2i * x0r - wk2r * x0i; a[j2 + 1] = -wk2i * x0i + wk2r * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wk1r * x0r - wk1i * x0i; a[j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; } } } void rftfsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } void rftbsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr, xi, yr, yi; a[1] = -a[1]; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] = yi - a[j + 1]; a[k] += yr; a[k + 1] = yi - a[k + 1]; } a[m + 1] = -a[m + 1]; } void dctsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[j] - wkr * a[k]; a[j] = wkr * a[j] + wki * a[k]; a[k] = xr; } a[m] *= c[0]; } void dstsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[k] - wkr * a[j]; a[k] = wkr * a[k] + wki * a[j]; a[j] = xr; } a[m] *= c[0]; } OouraFFT-1.0/fft4g.f000066400000000000000000001126041246725775500142100ustar00rootroot00000000000000! Fast Fourier/Cosine/Sine Transform ! dimension :one ! data length :power of 2 ! decimation :frequency ! radix :4, 2 ! data :inplace ! table :use ! subroutines ! cdft: Complex Discrete Fourier Transform ! rdft: Real Discrete Fourier Transform ! ddct: Discrete Cosine Transform ! ddst: Discrete Sine Transform ! dfct: Cosine Transform of RDFT (Real Symmetric DFT) ! dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) ! ! ! -------- Complex DFT (Discrete Fourier Transform) -------- ! [definition] ! ! X(k) = sum_j=0^n-1 x(j)*exp(2*pi*i*j*k/n), 0<=k ! X(k) = sum_j=0^n-1 x(j)*exp(-2*pi*i*j*k/n), 0<=k ! ip(0) = 0 ! first time only ! call cdft(2*n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call cdft(2*n, -1, a, ip, w) ! [parameters] ! 2*n :data length (integer) ! n >= 1, n = power of 2 ! a(0:2*n-1) :input/output data (real*8) ! input data ! a(2*j) = Re(x(j)), ! a(2*j+1) = Im(x(j)), 0<=j= 2+sqrt(n) ! strictly, ! length of ip >= ! 2+2**(int(log(n+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n/2-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call cdft(2*n, -1, a, ip, w) ! is ! call cdft(2*n, 1, a, ip, w) ! do j = 0, 2 * n - 1 ! a(j) = a(j) / n ! end do ! . ! ! ! -------- Real DFT / Inverse of Real DFT -------- ! [definition] ! RDFT ! R(k) = sum_j=0^n-1 a(j)*cos(2*pi*j*k/n), 0<=k<=n/2 ! I(k) = sum_j=0^n-1 a(j)*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) ! a(k) = (R(0) + R(n/2)*cos(pi*k))/2 + ! sum_j=1^n/2-1 R(j)*cos(2*pi*j*k/n) + ! sum_j=1^n/2-1 I(j)*sin(2*pi*j*k/n), 0<=k ! ip(0) = 0 ! first time only ! call rdft(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call rdft(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! ! output data ! a(2*k) = R(k), 0<=k ! input data ! a(2*j) = R(j), 0<=j= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n/2-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call rdft(n, 1, a, ip, w) ! is ! call rdft(n, -1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- ! [definition] ! IDCT (excluding scale) ! C(k) = sum_j=0^n-1 a(j)*cos(pi*j*(k+1/2)/n), 0<=k DCT ! C(k) = sum_j=0^n-1 a(j)*cos(pi*(j+1/2)*k/n), 0<=k ! ip(0) = 0 ! first time only ! call ddct(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call ddct(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! output data ! a(k) = C(k), 0<=k= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/4-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddct(n, -1, a, ip, w) ! is ! a(0) = a(0) / 2 ! call ddct(n, 1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- DST (Discrete Sine Transform) / Inverse of DST -------- ! [definition] ! IDST (excluding scale) ! S(k) = sum_j=1^n A(j)*sin(pi*j*(k+1/2)/n), 0<=k DST ! S(k) = sum_j=0^n-1 a(j)*sin(pi*(j+1/2)*k/n), 0 ! ip(0) = 0 ! first time only ! call ddst(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call ddst(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! ! input data ! a(j) = A(j), 0 ! output data ! a(k) = S(k), 0= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/4-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddst(n, -1, a, ip, w) ! is ! a(0) = a(0) / 2 ! call ddst(n, 1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- ! [definition] ! C(k) = sum_j=0^n a(j)*cos(pi*j*k/n), 0<=k<=n ! [usage] ! ip(0) = 0 ! first time only ! call dfct(n, a, t, ip, w) ! [parameters] ! n :data length - 1 (integer) ! n >= 2, n = power of 2 ! a(0:n) :input/output data (real*8) ! output data ! a(k) = C(k), 0<=k<=n ! t(0:n/2) :work area (real*8) ! ip(0:*) :work area for bit reversal (integer) ! length of ip >= 2+sqrt(n/4) ! strictly, ! length of ip >= ! 2+2**(int(log(n/4+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/8-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! a(0) = a(0) / 2 ! a(n) = a(n) / 2 ! call dfct(n, a, t, ip, w) ! is ! a(0) = a(0) / 2 ! a(n) = a(n) / 2 ! call dfct(n, a, t, ip, w) ! do j = 0, n ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- ! [definition] ! S(k) = sum_j=1^n-1 a(j)*sin(pi*j*k/n), 0= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! output data ! a(k) = S(k), 0= 2+sqrt(n/4) ! strictly, ! length of ip >= ! 2+2**(int(log(n/4+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/8-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call dfst(n, a, t, ip, w) ! is ! call dfst(n, a, t, ip, w) ! do j = 1, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! Appendix : ! The cos/sin table is recalculated when the larger table required. ! w() and ip() are compatible with all routines. ! ! subroutine cdft(n, isgn, a, ip, w) integer n, isgn, ip(0 : *) real*8 a(0 : n - 1), w(0 : *) if (n .gt. 4 * ip(0)) then call makewt(n / 4, ip, w) end if if (n .gt. 4) then if (isgn .ge. 0) then call bitrv2(n, ip(2), a) call cftfsub(n, a, w) else call bitrv2conj(n, ip(2), a) call cftbsub(n, a, w) end if else if (n .eq. 4) then call cftfsub(n, a, w) end if end ! subroutine rdft(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), nw, nc real*8 a(0 : n - 1), w(0 : *), xi nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 4 * nc) then nc = n / 4 call makect(nc, ip, w(nw)) end if if (isgn .ge. 0) then if (n .gt. 4) then call bitrv2(n, ip(2), a) call cftfsub(n, a, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, w) end if xi = a(0) - a(1) a(0) = a(0) + a(1) a(1) = xi else a(1) = 0.5d0 * (a(0) - a(1)) a(0) = a(0) - a(1) if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call bitrv2(n, ip(2), a) call cftbsub(n, a, w) else if (n .eq. 4) then call cftfsub(n, a, w) end if end if end ! subroutine ddct(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), j, nw, nc real*8 a(0 : n - 1), w(0 : *), xr nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if if (isgn .lt. 0) then xr = a(n - 1) do j = n - 2, 2, -2 a(j + 1) = a(j) - a(j - 1) a(j) = a(j) + a(j - 1) end do a(1) = a(0) - xr a(0) = a(0) + xr if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call bitrv2(n, ip(2), a) call cftbsub(n, a, w) else if (n .eq. 4) then call cftfsub(n, a, w) end if end if call dctsub(n, a, nc, w(nw)) if (isgn .ge. 0) then if (n .gt. 4) then call bitrv2(n, ip(2), a) call cftfsub(n, a, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, w) end if xr = a(0) - a(1) a(0) = a(0) + a(1) do j = 2, n - 2, 2 a(j - 1) = a(j) - a(j + 1) a(j) = a(j) + a(j + 1) end do a(n - 1) = xr end if end ! subroutine ddst(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), j, nw, nc real*8 a(0 : n - 1), w(0 : *), xr nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if if (isgn .lt. 0) then xr = a(n - 1) do j = n - 2, 2, -2 a(j + 1) = -a(j) - a(j - 1) a(j) = a(j) - a(j - 1) end do a(1) = a(0) + xr a(0) = a(0) - xr if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call bitrv2(n, ip(2), a) call cftbsub(n, a, w) else if (n .eq. 4) then call cftfsub(n, a, w) end if end if call dstsub(n, a, nc, w(nw)) if (isgn .ge. 0) then if (n .gt. 4) then call bitrv2(n, ip(2), a) call cftfsub(n, a, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, w) end if xr = a(0) - a(1) a(0) = a(0) + a(1) do j = 2, n - 2, 2 a(j - 1) = -a(j) - a(j + 1) a(j) = a(j) - a(j + 1) end do a(n - 1) = -xr end if end ! subroutine dfct(n, a, t, ip, w) integer n, ip(0 : *), j, k, l, m, mh, nw, nc real*8 a(0 : n), t(0 : n / 2), w(0 : *), xr, xi, yr, yi nw = ip(0) if (n .gt. 8 * nw) then nw = n / 8 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 2 * nc) then nc = n / 2 call makect(nc, ip, w(nw)) end if m = n / 2 yi = a(m) xi = a(0) + a(n) a(0) = a(0) - a(n) t(0) = xi - yi t(m) = xi + yi if (n .gt. 2) then mh = m / 2 do j = 1, mh - 1 k = m - j xr = a(j) - a(n - j) xi = a(j) + a(n - j) yr = a(k) - a(n - k) yi = a(k) + a(n - k) a(j) = xr a(k) = yr t(j) = xi - yi t(k) = xi + yi end do t(mh) = a(mh) + a(n - mh) a(mh) = a(mh) - a(n - mh) call dctsub(m, a, nc, w(nw)) if (m .gt. 4) then call bitrv2(m, ip(2), a) call cftfsub(m, a, w) call rftfsub(m, a, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, a, w) end if a(n - 1) = a(0) - a(1) a(1) = a(0) + a(1) do j = m - 2, 2, -2 a(2 * j + 1) = a(j) + a(j + 1) a(2 * j - 1) = a(j) - a(j + 1) end do l = 2 m = mh do while (m .ge. 2) call dctsub(m, t, nc, w(nw)) if (m .gt. 4) then call bitrv2(m, ip(2), t) call cftfsub(m, t, w) call rftfsub(m, t, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, t, w) end if a(n - l) = t(0) - t(1) a(l) = t(0) + t(1) k = 0 do j = 2, m - 2, 2 k = k + 4 * l a(k - l) = t(j) - t(j + 1) a(k + l) = t(j) + t(j + 1) end do l = 2 * l mh = m / 2 do j = 0, mh - 1 k = m - j t(j) = t(m + k) - t(m + j) t(k) = t(m + k) + t(m + j) end do t(mh) = t(m + mh) m = mh end do a(l) = t(0) a(n) = t(2) - t(1) a(0) = t(2) + t(1) else a(1) = a(0) a(2) = t(0) a(0) = t(1) end if end ! subroutine dfst(n, a, t, ip, w) integer n, ip(0 : *), j, k, l, m, mh, nw, nc real*8 a(0 : n - 1), t(0 : n / 2 - 1), w(0 : *), xr, xi, yr, yi nw = ip(0) if (n .gt. 8 * nw) then nw = n / 8 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 2 * nc) then nc = n / 2 call makect(nc, ip, w(nw)) end if if (n .gt. 2) then m = n / 2 mh = m / 2 do j = 1, mh - 1 k = m - j xr = a(j) + a(n - j) xi = a(j) - a(n - j) yr = a(k) + a(n - k) yi = a(k) - a(n - k) a(j) = xr a(k) = yr t(j) = xi + yi t(k) = xi - yi end do t(0) = a(mh) - a(n - mh) a(mh) = a(mh) + a(n - mh) a(0) = a(m) call dstsub(m, a, nc, w(nw)) if (m .gt. 4) then call bitrv2(m, ip(2), a) call cftfsub(m, a, w) call rftfsub(m, a, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, a, w) end if a(n - 1) = a(1) - a(0) a(1) = a(0) + a(1) do j = m - 2, 2, -2 a(2 * j + 1) = a(j) - a(j + 1) a(2 * j - 1) = -a(j) - a(j + 1) end do l = 2 m = mh do while (m .ge. 2) call dstsub(m, t, nc, w(nw)) if (m .gt. 4) then call bitrv2(m, ip(2), t) call cftfsub(m, t, w) call rftfsub(m, t, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, t, w) end if a(n - l) = t(1) - t(0) a(l) = t(0) + t(1) k = 0 do j = 2, m - 2, 2 k = k + 4 * l a(k - l) = -t(j) - t(j + 1) a(k + l) = t(j) - t(j + 1) end do l = 2 * l mh = m / 2 do j = 1, mh - 1 k = m - j t(j) = t(m + k) + t(m + j) t(k) = t(m + k) - t(m + j) end do t(0) = t(m + mh) m = mh end do a(l) = t(0) end if a(0) = 0 end ! ! -------- initializing routines -------- ! subroutine makewt(nw, ip, w) integer nw, ip(0 : *), j, nwh real*8 w(0 : nw - 1), delta, x, y ip(0) = nw ip(1) = 1 if (nw .gt. 2) then nwh = nw / 2 delta = atan(1.0d0) / nwh w(0) = 1 w(1) = 0 w(nwh) = cos(delta * nwh) w(nwh + 1) = w(nwh) if (nwh .gt. 2) then do j = 2, nwh - 2, 2 x = cos(delta * j) y = sin(delta * j) w(j) = x w(j + 1) = y w(nw - j) = y w(nw - j + 1) = x end do call bitrv2(nw, ip(2), w) end if end if end ! subroutine makect(nc, ip, c) integer nc, ip(0 : *), j, nch real*8 c(0 : nc - 1), delta ip(1) = nc if (nc .gt. 1) then nch = nc / 2 delta = atan(1.0d0) / nch c(0) = cos(delta * nch) c(nch) = 0.5d0 * c(0) do j = 1, nch - 1 c(j) = 0.5d0 * cos(delta * j) c(nc - j) = 0.5d0 * sin(delta * j) end do end if end ! ! -------- child routines -------- ! subroutine bitrv2(n, ip, a) integer n, ip(0 : *), j, j1, k, k1, l, m, m2 real*8 a(0 : n - 1), xr, xi, yr, yi ip(0) = 0 l = n m = 1 do while (8 * m .lt. l) l = l / 2 do j = 0, m - 1 ip(m + j) = ip(j) + l end do m = m * 2 end do m2 = 2 * m if (8 * m .eq. l) then do k = 0, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + 2 * m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 - m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + 2 * m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do j1 = 2 * k + m2 + ip(k) k1 = j1 + m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do else do k = 1, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do end do end if end ! subroutine bitrv2conj(n, ip, a) integer n, ip(0 : *), j, j1, k, k1, l, m, m2 real*8 a(0 : n - 1), xr, xi, yr, yi ip(0) = 0 l = n m = 1 do while (8 * m .lt. l) l = l / 2 do j = 0, m - 1 ip(m + j) = ip(j) + l end do m = m * 2 end do m2 = 2 * m if (8 * m .eq. l) then do k = 0, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + 2 * m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 - m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + 2 * m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do k1 = 2 * k + ip(k) a(k1 + 1) = -a(k1 + 1) j1 = k1 + m2 k1 = j1 + m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi k1 = k1 + m2 a(k1 + 1) = -a(k1 + 1) end do else a(1) = -a(1) a(m2 + 1) = -a(m2 + 1) do k = 1, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do k1 = 2 * k + ip(k) a(k1 + 1) = -a(k1 + 1) a(k1 + m2 + 1) = -a(k1 + m2 + 1) end do end if end ! subroutine cftfsub(n, a, w) integer n, j, j1, j2, j3, l real*8 a(0 : n - 1), w(0 : *) real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i l = 2 if (n .gt. 8) then call cft1st(n, a, w) l = 8 do while (4 * l .lt. n) call cftmdl(n, l, a, w) l = 4 * l end do end if if (4 * l .eq. n) then do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i + x2i a(j2) = x0r - x2r a(j2 + 1) = x0i - x2i a(j1) = x1r - x3i a(j1 + 1) = x1i + x3r a(j3) = x1r + x3i a(j3 + 1) = x1i - x3r end do else do j = 0, l - 2, 2 j1 = j + l x0r = a(j) - a(j1) x0i = a(j + 1) - a(j1 + 1) a(j) = a(j) + a(j1) a(j + 1) = a(j + 1) + a(j1 + 1) a(j1) = x0r a(j1 + 1) = x0i end do end if end ! subroutine cftbsub(n, a, w) integer n, j, j1, j2, j3, l real*8 a(0 : n - 1), w(0 : *) real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i l = 2 if (n .gt. 8) then call cft1st(n, a, w) l = 8 do while (4 * l .lt. n) call cftmdl(n, l, a, w) l = 4 * l end do end if if (4 * l .eq. n) then do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j) + a(j1) x0i = -a(j + 1) - a(j1 + 1) x1r = a(j) - a(j1) x1i = -a(j + 1) + a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i - x2i a(j2) = x0r - x2r a(j2 + 1) = x0i + x2i a(j1) = x1r - x3i a(j1 + 1) = x1i - x3r a(j3) = x1r + x3i a(j3 + 1) = x1i + x3r end do else do j = 0, l - 2, 2 j1 = j + l x0r = a(j) - a(j1) x0i = -a(j + 1) + a(j1 + 1) a(j) = a(j) + a(j1) a(j + 1) = -a(j + 1) - a(j1 + 1) a(j1) = x0r a(j1 + 1) = x0i end do end if end ! subroutine cft1st(n, a, w) integer n, j, k1, k2 real*8 a(0 : n - 1), w(0 : *) real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i x0r = a(0) + a(2) x0i = a(1) + a(3) x1r = a(0) - a(2) x1i = a(1) - a(3) x2r = a(4) + a(6) x2i = a(5) + a(7) x3r = a(4) - a(6) x3i = a(5) - a(7) a(0) = x0r + x2r a(1) = x0i + x2i a(4) = x0r - x2r a(5) = x0i - x2i a(2) = x1r - x3i a(3) = x1i + x3r a(6) = x1r + x3i a(7) = x1i - x3r wk1r = w(2) x0r = a(8) + a(10) x0i = a(9) + a(11) x1r = a(8) - a(10) x1i = a(9) - a(11) x2r = a(12) + a(14) x2i = a(13) + a(15) x3r = a(12) - a(14) x3i = a(13) - a(15) a(8) = x0r + x2r a(9) = x0i + x2i a(12) = x2i - x0i a(13) = x0r - x2r x0r = x1r - x3i x0i = x1i + x3r a(10) = wk1r * (x0r - x0i) a(11) = wk1r * (x0r + x0i) x0r = x3i + x1r x0i = x3r - x1i a(14) = wk1r * (x0i - x0r) a(15) = wk1r * (x0i + x0r) k1 = 0 do j = 16, n - 16, 16 k1 = k1 + 2 k2 = 2 * k1 wk2r = w(k1) wk2i = w(k1 + 1) wk1r = w(k2) wk1i = w(k2 + 1) wk3r = wk1r - 2 * wk2i * wk1i wk3i = 2 * wk2i * wk1r - wk1i x0r = a(j) + a(j + 2) x0i = a(j + 1) + a(j + 3) x1r = a(j) - a(j + 2) x1i = a(j + 1) - a(j + 3) x2r = a(j + 4) + a(j + 6) x2i = a(j + 5) + a(j + 7) x3r = a(j + 4) - a(j + 6) x3i = a(j + 5) - a(j + 7) a(j) = x0r + x2r a(j + 1) = x0i + x2i x0r = x0r - x2r x0i = x0i - x2i a(j + 4) = wk2r * x0r - wk2i * x0i a(j + 5) = wk2r * x0i + wk2i * x0r x0r = x1r - x3i x0i = x1i + x3r a(j + 2) = wk1r * x0r - wk1i * x0i a(j + 3) = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j + 6) = wk3r * x0r - wk3i * x0i a(j + 7) = wk3r * x0i + wk3i * x0r wk1r = w(k2 + 2) wk1i = w(k2 + 3) wk3r = wk1r - 2 * wk2r * wk1i wk3i = 2 * wk2r * wk1r - wk1i x0r = a(j + 8) + a(j + 10) x0i = a(j + 9) + a(j + 11) x1r = a(j + 8) - a(j + 10) x1i = a(j + 9) - a(j + 11) x2r = a(j + 12) + a(j + 14) x2i = a(j + 13) + a(j + 15) x3r = a(j + 12) - a(j + 14) x3i = a(j + 13) - a(j + 15) a(j + 8) = x0r + x2r a(j + 9) = x0i + x2i x0r = x0r - x2r x0i = x0i - x2i a(j + 12) = -wk2i * x0r - wk2r * x0i a(j + 13) = -wk2i * x0i + wk2r * x0r x0r = x1r - x3i x0i = x1i + x3r a(j + 10) = wk1r * x0r - wk1i * x0i a(j + 11) = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j + 14) = wk3r * x0r - wk3i * x0i a(j + 15) = wk3r * x0i + wk3i * x0r end do end ! subroutine cftmdl(n, l, a, w) integer n, l, j, j1, j2, j3, k, k1, k2, m, m2 real*8 a(0 : n - 1), w(0 : *) real*8 wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i m = 4 * l do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i + x2i a(j2) = x0r - x2r a(j2 + 1) = x0i - x2i a(j1) = x1r - x3i a(j1 + 1) = x1i + x3r a(j3) = x1r + x3i a(j3 + 1) = x1i - x3r end do wk1r = w(2) do j = m, l + m - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i + x2i a(j2) = x2i - x0i a(j2 + 1) = x0r - x2r x0r = x1r - x3i x0i = x1i + x3r a(j1) = wk1r * (x0r - x0i) a(j1 + 1) = wk1r * (x0r + x0i) x0r = x3i + x1r x0i = x3r - x1i a(j3) = wk1r * (x0i - x0r) a(j3 + 1) = wk1r * (x0i + x0r) end do k1 = 0 m2 = 2 * m do k = m2, n - m2, m2 k1 = k1 + 2 k2 = 2 * k1 wk2r = w(k1) wk2i = w(k1 + 1) wk1r = w(k2) wk1i = w(k2 + 1) wk3r = wk1r - 2 * wk2i * wk1i wk3i = 2 * wk2i * wk1r - wk1i do j = k, l + k - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i + x2i x0r = x0r - x2r x0i = x0i - x2i a(j2) = wk2r * x0r - wk2i * x0i a(j2 + 1) = wk2r * x0i + wk2i * x0r x0r = x1r - x3i x0i = x1i + x3r a(j1) = wk1r * x0r - wk1i * x0i a(j1 + 1) = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3) = wk3r * x0r - wk3i * x0i a(j3 + 1) = wk3r * x0i + wk3i * x0r end do wk1r = w(k2 + 2) wk1i = w(k2 + 3) wk3r = wk1r - 2 * wk2r * wk1i wk3i = 2 * wk2r * wk1r - wk1i do j = k + m, l + (k + m) - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i + x2i x0r = x0r - x2r x0i = x0i - x2i a(j2) = -wk2i * x0r - wk2r * x0i a(j2 + 1) = -wk2i * x0i + wk2r * x0r x0r = x1r - x3i x0i = x1i + x3r a(j1) = wk1r * x0r - wk1i * x0i a(j1 + 1) = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3) = wk3r * x0r - wk3i * x0i a(j3 + 1) = wk3r * x0i + wk3i * x0r end do end do end ! subroutine rftfsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi m = n / 2 ks = 2 * nc / m kk = 0 do j = 2, m - 2, 2 k = n - j kk = kk + ks wkr = 0.5d0 - c(nc - kk) wki = c(kk) xr = a(j) - a(k) xi = a(j + 1) + a(k + 1) yr = wkr * xr - wki * xi yi = wkr * xi + wki * xr a(j) = a(j) - yr a(j + 1) = a(j + 1) - yi a(k) = a(k) + yr a(k + 1) = a(k + 1) - yi end do end ! subroutine rftbsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi a(1) = -a(1) m = n / 2 ks = 2 * nc / m kk = 0 do j = 2, m - 2, 2 k = n - j kk = kk + ks wkr = 0.5d0 - c(nc - kk) wki = c(kk) xr = a(j) - a(k) xi = a(j + 1) + a(k + 1) yr = wkr * xr + wki * xi yi = wkr * xi - wki * xr a(j) = a(j) - yr a(j + 1) = yi - a(j + 1) a(k) = a(k) + yr a(k + 1) = yi - a(k + 1) end do a(m + 1) = -a(m + 1) end ! subroutine dctsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr m = n / 2 ks = nc / n kk = 0 do j = 1, m - 1 k = n - j kk = kk + ks wkr = c(kk) - c(nc - kk) wki = c(kk) + c(nc - kk) xr = wki * a(j) - wkr * a(k) a(j) = wkr * a(j) + wki * a(k) a(k) = xr end do a(m) = c(0) * a(m) end ! subroutine dstsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr m = n / 2 ks = nc / n kk = 0 do j = 1, m - 1 k = n - j kk = kk + ks wkr = c(kk) - c(nc - kk) wki = c(kk) + c(nc - kk) xr = wki * a(k) - wkr * a(j) a(k) = wkr * a(k) + wki * a(j) a(j) = xr end do a(m) = c(0) * a(m) end ! OouraFFT-1.0/fft4g_h.c000066400000000000000000001060111246725775500145070ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :one data length :power of 2 decimation :frequency radix :4, 2 data :inplace table :not use functions cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, double *); void rdft(int, int, double *); void ddct(int, int, double *); void ddst(int, int, double *); void dfct(int, double *); void dfst(int, double *); -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k cdft(2*n, 1, a); cdft(2*n, -1, a); [parameters] 2*n :data length (int) n >= 1, n = power of 2 a[0...2*n-1] :input/output data (double *) input data a[2*j] = Re(x[j]), a[2*j+1] = Im(x[j]), 0<=j RDFT R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k rdft(n, 1, a); rdft(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[2*k] = R[k], 0<=k input data a[2*j] = R[j], 0<=j IDCT (excluding scale) C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k ddct(n, 1, a); ddct(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = C[k], 0<=k IDST (excluding scale) S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 ddst(n, 1, a); ddst(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) input data a[j] = A[j], 0 output data a[k] = S[k], 0= 2, n = power of 2 a[0...n] :input/output data (double *) output data a[k] = C[k], 0<=k<=n [remark] Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); is a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); for (j = 0; j <= n; j++) { a[j] *= 2.0 / n; } . -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- [definition] S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = S[k], 0 4) { if (isgn >= 0) { bitrv2(n, a); cftfsub(n, a); } else { bitrv2conj(n, a); cftbsub(n, a); } } else if (n == 4) { cftfsub(n, a); } } void rdft(int n, int isgn, double *a) { void bitrv2(int n, double *a); void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); double xi; if (isgn >= 0) { if (n > 4) { bitrv2(n, a); cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a); bitrv2(n, a); cftbsub(n, a); } else if (n == 4) { cftfsub(n, a); } } } void ddct(int n, int isgn, double *a) { void bitrv2(int n, double *a); void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); void dctsub(int n, double *a); void dctsub4(int n, double *a); int j; double xr; if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = a[j] - a[j - 1]; a[j] += a[j - 1]; } a[1] = a[0] - xr; a[0] += xr; if (n > 4) { rftbsub(n, a); bitrv2(n, a); cftbsub(n, a); } else if (n == 4) { cftfsub(n, a); } } if (n > 4) { dctsub(n, a); } else { dctsub4(n, a); } if (isgn >= 0) { if (n > 4) { bitrv2(n, a); cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = a[j] - a[j + 1]; a[j] += a[j + 1]; } a[n - 1] = xr; } } void ddst(int n, int isgn, double *a) { void bitrv2(int n, double *a); void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); void dstsub(int n, double *a); void dstsub4(int n, double *a); int j; double xr; if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = -a[j] - a[j - 1]; a[j] -= a[j - 1]; } a[1] = a[0] + xr; a[0] -= xr; if (n > 4) { rftbsub(n, a); bitrv2(n, a); cftbsub(n, a); } else if (n == 4) { cftfsub(n, a); } } if (n > 4) { dstsub(n, a); } else { dstsub4(n, a); } if (isgn >= 0) { if (n > 4) { bitrv2(n, a); cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = -a[j] - a[j + 1]; a[j] -= a[j + 1]; } a[n - 1] = -xr; } } void dfct(int n, double *a) { void ddct(int n, int isgn, double *a); void bitrv1(int n, double *a); int j, k, m, mh; double xr, xi, yr, yi, an; m = n >> 1; for (j = 0; j < m; j++) { k = n - j; xr = a[j] + a[k]; a[j] -= a[k]; a[k] = xr; } an = a[n]; while (m >= 2) { ddct(m, 1, a); bitrv1(m, a); mh = m >> 1; xi = a[m]; a[m] = a[0]; a[0] = an - xi; an += xi; for (j = 1; j < mh; j++) { k = m - j; xr = a[m + k]; xi = a[m + j]; yr = a[j]; yi = a[k]; a[m + j] = yr; a[m + k] = yi; a[j] = xr - xi; a[k] = xr + xi; } xr = a[mh]; a[mh] = a[m + mh]; a[m + mh] = xr; m = mh; } xi = a[1]; a[1] = a[0]; a[0] = an + xi; a[n] = an - xi; bitrv1(n, a); } void dfst(int n, double *a) { void ddst(int n, int isgn, double *a); void bitrv1(int n, double *a); int j, k, m, mh; double xr, xi, yr, yi; m = n >> 1; for (j = 1; j < m; j++) { k = n - j; xr = a[j] - a[k]; a[j] += a[k]; a[k] = xr; } a[0] = a[m]; while (m >= 2) { ddst(m, 1, a); bitrv1(m, a); mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[m + k]; xi = a[m + j]; yr = a[j]; yi = a[k]; a[m + j] = yr; a[m + k] = yi; a[j] = xr + xi; a[k] = xr - xi; } a[m] = a[0]; a[0] = a[m + mh]; a[m + mh] = a[mh]; m = mh; } a[1] = a[0]; a[0] = 0; bitrv1(n, a); } /* -------- child routines -------- */ #include #ifndef M_PI_2 #define M_PI_2 1.570796326794896619231321691639751442098584699687 #endif #ifndef WR5000 /* cos(M_PI_2*0.5000) */ #define WR5000 0.707106781186547524400844362104849039284835937688 #endif #ifndef WR2500 /* cos(M_PI_2*0.2500) */ #define WR2500 0.923879532511286756128183189396788286822416625863 #endif #ifndef WI2500 /* sin(M_PI_2*0.2500) */ #define WI2500 0.382683432365089771728459984030398866761344562485 #endif #ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */ #define RDFT_LOOP_DIV 64 #endif #ifndef DCST_LOOP_DIV /* control of the DCT,DST's speed & tolerance */ #define DCST_LOOP_DIV 64 #endif void bitrv2(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k; double xr, xi, yr, yi; l = n >> 2; m = 2; while (m < l) { l >>= 1; m <<= 1; } if (m == l) { j0 = 0; for (k0 = 0; k0 < m; k0 += 2) { k = k0; for (j = j0; j < j0 + k0; j += 2) { xr = a[j]; xi = a[j + 1]; yr = a[k]; yi = a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (k ^= i); i >>= 1); } j1 = j0 + k0 + m; k1 = j1 + m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (j0 ^= i); i >>= 1); } } else { j0 = 0; for (k0 = 2; k0 < m; k0 += 2) { for (i = n >> 1; i > (j0 ^= i); i >>= 1); k = k0; for (j = j0; j < j0 + k0; j += 2) { xr = a[j]; xi = a[j + 1]; yr = a[k]; yi = a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (k ^= i); i >>= 1); } } } } void bitrv2conj(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k; double xr, xi, yr, yi; l = n >> 2; m = 2; while (m < l) { l >>= 1; m <<= 1; } if (m == l) { j0 = 0; for (k0 = 0; k0 < m; k0 += 2) { k = k0; for (j = j0; j < j0 + k0; j += 2) { xr = a[j]; xi = -a[j + 1]; yr = a[k]; yi = -a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; a[k1 + 1] = -a[k1 + 1]; j1 = k1 + m; k1 = j1 + m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; k1 += m; a[k1 + 1] = -a[k1 + 1]; for (i = n >> 1; i > (j0 ^= i); i >>= 1); } } else { a[1] = -a[1]; a[m + 1] = -a[m + 1]; j0 = 0; for (k0 = 2; k0 < m; k0 += 2) { for (i = n >> 1; i > (j0 ^= i); i >>= 1); k = k0; for (j = j0; j < j0 + k0; j += 2) { xr = a[j]; xi = -a[j + 1]; yr = a[k]; yi = -a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; a[k1 + 1] = -a[k1 + 1]; a[k1 + m + 1] = -a[k1 + m + 1]; } } } void bitrv1(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k; double x; l = n >> 2; m = 1; while (m < l) { l >>= 1; m <<= 1; } if (m == l) { j0 = 0; for (k0 = 0; k0 < m; k0++) { k = k0; for (j = j0; j < j0 + k0; j++) { x = a[j]; a[j] = a[k]; a[k] = x; j1 = j + m; k1 = k + 2 * m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 += m; k1 -= m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 += m; k1 += 2 * m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; for (i = n >> 1; i > (k ^= i); i >>= 1); } j1 = j0 + k0 + m; k1 = j1 + m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; for (i = n >> 1; i > (j0 ^= i); i >>= 1); } } else { j0 = 0; for (k0 = 1; k0 < m; k0++) { for (i = n >> 1; i > (j0 ^= i); i >>= 1); k = k0; for (j = j0; j < j0 + k0; j++) { x = a[j]; a[j] = a[k]; a[k] = x; j1 = j + m; k1 = k + m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; for (i = n >> 1; i > (k ^= i); i >>= 1); } } } } void cftfsub(int n, double *a) { void cft1st(int n, double *a); void cftmdl(int n, int l, double *a); int j, j1, j2, j3, l; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; if (n > 8) { cft1st(n, a); l = 8; while ((l << 2) < n) { cftmdl(n, l, a); l <<= 2; } } if ((l << 2) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i - x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } } else { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = a[j + 1] - a[j1 + 1]; a[j] += a[j1]; a[j + 1] += a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cftbsub(int n, double *a) { void cft1st(int n, double *a); void cftmdl(int n, int l, double *a); int j, j1, j2, j3, l; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; if (n > 8) { cft1st(n, a); l = 8; while ((l << 2) < n) { cftmdl(n, l, a); l <<= 2; } } if ((l << 2) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = -a[j + 1] - a[j1 + 1]; x1r = a[j] - a[j1]; x1i = -a[j + 1] + a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i + x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i - x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i + x3r; } } else { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = -a[j + 1] + a[j1 + 1]; a[j] += a[j1]; a[j + 1] = -a[j + 1] - a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cft1st(int n, double *a) { int j, kj, kr; double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[2]; x0i = a[1] + a[3]; x1r = a[0] - a[2]; x1i = a[1] - a[3]; x2r = a[4] + a[6]; x2i = a[5] + a[7]; x3r = a[4] - a[6]; x3i = a[5] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[4] = x0r - x2r; a[5] = x0i - x2i; a[2] = x1r - x3i; a[3] = x1i + x3r; a[6] = x1r + x3i; a[7] = x1i - x3r; wn4r = WR5000; x0r = a[8] + a[10]; x0i = a[9] + a[11]; x1r = a[8] - a[10]; x1i = a[9] - a[11]; x2r = a[12] + a[14]; x2i = a[13] + a[15]; x3r = a[12] - a[14]; x3i = a[13] - a[15]; a[8] = x0r + x2r; a[9] = x0i + x2i; a[12] = x2i - x0i; a[13] = x0r - x2r; x0r = x1r - x3i; x0i = x1i + x3r; a[10] = wn4r * (x0r - x0i); a[11] = wn4r * (x0r + x0i); x0r = x3i + x1r; x0i = x3r - x1i; a[14] = wn4r * (x0i - x0r); a[15] = wn4r * (x0i + x0r); ew = M_PI_2 / n; kr = 0; for (j = 16; j < n; j += 16) { for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1); wk1r = cos(ew * kr); wk1i = sin(ew * kr); wk2r = 1 - 2 * wk1i * wk1i; wk2i = 2 * wk1i * wk1r; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; x0r = a[j] + a[j + 2]; x0i = a[j + 1] + a[j + 3]; x1r = a[j] - a[j + 2]; x1i = a[j + 1] - a[j + 3]; x2r = a[j + 4] + a[j + 6]; x2i = a[j + 5] + a[j + 7]; x3r = a[j + 4] - a[j + 6]; x3i = a[j + 5] - a[j + 7]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j + 4] = wk2r * x0r - wk2i * x0i; a[j + 5] = wk2r * x0i + wk2i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j + 2] = wk1r * x0r - wk1i * x0i; a[j + 3] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j + 6] = wk3r * x0r - wk3i * x0i; a[j + 7] = wk3r * x0i + wk3i * x0r; x0r = wn4r * (wk1r - wk1i); wk1i = wn4r * (wk1r + wk1i); wk1r = x0r; wk3r = wk1r - 2 * wk2r * wk1i; wk3i = 2 * wk2r * wk1r - wk1i; x0r = a[j + 8] + a[j + 10]; x0i = a[j + 9] + a[j + 11]; x1r = a[j + 8] - a[j + 10]; x1i = a[j + 9] - a[j + 11]; x2r = a[j + 12] + a[j + 14]; x2i = a[j + 13] + a[j + 15]; x3r = a[j + 12] - a[j + 14]; x3i = a[j + 13] - a[j + 15]; a[j + 8] = x0r + x2r; a[j + 9] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j + 12] = -wk2i * x0r - wk2r * x0i; a[j + 13] = -wk2i * x0i + wk2r * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j + 10] = wk1r * x0r - wk1i * x0i; a[j + 11] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j + 14] = wk3r * x0r - wk3i * x0i; a[j + 15] = wk3r * x0i + wk3i * x0r; } } void cftmdl(int n, int l, double *a) { int j, j1, j2, j3, k, kj, kr, m, m2; double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; m = l << 2; for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i - x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } wn4r = WR5000; for (j = m; j < l + m; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x2i - x0i; a[j2 + 1] = x0r - x2r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wn4r * (x0r - x0i); a[j1 + 1] = wn4r * (x0r + x0i); x0r = x3i + x1r; x0i = x3r - x1i; a[j3] = wn4r * (x0i - x0r); a[j3 + 1] = wn4r * (x0i + x0r); } ew = M_PI_2 / n; kr = 0; m2 = 2 * m; for (k = m2; k < n; k += m2) { for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1); wk1r = cos(ew * kr); wk1i = sin(ew * kr); wk2r = 1 - 2 * wk1i * wk1i; wk2i = 2 * wk1i * wk1r; wk3r = wk1r - 2 * wk2i * wk1i; wk3i = 2 * wk2i * wk1r - wk1i; for (j = k; j < l + k; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j2] = wk2r * x0r - wk2i * x0i; a[j2 + 1] = wk2r * x0i + wk2i * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wk1r * x0r - wk1i * x0i; a[j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; } x0r = wn4r * (wk1r - wk1i); wk1i = wn4r * (wk1r + wk1i); wk1r = x0r; wk3r = wk1r - 2 * wk2r * wk1i; wk3i = 2 * wk2r * wk1r - wk1i; for (j = k + m; j < l + (k + m); j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; x0r -= x2r; x0i -= x2i; a[j2] = -wk2i * x0r - wk2r * x0i; a[j2 + 1] = -wk2i * x0i + wk2r * x0r; x0r = x1r - x3i; x0i = x1i + x3r; a[j1] = wk1r * x0r - wk1i * x0i; a[j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; } } } void rftfsub(int n, double *a) { int i, i0, j, k; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = 2 * M_PI_2 / n; wkr = 0; wki = 0; wdi = cos(ec); wdr = sin(ec); wdi *= wdr; wdr *= wdr; w1r = 1 - 2 * wdr; w1i = 2 * wdi; ss = 2 * w1i; i = n >> 1; for (;;) { i0 = i - 4 * RDFT_LOOP_DIV; if (i0 < 4) { i0 = 4; } for (j = i - 4; j >= i0; j -= 4) { k = n - j; xr = a[j + 2] - a[k - 2]; xi = a[j + 3] + a[k - 1]; yr = wdr * xr - wdi * xi; yi = wdr * xi + wdi * xr; a[j + 2] -= yr; a[j + 3] -= yi; a[k - 2] += yr; a[k - 1] -= yi; wkr += ss * wdi; wki += ss * (0.5 - wdr); xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; wdr += ss * wki; wdi += ss * (0.5 - wkr); } if (i0 == 4) { break; } wkr = 0.5 * sin(ec * i0); wki = 0.5 * cos(ec * i0); wdr = 0.5 - (wkr * w1r - wki * w1i); wdi = wkr * w1i + wki * w1r; wkr = 0.5 - wkr; i = i0; } xr = a[2] - a[n - 2]; xi = a[3] + a[n - 1]; yr = wdr * xr - wdi * xi; yi = wdr * xi + wdi * xr; a[2] -= yr; a[3] -= yi; a[n - 2] += yr; a[n - 1] -= yi; } void rftbsub(int n, double *a) { int i, i0, j, k; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = 2 * M_PI_2 / n; wkr = 0; wki = 0; wdi = cos(ec); wdr = sin(ec); wdi *= wdr; wdr *= wdr; w1r = 1 - 2 * wdr; w1i = 2 * wdi; ss = 2 * w1i; i = n >> 1; a[i + 1] = -a[i + 1]; for (;;) { i0 = i - 4 * RDFT_LOOP_DIV; if (i0 < 4) { i0 = 4; } for (j = i - 4; j >= i0; j -= 4) { k = n - j; xr = a[j + 2] - a[k - 2]; xi = a[j + 3] + a[k - 1]; yr = wdr * xr + wdi * xi; yi = wdr * xi - wdi * xr; a[j + 2] -= yr; a[j + 3] = yi - a[j + 3]; a[k - 2] += yr; a[k - 1] = yi - a[k - 1]; wkr += ss * wdi; wki += ss * (0.5 - wdr); xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] = yi - a[j + 1]; a[k] += yr; a[k + 1] = yi - a[k + 1]; wdr += ss * wki; wdi += ss * (0.5 - wkr); } if (i0 == 4) { break; } wkr = 0.5 * sin(ec * i0); wki = 0.5 * cos(ec * i0); wdr = 0.5 - (wkr * w1r - wki * w1i); wdi = wkr * w1i + wki * w1r; wkr = 0.5 - wkr; i = i0; } xr = a[2] - a[n - 2]; xi = a[3] + a[n - 1]; yr = wdr * xr + wdi * xi; yi = wdr * xi - wdi * xr; a[2] -= yr; a[3] = yi - a[3]; a[n - 2] += yr; a[n - 1] = yi - a[n - 1]; a[1] = -a[1]; } void dctsub(int n, double *a) { int i, i0, j, k, m; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = M_PI_2 / n; wkr = 0.5; wki = 0.5; w1r = cos(ec); w1i = sin(ec); wdr = 0.5 * (w1r - w1i); wdi = 0.5 * (w1r + w1i); ss = 2 * w1i; m = n >> 1; i = 0; for (;;) { i0 = i + 2 * DCST_LOOP_DIV; if (i0 > m - 2) { i0 = m - 2; } for (j = i + 2; j <= i0; j += 2) { k = n - j; xr = wdi * a[j - 1] - wdr * a[k + 1]; xi = wdr * a[j - 1] + wdi * a[k + 1]; wkr -= ss * wdi; wki += ss * wdr; yr = wki * a[j] - wkr * a[k]; yi = wkr * a[j] + wki * a[k]; wdr -= ss * wki; wdi += ss * wkr; a[k + 1] = xr; a[k] = yr; a[j - 1] = xi; a[j] = yi; } if (i0 == m - 2) { break; } wdr = cos(ec * i0); wdi = sin(ec * i0); wkr = 0.5 * (wdr - wdi); wki = 0.5 * (wdr + wdi); wdr = wkr * w1r - wki * w1i; wdi = wkr * w1i + wki * w1r; i = i0; } xr = wdi * a[m - 1] - wdr * a[m + 1]; a[m - 1] = wdr * a[m - 1] + wdi * a[m + 1]; a[m + 1] = xr; a[m] *= wki + ss * wdr; } void dstsub(int n, double *a) { int i, i0, j, k, m; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = M_PI_2 / n; wkr = 0.5; wki = 0.5; w1r = cos(ec); w1i = sin(ec); wdr = 0.5 * (w1r - w1i); wdi = 0.5 * (w1r + w1i); ss = 2 * w1i; m = n >> 1; i = 0; for (;;) { i0 = i + 2 * DCST_LOOP_DIV; if (i0 > m - 2) { i0 = m - 2; } for (j = i + 2; j <= i0; j += 2) { k = n - j; xr = wdi * a[k + 1] - wdr * a[j - 1]; xi = wdr * a[k + 1] + wdi * a[j - 1]; wkr -= ss * wdi; wki += ss * wdr; yr = wki * a[k] - wkr * a[j]; yi = wkr * a[k] + wki * a[j]; wdr -= ss * wki; wdi += ss * wkr; a[j - 1] = xr; a[j] = yr; a[k + 1] = xi; a[k] = yi; } if (i0 == m - 2) { break; } wdr = cos(ec * i0); wdi = sin(ec * i0); wkr = 0.5 * (wdr - wdi); wki = 0.5 * (wdr + wdi); wdr = wkr * w1r - wki * w1i; wdi = wkr * w1i + wki * w1r; i = i0; } xr = wdi * a[m + 1] - wdr * a[m - 1]; a[m + 1] = wdr * a[m + 1] + wdi * a[m - 1]; a[m - 1] = xr; a[m] *= wki + ss * wdr; } void dctsub4(int n, double *a) { int m; double wki, wdr, wdi, xr; wki = WR5000; m = n >> 1; if (m == 2) { wdr = wki * WI2500; wdi = wki * WR2500; xr = wdi * a[1] - wdr * a[3]; a[1] = wdr * a[1] + wdi * a[3]; a[3] = xr; } a[m] *= wki; } void dstsub4(int n, double *a) { int m; double wki, wdr, wdi, xr; wki = WR5000; m = n >> 1; if (m == 2) { wdr = wki * WI2500; wdi = wki * WR2500; xr = wdi * a[3] - wdr * a[1]; a[3] = wdr * a[3] + wdi * a[1]; a[1] = xr; } a[m] *= wki; } OouraFFT-1.0/fft8g.c000066400000000000000000001354071246725775500142170ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :one data length :power of 2 decimation :frequency radix :8, 4, 2 data :inplace table :use functions cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, double *, int *, double *); void rdft(int, int, double *, int *, double *); void ddct(int, int, double *, int *, double *); void ddst(int, int, double *, int *, double *); void dfct(int, double *, double *, int *, double *); void dfst(int, double *, double *, int *, double *); -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k ip[0] = 0; // first time only cdft(2*n, 1, a, ip, w); ip[0] = 0; // first time only cdft(2*n, -1, a, ip, w); [parameters] 2*n :data length (int) n >= 1, n = power of 2 a[0...2*n-1] :input/output data (double *) input data a[2*j] = Re(x[j]), a[2*j+1] = Im(x[j]), 0<=j= 2+sqrt(n) strictly, length of ip >= 2+(1<<(int)(log(n+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of cdft(2*n, -1, a, ip, w); is cdft(2*n, 1, a, ip, w); for (j = 0; j <= 2 * n - 1; j++) { a[j] *= 1.0 / n; } . -------- Real DFT / Inverse of Real DFT -------- [definition] RDFT R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k ip[0] = 0; // first time only rdft(n, 1, a, ip, w); ip[0] = 0; // first time only rdft(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[2*k] = R[k], 0<=k input data a[2*j] = R[j], 0<=j= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of rdft(n, 1, a, ip, w); is rdft(n, -1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] IDCT (excluding scale) C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k ip[0] = 0; // first time only ddct(n, 1, a, ip, w); ip[0] = 0; // first time only ddct(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = C[k], 0<=k= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddct(n, -1, a, ip, w); is a[0] *= 0.5; ddct(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DST (Discrete Sine Transform) / Inverse of DST -------- [definition] IDST (excluding scale) S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 ip[0] = 0; // first time only ddst(n, 1, a, ip, w); ip[0] = 0; // first time only ddst(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) input data a[j] = A[j], 0 output data a[k] = S[k], 0= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddst(n, -1, a, ip, w); is a[0] *= 0.5; ddst(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- [definition] C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n [usage] ip[0] = 0; // first time only dfct(n, a, t, ip, w); [parameters] n :data length - 1 (int) n >= 2, n = power of 2 a[0...n] :input/output data (double *) output data a[k] = C[k], 0<=k<=n t[0...n/2] :work area (double *) ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); is a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); for (j = 0; j <= n; j++) { a[j] *= 2.0 / n; } . -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- [definition] S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = S[k], 0= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of dfst(n, a, t, ip, w); is dfst(n, a, t, ip, w); for (j = 1; j <= n - 1; j++) { a[j] *= 2.0 / n; } . Appendix : The cos/sin table is recalculated when the larger table required. w[] and ip[] are compatible with all routines. */ void cdft(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void bitrv2(int n, int *ip, double *a); void bitrv2conj(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void cftbsub(int n, double *a, double *w); if (n > (ip[0] << 2)) { makewt(n >> 2, ip, w); } if (n > 4) { if (isgn >= 0) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); } else { bitrv2conj(n, ip + 2, a); cftbsub(n, a, w); } } else if (n == 4) { cftfsub(n, a, w); } } void rdft(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void cftbsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); int nw, nc; double xi; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 2)) { nc = n >> 2; makect(nc, ip, w + nw); } if (isgn >= 0) { if (n > 4) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, w); } xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a, nc, w + nw); bitrv2(n, ip + 2, a); cftbsub(n, a, w); } else if (n == 4) { cftfsub(n, a, w); } } } void ddct(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void cftbsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); void dctsub(int n, double *a, int nc, double *c); int j, nw, nc; double xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = a[j] - a[j - 1]; a[j] += a[j - 1]; } a[1] = a[0] - xr; a[0] += xr; if (n > 4) { rftbsub(n, a, nc, w + nw); bitrv2(n, ip + 2, a); cftbsub(n, a, w); } else if (n == 4) { cftfsub(n, a, w); } } dctsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = a[j] - a[j + 1]; a[j] += a[j + 1]; } a[n - 1] = xr; } } void ddst(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void cftbsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); void dstsub(int n, double *a, int nc, double *c); int j, nw, nc; double xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = -a[j] - a[j - 1]; a[j] -= a[j - 1]; } a[1] = a[0] + xr; a[0] -= xr; if (n > 4) { rftbsub(n, a, nc, w + nw); bitrv2(n, ip + 2, a); cftbsub(n, a, w); } else if (n == 4) { cftfsub(n, a, w); } } dstsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { bitrv2(n, ip + 2, a); cftfsub(n, a, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = -a[j] - a[j + 1]; a[j] -= a[j + 1]; } a[n - 1] = -xr; } } void dfct(int n, double *a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void dctsub(int n, double *a, int nc, double *c); int j, k, l, m, mh, nw, nc; double xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } m = n >> 1; yi = a[m]; xi = a[0] + a[n]; a[0] -= a[n]; t[0] = xi - yi; t[m] = xi + yi; if (n > 2) { mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] - a[n - j]; xi = a[j] + a[n - j]; yr = a[k] - a[n - k]; yi = a[k] + a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi - yi; t[k] = xi + yi; } t[mh] = a[mh] + a[n - mh]; a[mh] -= a[n - mh]; dctsub(m, a, nc, w + nw); if (m > 4) { bitrv2(m, ip + 2, a); cftfsub(m, a, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, w); } a[n - 1] = a[0] - a[1]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] + a[j + 1]; a[2 * j - 1] = a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dctsub(m, t, nc, w + nw); if (m > 4) { bitrv2(m, ip + 2, t); cftfsub(m, t, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, w); } a[n - l] = t[0] - t[1]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = t[j] - t[j + 1]; a[k + l] = t[j] + t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 0; j < mh; j++) { k = m - j; t[j] = t[m + k] - t[m + j]; t[k] = t[m + k] + t[m + j]; } t[mh] = t[m + mh]; m = mh; } a[l] = t[0]; a[n] = t[2] - t[1]; a[0] = t[2] + t[1]; } else { a[1] = a[0]; a[2] = t[0]; a[0] = t[1]; } } void dfst(int n, double *a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void bitrv2(int n, int *ip, double *a); void cftfsub(int n, double *a, double *w); void rftfsub(int n, double *a, int nc, double *c); void dstsub(int n, double *a, int nc, double *c); int j, k, l, m, mh, nw, nc; double xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } if (n > 2) { m = n >> 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] + a[n - j]; xi = a[j] - a[n - j]; yr = a[k] + a[n - k]; yi = a[k] - a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi + yi; t[k] = xi - yi; } t[0] = a[mh] - a[n - mh]; a[mh] += a[n - mh]; a[0] = a[m]; dstsub(m, a, nc, w + nw); if (m > 4) { bitrv2(m, ip + 2, a); cftfsub(m, a, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, w); } a[n - 1] = a[1] - a[0]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] - a[j + 1]; a[2 * j - 1] = -a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dstsub(m, t, nc, w + nw); if (m > 4) { bitrv2(m, ip + 2, t); cftfsub(m, t, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, w); } a[n - l] = t[1] - t[0]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = -t[j] - t[j + 1]; a[k + l] = t[j] - t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; t[j] = t[m + k] + t[m + j]; t[k] = t[m + k] - t[m + j]; } t[0] = t[m + mh]; m = mh; } a[l] = t[0]; } a[0] = 0; } /* -------- initializing routines -------- */ #include void makewt(int nw, int *ip, double *w) { void bitrv2(int n, int *ip, double *a); int j, nwh; double delta, x, y; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; delta = atan(1.0) / nwh; w[0] = 1; w[1] = 0; w[nwh] = cos(delta * nwh); w[nwh + 1] = w[nwh]; if (nwh > 2) { for (j = 2; j < nwh; j += 2) { x = cos(delta * j); y = sin(delta * j); w[j] = x; w[j + 1] = y; w[nw - j] = y; w[nw - j + 1] = x; } for (j = nwh - 2; j >= 2; j -= 2) { x = w[2 * j]; y = w[2 * j + 1]; w[nwh + j] = x; w[nwh + j + 1] = y; } bitrv2(nw, ip + 2, w); } } } void makect(int nc, int *ip, double *c) { int j, nch; double delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; delta = atan(1.0) / nch; c[0] = cos(delta * nch); c[nch] = 0.5 * c[0]; for (j = 1; j < nch; j++) { c[j] = 0.5 * cos(delta * j); c[nc - j] = 0.5 * sin(delta * j); } } } /* -------- child routines -------- */ void bitrv2(int n, int *ip, double *a) { int j, j1, k, k1, l, m, m2; double xr, xi, yr, yi; ip[0] = 0; l = n; m = 1; while ((m << 3) < l) { l >>= 1; for (j = 0; j < m; j++) { ip[m + j] = ip[j] + l; } m <<= 1; } m2 = 2 * m; if ((m << 3) == l) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 -= m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } j1 = 2 * k + m2 + ip[k]; k1 = j1 + m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } else { for (k = 1; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += m2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } } } void bitrv2conj(int n, int *ip, double *a) { int j, j1, k, k1, l, m, m2; double xr, xi, yr, yi; ip[0] = 0; l = n; m = 1; while ((m << 3) < l) { l >>= 1; for (j = 0; j < m; j++) { ip[m + j] = ip[j] + l; } m <<= 1; } m2 = 2 * m; if ((m << 3) == l) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 -= m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += 2 * m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 2 * k + ip[k]; a[k1 + 1] = -a[k1 + 1]; j1 = k1 + m2; k1 = j1 + m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; k1 += m2; a[k1 + 1] = -a[k1 + 1]; } } else { a[1] = -a[1]; a[m2 + 1] = -a[m2 + 1]; for (k = 1; k < m; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m2; k1 += m2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 2 * k + ip[k]; a[k1 + 1] = -a[k1 + 1]; a[k1 + m2 + 1] = -a[k1 + m2 + 1]; } } } void cftfsub(int n, double *a, double *w) { void cft1st(int n, double *a, double *w); void cftmdl(int n, int l, double *a, double *w); int j, j1, j2, j3, l; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; if (n >= 16) { cft1st(n, a, w); l = 16; while ((l << 3) <= n) { cftmdl(n, l, a, w); l <<= 3; } } if ((l << 1) < n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i - x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } } else if ((l << 1) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = a[j + 1] - a[j1 + 1]; a[j] += a[j1]; a[j + 1] += a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cftbsub(int n, double *a, double *w) { void cft1st(int n, double *a, double *w); void cftmdl(int n, int l, double *a, double *w); int j, j1, j2, j3, j4, j5, j6, j7, l; double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; l = 2; if (n > 16) { cft1st(n, a, w); l = 16; while ((l << 3) < n) { cftmdl(n, l, a, w); l <<= 3; } } if ((l << 2) < n) { wn4r = w[2]; for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; j4 = j3 + l; j5 = j4 + l; j6 = j5 + l; j7 = j6 + l; x0r = a[j] + a[j1]; x0i = -a[j + 1] - a[j1 + 1]; x1r = a[j] - a[j1]; x1i = -a[j + 1] + a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; y0r = x0r + x2r; y0i = x0i - x2i; y2r = x0r - x2r; y2i = x0i + x2i; y1r = x1r - x3i; y1i = x1i - x3r; y3r = x1r + x3i; y3i = x1i + x3r; x0r = a[j4] + a[j5]; x0i = a[j4 + 1] + a[j5 + 1]; x1r = a[j4] - a[j5]; x1i = a[j4 + 1] - a[j5 + 1]; x2r = a[j6] + a[j7]; x2i = a[j6 + 1] + a[j7 + 1]; x3r = a[j6] - a[j7]; x3i = a[j6 + 1] - a[j7 + 1]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[j1] = y1r + y5r; a[j1 + 1] = y1i - y5i; a[j5] = y1r - y5r; a[j5 + 1] = y1i + y5i; a[j3] = y3r - y7i; a[j3 + 1] = y3i - y7r; a[j7] = y3r + y7i; a[j7 + 1] = y3i + y7r; a[j] = y0r + y4r; a[j + 1] = y0i - y4i; a[j4] = y0r - y4r; a[j4 + 1] = y0i + y4i; a[j2] = y2r - y6i; a[j2 + 1] = y2i - y6r; a[j6] = y2r + y6i; a[j6 + 1] = y2i + y6r; } } else if ((l << 2) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = -a[j + 1] - a[j1 + 1]; x1r = a[j] - a[j1]; x1i = -a[j + 1] + a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i + x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i - x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i + x3r; } } else { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = -a[j + 1] + a[j1 + 1]; a[j] += a[j1]; a[j + 1] = -a[j + 1] - a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cft1st(int n, double *a, double *w) { int j, k1; double wn4r, wtmp, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, wk4r, wk4i, wk5r, wk5i, wk6r, wk6i, wk7r, wk7i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = w[2]; x0r = a[0] + a[2]; x0i = a[1] + a[3]; x1r = a[0] - a[2]; x1i = a[1] - a[3]; x2r = a[4] + a[6]; x2i = a[5] + a[7]; x3r = a[4] - a[6]; x3i = a[5] - a[7]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[8] + a[10]; x0i = a[9] + a[11]; x1r = a[8] - a[10]; x1i = a[9] - a[11]; x2r = a[12] + a[14]; x2i = a[13] + a[15]; x3r = a[12] - a[14]; x3i = a[13] - a[15]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[2] = y1r + y5r; a[3] = y1i + y5i; a[10] = y1r - y5r; a[11] = y1i - y5i; a[6] = y3r - y7i; a[7] = y3i + y7r; a[14] = y3r + y7i; a[15] = y3i - y7r; a[0] = y0r + y4r; a[1] = y0i + y4i; a[8] = y0r - y4r; a[9] = y0i - y4i; a[4] = y2r - y6i; a[5] = y2i + y6r; a[12] = y2r + y6i; a[13] = y2i - y6r; if (n > 16) { wk1r = w[4]; wk1i = w[5]; x0r = a[16] + a[18]; x0i = a[17] + a[19]; x1r = a[16] - a[18]; x1i = a[17] - a[19]; x2r = a[20] + a[22]; x2i = a[21] + a[23]; x3r = a[20] - a[22]; x3i = a[21] - a[23]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[24] + a[26]; x0i = a[25] + a[27]; x1r = a[24] - a[26]; x1i = a[25] - a[27]; x2r = a[28] + a[30]; x2i = a[29] + a[31]; x3r = a[28] - a[30]; x3i = a[29] - a[31]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x3r - x1i; y5r = wk1i * x0r - wk1r * x0i; y5i = wk1i * x0i + wk1r * x0r; y7r = wk1r * x2r + wk1i * x2i; y7i = wk1r * x2i - wk1i * x2r; x0r = wk1r * y1r - wk1i * y1i; x0i = wk1r * y1i + wk1i * y1r; a[18] = x0r + y5r; a[19] = x0i + y5i; a[26] = y5i - x0i; a[27] = x0r - y5r; x0r = wk1i * y3r - wk1r * y3i; x0i = wk1i * y3i + wk1r * y3r; a[22] = x0r - y7r; a[23] = x0i + y7i; a[30] = y7i - x0i; a[31] = x0r + y7r; a[16] = y0r + y4r; a[17] = y0i + y4i; a[24] = y4i - y0i; a[25] = y0r - y4r; x0r = y2r - y6i; x0i = y2i + y6r; a[20] = wn4r * (x0r - x0i); a[21] = wn4r * (x0i + x0r); x0r = y6r - y2i; x0i = y2r + y6i; a[28] = wn4r * (x0r - x0i); a[29] = wn4r * (x0i + x0r); k1 = 4; for (j = 32; j < n; j += 16) { k1 += 4; wk1r = w[k1]; wk1i = w[k1 + 1]; wk2r = w[k1 + 2]; wk2i = w[k1 + 3]; wtmp = 2 * wk2i; wk3r = wk1r - wtmp * wk1i; wk3i = wtmp * wk1r - wk1i; wk4r = 1 - wtmp * wk2i; wk4i = wtmp * wk2r; wtmp = 2 * wk4i; wk5r = wk3r - wtmp * wk1i; wk5i = wtmp * wk1r - wk3i; wk6r = wk2r - wtmp * wk2i; wk6i = wtmp * wk2r - wk2i; wk7r = wk1r - wtmp * wk3i; wk7i = wtmp * wk3r - wk1i; x0r = a[j] + a[j + 2]; x0i = a[j + 1] + a[j + 3]; x1r = a[j] - a[j + 2]; x1i = a[j + 1] - a[j + 3]; x2r = a[j + 4] + a[j + 6]; x2i = a[j + 5] + a[j + 7]; x3r = a[j + 4] - a[j + 6]; x3i = a[j + 5] - a[j + 7]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[j + 8] + a[j + 10]; x0i = a[j + 9] + a[j + 11]; x1r = a[j + 8] - a[j + 10]; x1i = a[j + 9] - a[j + 11]; x2r = a[j + 12] + a[j + 14]; x2i = a[j + 13] + a[j + 15]; x3r = a[j + 12] - a[j + 14]; x3i = a[j + 13] - a[j + 15]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); x0r = y1r + y5r; x0i = y1i + y5i; a[j + 2] = wk1r * x0r - wk1i * x0i; a[j + 3] = wk1r * x0i + wk1i * x0r; x0r = y1r - y5r; x0i = y1i - y5i; a[j + 10] = wk5r * x0r - wk5i * x0i; a[j + 11] = wk5r * x0i + wk5i * x0r; x0r = y3r - y7i; x0i = y3i + y7r; a[j + 6] = wk3r * x0r - wk3i * x0i; a[j + 7] = wk3r * x0i + wk3i * x0r; x0r = y3r + y7i; x0i = y3i - y7r; a[j + 14] = wk7r * x0r - wk7i * x0i; a[j + 15] = wk7r * x0i + wk7i * x0r; a[j] = y0r + y4r; a[j + 1] = y0i + y4i; x0r = y0r - y4r; x0i = y0i - y4i; a[j + 8] = wk4r * x0r - wk4i * x0i; a[j + 9] = wk4r * x0i + wk4i * x0r; x0r = y2r - y6i; x0i = y2i + y6r; a[j + 4] = wk2r * x0r - wk2i * x0i; a[j + 5] = wk2r * x0i + wk2i * x0r; x0r = y2r + y6i; x0i = y2i - y6r; a[j + 12] = wk6r * x0r - wk6i * x0i; a[j + 13] = wk6r * x0i + wk6i * x0r; } } } void cftmdl(int n, int l, double *a, double *w) { int j, j1, j2, j3, j4, j5, j6, j7, k, k1, m; double wn4r, wtmp, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, wk4r, wk4i, wk5r, wk5i, wk6r, wk6i, wk7r, wk7i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; m = l << 3; wn4r = w[2]; for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; j4 = j3 + l; j5 = j4 + l; j6 = j5 + l; j7 = j6 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[j4] + a[j5]; x0i = a[j4 + 1] + a[j5 + 1]; x1r = a[j4] - a[j5]; x1i = a[j4 + 1] - a[j5 + 1]; x2r = a[j6] + a[j7]; x2i = a[j6 + 1] + a[j7 + 1]; x3r = a[j6] - a[j7]; x3i = a[j6 + 1] - a[j7 + 1]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[j1] = y1r + y5r; a[j1 + 1] = y1i + y5i; a[j5] = y1r - y5r; a[j5 + 1] = y1i - y5i; a[j3] = y3r - y7i; a[j3 + 1] = y3i + y7r; a[j7] = y3r + y7i; a[j7 + 1] = y3i - y7r; a[j] = y0r + y4r; a[j + 1] = y0i + y4i; a[j4] = y0r - y4r; a[j4 + 1] = y0i - y4i; a[j2] = y2r - y6i; a[j2 + 1] = y2i + y6r; a[j6] = y2r + y6i; a[j6 + 1] = y2i - y6r; } if (m < n) { wk1r = w[4]; wk1i = w[5]; for (j = m; j < l + m; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; j4 = j3 + l; j5 = j4 + l; j6 = j5 + l; j7 = j6 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[j4] + a[j5]; x0i = a[j4 + 1] + a[j5 + 1]; x1r = a[j4] - a[j5]; x1i = a[j4 + 1] - a[j5 + 1]; x2r = a[j6] + a[j7]; x2i = a[j6 + 1] + a[j7 + 1]; x3r = a[j6] - a[j7]; x3i = a[j6 + 1] - a[j7 + 1]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x3r - x1i; y5r = wk1i * x0r - wk1r * x0i; y5i = wk1i * x0i + wk1r * x0r; y7r = wk1r * x2r + wk1i * x2i; y7i = wk1r * x2i - wk1i * x2r; x0r = wk1r * y1r - wk1i * y1i; x0i = wk1r * y1i + wk1i * y1r; a[j1] = x0r + y5r; a[j1 + 1] = x0i + y5i; a[j5] = y5i - x0i; a[j5 + 1] = x0r - y5r; x0r = wk1i * y3r - wk1r * y3i; x0i = wk1i * y3i + wk1r * y3r; a[j3] = x0r - y7r; a[j3 + 1] = x0i + y7i; a[j7] = y7i - x0i; a[j7 + 1] = x0r + y7r; a[j] = y0r + y4r; a[j + 1] = y0i + y4i; a[j4] = y4i - y0i; a[j4 + 1] = y0r - y4r; x0r = y2r - y6i; x0i = y2i + y6r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = y6r - y2i; x0i = y2r + y6i; a[j6] = wn4r * (x0r - x0i); a[j6 + 1] = wn4r * (x0i + x0r); } k1 = 4; for (k = 2 * m; k < n; k += m) { k1 += 4; wk1r = w[k1]; wk1i = w[k1 + 1]; wk2r = w[k1 + 2]; wk2i = w[k1 + 3]; wtmp = 2 * wk2i; wk3r = wk1r - wtmp * wk1i; wk3i = wtmp * wk1r - wk1i; wk4r = 1 - wtmp * wk2i; wk4i = wtmp * wk2r; wtmp = 2 * wk4i; wk5r = wk3r - wtmp * wk1i; wk5i = wtmp * wk1r - wk3i; wk6r = wk2r - wtmp * wk2i; wk6i = wtmp * wk2r - wk2i; wk7r = wk1r - wtmp * wk3i; wk7i = wtmp * wk3r - wk1i; for (j = k; j < l + k; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; j4 = j3 + l; j5 = j4 + l; j6 = j5 + l; j7 = j6 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[j4] + a[j5]; x0i = a[j4 + 1] + a[j5 + 1]; x1r = a[j4] - a[j5]; x1i = a[j4 + 1] - a[j5 + 1]; x2r = a[j6] + a[j7]; x2i = a[j6 + 1] + a[j7 + 1]; x3r = a[j6] - a[j7]; x3i = a[j6 + 1] - a[j7 + 1]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); x0r = y1r + y5r; x0i = y1i + y5i; a[j1] = wk1r * x0r - wk1i * x0i; a[j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = y1r - y5r; x0i = y1i - y5i; a[j5] = wk5r * x0r - wk5i * x0i; a[j5 + 1] = wk5r * x0i + wk5i * x0r; x0r = y3r - y7i; x0i = y3i + y7r; a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; x0r = y3r + y7i; x0i = y3i - y7r; a[j7] = wk7r * x0r - wk7i * x0i; a[j7 + 1] = wk7r * x0i + wk7i * x0r; a[j] = y0r + y4r; a[j + 1] = y0i + y4i; x0r = y0r - y4r; x0i = y0i - y4i; a[j4] = wk4r * x0r - wk4i * x0i; a[j4 + 1] = wk4r * x0i + wk4i * x0r; x0r = y2r - y6i; x0i = y2i + y6r; a[j2] = wk2r * x0r - wk2i * x0i; a[j2 + 1] = wk2r * x0i + wk2i * x0r; x0r = y2r + y6i; x0i = y2i - y6r; a[j6] = wk6r * x0r - wk6i * x0i; a[j6 + 1] = wk6r * x0i + wk6i * x0r; } } } } void rftfsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } void rftbsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr, xi, yr, yi; a[1] = -a[1]; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] = yi - a[j + 1]; a[k] += yr; a[k + 1] = yi - a[k + 1]; } a[m + 1] = -a[m + 1]; } void dctsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[j] - wkr * a[k]; a[j] = wkr * a[j] + wki * a[k]; a[k] = xr; } a[m] *= c[0]; } void dstsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[k] - wkr * a[j]; a[k] = wkr * a[k] + wki * a[j]; a[j] = xr; } a[m] *= c[0]; } OouraFFT-1.0/fft8g.f000066400000000000000000001366021246725775500142200ustar00rootroot00000000000000! Fast Fourier/Cosine/Sine Transform ! dimension :one ! data length :power of 2 ! decimation :frequency ! radix :8, 4, 2 ! data :inplace ! table :use ! subroutines ! cdft: Complex Discrete Fourier Transform ! rdft: Real Discrete Fourier Transform ! ddct: Discrete Cosine Transform ! ddst: Discrete Sine Transform ! dfct: Cosine Transform of RDFT (Real Symmetric DFT) ! dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) ! ! ! -------- Complex DFT (Discrete Fourier Transform) -------- ! [definition] ! ! X(k) = sum_j=0^n-1 x(j)*exp(2*pi*i*j*k/n), 0<=k ! X(k) = sum_j=0^n-1 x(j)*exp(-2*pi*i*j*k/n), 0<=k ! ip(0) = 0 ! first time only ! call cdft(2*n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call cdft(2*n, -1, a, ip, w) ! [parameters] ! 2*n :data length (integer) ! n >= 1, n = power of 2 ! a(0:2*n-1) :input/output data (real*8) ! input data ! a(2*j) = Re(x(j)), ! a(2*j+1) = Im(x(j)), 0<=j= 2+sqrt(n) ! strictly, ! length of ip >= ! 2+2**(int(log(n+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n/2-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call cdft(2*n, -1, a, ip, w) ! is ! call cdft(2*n, 1, a, ip, w) ! do j = 0, 2 * n - 1 ! a(j) = a(j) / n ! end do ! . ! ! ! -------- Real DFT / Inverse of Real DFT -------- ! [definition] ! RDFT ! R(k) = sum_j=0^n-1 a(j)*cos(2*pi*j*k/n), 0<=k<=n/2 ! I(k) = sum_j=0^n-1 a(j)*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) ! a(k) = (R(0) + R(n/2)*cos(pi*k))/2 + ! sum_j=1^n/2-1 R(j)*cos(2*pi*j*k/n) + ! sum_j=1^n/2-1 I(j)*sin(2*pi*j*k/n), 0<=k ! ip(0) = 0 ! first time only ! call rdft(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call rdft(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! ! output data ! a(2*k) = R(k), 0<=k ! input data ! a(2*j) = R(j), 0<=j= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n/2-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call rdft(n, 1, a, ip, w) ! is ! call rdft(n, -1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- ! [definition] ! IDCT (excluding scale) ! C(k) = sum_j=0^n-1 a(j)*cos(pi*j*(k+1/2)/n), 0<=k DCT ! C(k) = sum_j=0^n-1 a(j)*cos(pi*(j+1/2)*k/n), 0<=k ! ip(0) = 0 ! first time only ! call ddct(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call ddct(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! output data ! a(k) = C(k), 0<=k= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/4-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddct(n, -1, a, ip, w) ! is ! a(0) = a(0) / 2 ! call ddct(n, 1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- DST (Discrete Sine Transform) / Inverse of DST -------- ! [definition] ! IDST (excluding scale) ! S(k) = sum_j=1^n A(j)*sin(pi*j*(k+1/2)/n), 0<=k DST ! S(k) = sum_j=0^n-1 a(j)*sin(pi*(j+1/2)*k/n), 0 ! ip(0) = 0 ! first time only ! call ddst(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call ddst(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! ! input data ! a(j) = A(j), 0 ! output data ! a(k) = S(k), 0= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/4-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddst(n, -1, a, ip, w) ! is ! a(0) = a(0) / 2 ! call ddst(n, 1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- ! [definition] ! C(k) = sum_j=0^n a(j)*cos(pi*j*k/n), 0<=k<=n ! [usage] ! ip(0) = 0 ! first time only ! call dfct(n, a, t, ip, w) ! [parameters] ! n :data length - 1 (integer) ! n >= 2, n = power of 2 ! a(0:n) :input/output data (real*8) ! output data ! a(k) = C(k), 0<=k<=n ! t(0:n/2) :work area (real*8) ! ip(0:*) :work area for bit reversal (integer) ! length of ip >= 2+sqrt(n/4) ! strictly, ! length of ip >= ! 2+2**(int(log(n/4+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/8-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! a(0) = a(0) / 2 ! a(n) = a(n) / 2 ! call dfct(n, a, t, ip, w) ! is ! a(0) = a(0) / 2 ! a(n) = a(n) / 2 ! call dfct(n, a, t, ip, w) ! do j = 0, n ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- ! [definition] ! S(k) = sum_j=1^n-1 a(j)*sin(pi*j*k/n), 0= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! output data ! a(k) = S(k), 0= 2+sqrt(n/4) ! strictly, ! length of ip >= ! 2+2**(int(log(n/4+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/8-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call dfst(n, a, t, ip, w) ! is ! call dfst(n, a, t, ip, w) ! do j = 1, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! Appendix : ! The cos/sin table is recalculated when the larger table required. ! w() and ip() are compatible with all routines. ! ! subroutine cdft(n, isgn, a, ip, w) integer n, isgn, ip(0 : *) real*8 a(0 : n - 1), w(0 : *) if (n .gt. 4 * ip(0)) then call makewt(n / 4, ip, w) end if if (n .gt. 4) then if (isgn .ge. 0) then call bitrv2(n, ip(2), a) call cftfsub(n, a, w) else call bitrv2conj(n, ip(2), a) call cftbsub(n, a, w) end if else if (n .eq. 4) then call cftfsub(n, a, w) end if end ! subroutine rdft(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), nw, nc real*8 a(0 : n - 1), w(0 : *), xi nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 4 * nc) then nc = n / 4 call makect(nc, ip, w(nw)) end if if (isgn .ge. 0) then if (n .gt. 4) then call bitrv2(n, ip(2), a) call cftfsub(n, a, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, w) end if xi = a(0) - a(1) a(0) = a(0) + a(1) a(1) = xi else a(1) = 0.5d0 * (a(0) - a(1)) a(0) = a(0) - a(1) if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call bitrv2(n, ip(2), a) call cftbsub(n, a, w) else if (n .eq. 4) then call cftfsub(n, a, w) end if end if end ! subroutine ddct(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), j, nw, nc real*8 a(0 : n - 1), w(0 : *), xr nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if if (isgn .lt. 0) then xr = a(n - 1) do j = n - 2, 2, -2 a(j + 1) = a(j) - a(j - 1) a(j) = a(j) + a(j - 1) end do a(1) = a(0) - xr a(0) = a(0) + xr if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call bitrv2(n, ip(2), a) call cftbsub(n, a, w) else if (n .eq. 4) then call cftfsub(n, a, w) end if end if call dctsub(n, a, nc, w(nw)) if (isgn .ge. 0) then if (n .gt. 4) then call bitrv2(n, ip(2), a) call cftfsub(n, a, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, w) end if xr = a(0) - a(1) a(0) = a(0) + a(1) do j = 2, n - 2, 2 a(j - 1) = a(j) - a(j + 1) a(j) = a(j) + a(j + 1) end do a(n - 1) = xr end if end ! subroutine ddst(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), j, nw, nc real*8 a(0 : n - 1), w(0 : *), xr nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if if (isgn .lt. 0) then xr = a(n - 1) do j = n - 2, 2, -2 a(j + 1) = -a(j) - a(j - 1) a(j) = a(j) - a(j - 1) end do a(1) = a(0) + xr a(0) = a(0) - xr if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call bitrv2(n, ip(2), a) call cftbsub(n, a, w) else if (n .eq. 4) then call cftfsub(n, a, w) end if end if call dstsub(n, a, nc, w(nw)) if (isgn .ge. 0) then if (n .gt. 4) then call bitrv2(n, ip(2), a) call cftfsub(n, a, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, w) end if xr = a(0) - a(1) a(0) = a(0) + a(1) do j = 2, n - 2, 2 a(j - 1) = -a(j) - a(j + 1) a(j) = a(j) - a(j + 1) end do a(n - 1) = -xr end if end ! subroutine dfct(n, a, t, ip, w) integer n, ip(0 : *), j, k, l, m, mh, nw, nc real*8 a(0 : n), t(0 : n / 2), w(0 : *), xr, xi, yr, yi nw = ip(0) if (n .gt. 8 * nw) then nw = n / 8 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 2 * nc) then nc = n / 2 call makect(nc, ip, w(nw)) end if m = n / 2 yi = a(m) xi = a(0) + a(n) a(0) = a(0) - a(n) t(0) = xi - yi t(m) = xi + yi if (n .gt. 2) then mh = m / 2 do j = 1, mh - 1 k = m - j xr = a(j) - a(n - j) xi = a(j) + a(n - j) yr = a(k) - a(n - k) yi = a(k) + a(n - k) a(j) = xr a(k) = yr t(j) = xi - yi t(k) = xi + yi end do t(mh) = a(mh) + a(n - mh) a(mh) = a(mh) - a(n - mh) call dctsub(m, a, nc, w(nw)) if (m .gt. 4) then call bitrv2(m, ip(2), a) call cftfsub(m, a, w) call rftfsub(m, a, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, a, w) end if a(n - 1) = a(0) - a(1) a(1) = a(0) + a(1) do j = m - 2, 2, -2 a(2 * j + 1) = a(j) + a(j + 1) a(2 * j - 1) = a(j) - a(j + 1) end do l = 2 m = mh do while (m .ge. 2) call dctsub(m, t, nc, w(nw)) if (m .gt. 4) then call bitrv2(m, ip(2), t) call cftfsub(m, t, w) call rftfsub(m, t, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, t, w) end if a(n - l) = t(0) - t(1) a(l) = t(0) + t(1) k = 0 do j = 2, m - 2, 2 k = k + 4 * l a(k - l) = t(j) - t(j + 1) a(k + l) = t(j) + t(j + 1) end do l = 2 * l mh = m / 2 do j = 0, mh - 1 k = m - j t(j) = t(m + k) - t(m + j) t(k) = t(m + k) + t(m + j) end do t(mh) = t(m + mh) m = mh end do a(l) = t(0) a(n) = t(2) - t(1) a(0) = t(2) + t(1) else a(1) = a(0) a(2) = t(0) a(0) = t(1) end if end ! subroutine dfst(n, a, t, ip, w) integer n, ip(0 : *), j, k, l, m, mh, nw, nc real*8 a(0 : n - 1), t(0 : n / 2 - 1), w(0 : *), xr, xi, yr, yi nw = ip(0) if (n .gt. 8 * nw) then nw = n / 8 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 2 * nc) then nc = n / 2 call makect(nc, ip, w(nw)) end if if (n .gt. 2) then m = n / 2 mh = m / 2 do j = 1, mh - 1 k = m - j xr = a(j) + a(n - j) xi = a(j) - a(n - j) yr = a(k) + a(n - k) yi = a(k) - a(n - k) a(j) = xr a(k) = yr t(j) = xi + yi t(k) = xi - yi end do t(0) = a(mh) - a(n - mh) a(mh) = a(mh) + a(n - mh) a(0) = a(m) call dstsub(m, a, nc, w(nw)) if (m .gt. 4) then call bitrv2(m, ip(2), a) call cftfsub(m, a, w) call rftfsub(m, a, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, a, w) end if a(n - 1) = a(1) - a(0) a(1) = a(0) + a(1) do j = m - 2, 2, -2 a(2 * j + 1) = a(j) - a(j + 1) a(2 * j - 1) = -a(j) - a(j + 1) end do l = 2 m = mh do while (m .ge. 2) call dstsub(m, t, nc, w(nw)) if (m .gt. 4) then call bitrv2(m, ip(2), t) call cftfsub(m, t, w) call rftfsub(m, t, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, t, w) end if a(n - l) = t(1) - t(0) a(l) = t(0) + t(1) k = 0 do j = 2, m - 2, 2 k = k + 4 * l a(k - l) = -t(j) - t(j + 1) a(k + l) = t(j) - t(j + 1) end do l = 2 * l mh = m / 2 do j = 1, mh - 1 k = m - j t(j) = t(m + k) + t(m + j) t(k) = t(m + k) - t(m + j) end do t(0) = t(m + mh) m = mh end do a(l) = t(0) end if a(0) = 0 end ! ! -------- initializing routines -------- ! subroutine makewt(nw, ip, w) integer nw, ip(0 : *), j, nwh real*8 w(0 : nw - 1), delta, x, y ip(0) = nw ip(1) = 1 if (nw .gt. 2) then nwh = nw / 2 delta = atan(1.0d0) / nwh w(0) = 1 w(1) = 0 w(nwh) = cos(delta * nwh) w(nwh + 1) = w(nwh) if (nwh .gt. 2) then do j = 2, nwh - 2, 2 x = cos(delta * j) y = sin(delta * j) w(j) = x w(j + 1) = y w(nw - j) = y w(nw - j + 1) = x end do do j = nwh - 2, 2, -2 x = w(2 * j) y = w(2 * j + 1) w(nwh + j) = x w(nwh + j + 1) = y end do call bitrv2(nw, ip(2), w) end if end if end ! subroutine makect(nc, ip, c) integer nc, ip(0 : *), j, nch real*8 c(0 : nc - 1), delta ip(1) = nc if (nc .gt. 1) then nch = nc / 2 delta = atan(1.0d0) / nch c(0) = cos(delta * nch) c(nch) = 0.5d0 * c(0) do j = 1, nch - 1 c(j) = 0.5d0 * cos(delta * j) c(nc - j) = 0.5d0 * sin(delta * j) end do end if end ! ! -------- child routines -------- ! subroutine bitrv2(n, ip, a) integer n, ip(0 : *), j, j1, k, k1, l, m, m2 real*8 a(0 : n - 1), xr, xi, yr, yi ip(0) = 0 l = n m = 1 do while (8 * m .lt. l) l = l / 2 do j = 0, m - 1 ip(m + j) = ip(j) + l end do m = m * 2 end do m2 = 2 * m if (8 * m .eq. l) then do k = 0, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + 2 * m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 - m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + 2 * m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do j1 = 2 * k + m2 + ip(k) k1 = j1 + m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do else do k = 1, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + m2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do end do end if end ! subroutine bitrv2conj(n, ip, a) integer n, ip(0 : *), j, j1, k, k1, l, m, m2 real*8 a(0 : n - 1), xr, xi, yr, yi ip(0) = 0 l = n m = 1 do while (8 * m .lt. l) l = l / 2 do j = 0, m - 1 ip(m + j) = ip(j) + l end do m = m * 2 end do m2 = 2 * m if (8 * m .eq. l) then do k = 0, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + 2 * m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 - m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + 2 * m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do k1 = 2 * k + ip(k) a(k1 + 1) = -a(k1 + 1) j1 = k1 + m2 k1 = j1 + m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi k1 = k1 + m2 a(k1 + 1) = -a(k1 + 1) end do else a(1) = -a(1) a(m2 + 1) = -a(m2 + 1) do k = 1, m - 1 do j = 0, k - 1 j1 = 2 * j + ip(k) k1 = 2 * k + ip(j) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + m2 k1 = k1 + m2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do k1 = 2 * k + ip(k) a(k1 + 1) = -a(k1 + 1) a(k1 + m2 + 1) = -a(k1 + m2 + 1) end do end if end ! subroutine cftfsub(n, a, w) integer n, j, j1, j2, j3, l real*8 a(0 : n - 1), w(0 : *) real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i l = 2 if (n .ge. 16) then call cft1st(n, a, w) l = 16 do while (8 * l .le. n) call cftmdl(n, l, a, w) l = 8 * l end do end if if (2 * l .lt. n) then do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i + x2i a(j2) = x0r - x2r a(j2 + 1) = x0i - x2i a(j1) = x1r - x3i a(j1 + 1) = x1i + x3r a(j3) = x1r + x3i a(j3 + 1) = x1i - x3r end do else if (2 * l .eq. n) then do j = 0, l - 2, 2 j1 = j + l x0r = a(j) - a(j1) x0i = a(j + 1) - a(j1 + 1) a(j) = a(j) + a(j1) a(j + 1) = a(j + 1) + a(j1 + 1) a(j1) = x0r a(j1 + 1) = x0i end do end if end ! subroutine cftbsub(n, a, w) integer n, j, j1, j2, j3, j4, j5, j6, j7, l real*8 a(0 : n - 1), w(0 : *) real*8 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i l = 2 if (n .gt. 16) then call cft1st(n, a, w) l = 16 do while (8 * l .lt. n) call cftmdl(n, l, a, w) l = 8 * l end do end if if (4 * l .lt. n) then wn4r = w(2) do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l j4 = j3 + l j5 = j4 + l j6 = j5 + l j7 = j6 + l x0r = a(j) + a(j1) x0i = -a(j + 1) - a(j1 + 1) x1r = a(j) - a(j1) x1i = -a(j + 1) + a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) y0r = x0r + x2r y0i = x0i - x2i y2r = x0r - x2r y2i = x0i + x2i y1r = x1r - x3i y1i = x1i - x3r y3r = x1r + x3i y3i = x1i + x3r x0r = a(j4) + a(j5) x0i = a(j4 + 1) + a(j5 + 1) x1r = a(j4) - a(j5) x1i = a(j4 + 1) - a(j5 + 1) x2r = a(j6) + a(j7) x2i = a(j6 + 1) + a(j7 + 1) x3r = a(j6) - a(j7) x3i = a(j6 + 1) - a(j7 + 1) y4r = x0r + x2r y4i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r x2r = x1r + x3i x2i = x1i - x3r y5r = wn4r * (x0r - x0i) y5i = wn4r * (x0r + x0i) y7r = wn4r * (x2r - x2i) y7i = wn4r * (x2r + x2i) a(j1) = y1r + y5r a(j1 + 1) = y1i - y5i a(j5) = y1r - y5r a(j5 + 1) = y1i + y5i a(j3) = y3r - y7i a(j3 + 1) = y3i - y7r a(j7) = y3r + y7i a(j7 + 1) = y3i + y7r a(j) = y0r + y4r a(j + 1) = y0i - y4i a(j4) = y0r - y4r a(j4 + 1) = y0i + y4i a(j2) = y2r - y6i a(j2 + 1) = y2i - y6r a(j6) = y2r + y6i a(j6 + 1) = y2i + y6r end do else if (4 * l .eq. n) then do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l x0r = a(j) + a(j1) x0i = -a(j + 1) - a(j1 + 1) x1r = a(j) - a(j1) x1i = -a(j + 1) + a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i - x2i a(j2) = x0r - x2r a(j2 + 1) = x0i + x2i a(j1) = x1r - x3i a(j1 + 1) = x1i - x3r a(j3) = x1r + x3i a(j3 + 1) = x1i + x3r end do else do j = 0, l - 2, 2 j1 = j + l x0r = a(j) - a(j1) x0i = -a(j + 1) + a(j1 + 1) a(j) = a(j) + a(j1) a(j + 1) = -a(j + 1) - a(j1 + 1) a(j1) = x0r a(j1 + 1) = x0i end do end if end ! subroutine cft1st(n, a, w) integer n, j, k1 real*8 a(0 : n - 1), w(0 : *) real*8 wn4r, wtmp, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 wk4r, wk4i, wk5r, wk5i, wk6r, wk6i, wk7r, wk7i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i wn4r = w(2) x0r = a(0) + a(2) x0i = a(1) + a(3) x1r = a(0) - a(2) x1i = a(1) - a(3) x2r = a(4) + a(6) x2i = a(5) + a(7) x3r = a(4) - a(6) x3i = a(5) - a(7) y0r = x0r + x2r y0i = x0i + x2i y2r = x0r - x2r y2i = x0i - x2i y1r = x1r - x3i y1i = x1i + x3r y3r = x1r + x3i y3i = x1i - x3r x0r = a(8) + a(10) x0i = a(9) + a(11) x1r = a(8) - a(10) x1i = a(9) - a(11) x2r = a(12) + a(14) x2i = a(13) + a(15) x3r = a(12) - a(14) x3i = a(13) - a(15) y4r = x0r + x2r y4i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r x2r = x1r + x3i x2i = x1i - x3r y5r = wn4r * (x0r - x0i) y5i = wn4r * (x0r + x0i) y7r = wn4r * (x2r - x2i) y7i = wn4r * (x2r + x2i) a(2) = y1r + y5r a(3) = y1i + y5i a(10) = y1r - y5r a(11) = y1i - y5i a(6) = y3r - y7i a(7) = y3i + y7r a(14) = y3r + y7i a(15) = y3i - y7r a(0) = y0r + y4r a(1) = y0i + y4i a(8) = y0r - y4r a(9) = y0i - y4i a(4) = y2r - y6i a(5) = y2i + y6r a(12) = y2r + y6i a(13) = y2i - y6r if (n .gt. 16) then wk1r = w(4) wk1i = w(5) x0r = a(16) + a(18) x0i = a(17) + a(19) x1r = a(16) - a(18) x1i = a(17) - a(19) x2r = a(20) + a(22) x2i = a(21) + a(23) x3r = a(20) - a(22) x3i = a(21) - a(23) y0r = x0r + x2r y0i = x0i + x2i y2r = x0r - x2r y2i = x0i - x2i y1r = x1r - x3i y1i = x1i + x3r y3r = x1r + x3i y3i = x1i - x3r x0r = a(24) + a(26) x0i = a(25) + a(27) x1r = a(24) - a(26) x1i = a(25) - a(27) x2r = a(28) + a(30) x2i = a(29) + a(31) x3r = a(28) - a(30) x3i = a(29) - a(31) y4r = x0r + x2r y4i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r x2r = x1r + x3i x2i = x3r - x1i y5r = wk1i * x0r - wk1r * x0i y5i = wk1i * x0i + wk1r * x0r y7r = wk1r * x2r + wk1i * x2i y7i = wk1r * x2i - wk1i * x2r x0r = wk1r * y1r - wk1i * y1i x0i = wk1r * y1i + wk1i * y1r a(18) = x0r + y5r a(19) = x0i + y5i a(26) = y5i - x0i a(27) = x0r - y5r x0r = wk1i * y3r - wk1r * y3i x0i = wk1i * y3i + wk1r * y3r a(22) = x0r - y7r a(23) = x0i + y7i a(30) = y7i - x0i a(31) = x0r + y7r a(16) = y0r + y4r a(17) = y0i + y4i a(24) = y4i - y0i a(25) = y0r - y4r x0r = y2r - y6i x0i = y2i + y6r a(20) = wn4r * (x0r - x0i) a(21) = wn4r * (x0i + x0r) x0r = y6r - y2i x0i = y2r + y6i a(28) = wn4r * (x0r - x0i) a(29) = wn4r * (x0i + x0r) k1 = 4 do j = 32, n - 16, 16 k1 = k1 + 4 wk1r = w(k1) wk1i = w(k1 + 1) wk2r = w(k1 + 2) wk2i = w(k1 + 3) wtmp = 2 * wk2i wk3r = wk1r - wtmp * wk1i wk3i = wtmp * wk1r - wk1i wk4r = 1 - wtmp * wk2i wk4i = wtmp * wk2r wtmp = 2 * wk4i wk5r = wk3r - wtmp * wk1i wk5i = wtmp * wk1r - wk3i wk6r = wk2r - wtmp * wk2i wk6i = wtmp * wk2r - wk2i wk7r = wk1r - wtmp * wk3i wk7i = wtmp * wk3r - wk1i x0r = a(j) + a(j + 2) x0i = a(j + 1) + a(j + 3) x1r = a(j) - a(j + 2) x1i = a(j + 1) - a(j + 3) x2r = a(j + 4) + a(j + 6) x2i = a(j + 5) + a(j + 7) x3r = a(j + 4) - a(j + 6) x3i = a(j + 5) - a(j + 7) y0r = x0r + x2r y0i = x0i + x2i y2r = x0r - x2r y2i = x0i - x2i y1r = x1r - x3i y1i = x1i + x3r y3r = x1r + x3i y3i = x1i - x3r x0r = a(j + 8) + a(j + 10) x0i = a(j + 9) + a(j + 11) x1r = a(j + 8) - a(j + 10) x1i = a(j + 9) - a(j + 11) x2r = a(j + 12) + a(j + 14) x2i = a(j + 13) + a(j + 15) x3r = a(j + 12) - a(j + 14) x3i = a(j + 13) - a(j + 15) y4r = x0r + x2r y4i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r x2r = x1r + x3i x2i = x1i - x3r y5r = wn4r * (x0r - x0i) y5i = wn4r * (x0r + x0i) y7r = wn4r * (x2r - x2i) y7i = wn4r * (x2r + x2i) x0r = y1r + y5r x0i = y1i + y5i a(j + 2) = wk1r * x0r - wk1i * x0i a(j + 3) = wk1r * x0i + wk1i * x0r x0r = y1r - y5r x0i = y1i - y5i a(j + 10) = wk5r * x0r - wk5i * x0i a(j + 11) = wk5r * x0i + wk5i * x0r x0r = y3r - y7i x0i = y3i + y7r a(j + 6) = wk3r * x0r - wk3i * x0i a(j + 7) = wk3r * x0i + wk3i * x0r x0r = y3r + y7i x0i = y3i - y7r a(j + 14) = wk7r * x0r - wk7i * x0i a(j + 15) = wk7r * x0i + wk7i * x0r a(j) = y0r + y4r a(j + 1) = y0i + y4i x0r = y0r - y4r x0i = y0i - y4i a(j + 8) = wk4r * x0r - wk4i * x0i a(j + 9) = wk4r * x0i + wk4i * x0r x0r = y2r - y6i x0i = y2i + y6r a(j + 4) = wk2r * x0r - wk2i * x0i a(j + 5) = wk2r * x0i + wk2i * x0r x0r = y2r + y6i x0i = y2i - y6r a(j + 12) = wk6r * x0r - wk6i * x0i a(j + 13) = wk6r * x0i + wk6i * x0r end do end if end ! subroutine cftmdl(n, l, a, w) integer n, l, j, j1, j2, j3, j4, j5, j6, j7, k, k1, m real*8 a(0 : n - 1), w(0 : *) real*8 wn4r, wtmp, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 wk4r, wk4i, wk5r, wk5i, wk6r, wk6i, wk7r, wk7i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i m = 8 * l wn4r = w(2) do j = 0, l - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l j4 = j3 + l j5 = j4 + l j6 = j5 + l j7 = j6 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) y0r = x0r + x2r y0i = x0i + x2i y2r = x0r - x2r y2i = x0i - x2i y1r = x1r - x3i y1i = x1i + x3r y3r = x1r + x3i y3i = x1i - x3r x0r = a(j4) + a(j5) x0i = a(j4 + 1) + a(j5 + 1) x1r = a(j4) - a(j5) x1i = a(j4 + 1) - a(j5 + 1) x2r = a(j6) + a(j7) x2i = a(j6 + 1) + a(j7 + 1) x3r = a(j6) - a(j7) x3i = a(j6 + 1) - a(j7 + 1) y4r = x0r + x2r y4i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r x2r = x1r + x3i x2i = x1i - x3r y5r = wn4r * (x0r - x0i) y5i = wn4r * (x0r + x0i) y7r = wn4r * (x2r - x2i) y7i = wn4r * (x2r + x2i) a(j1) = y1r + y5r a(j1 + 1) = y1i + y5i a(j5) = y1r - y5r a(j5 + 1) = y1i - y5i a(j3) = y3r - y7i a(j3 + 1) = y3i + y7r a(j7) = y3r + y7i a(j7 + 1) = y3i - y7r a(j) = y0r + y4r a(j + 1) = y0i + y4i a(j4) = y0r - y4r a(j4 + 1) = y0i - y4i a(j2) = y2r - y6i a(j2 + 1) = y2i + y6r a(j6) = y2r + y6i a(j6 + 1) = y2i - y6r end do if (m .lt. n) then wk1r = w(4) wk1i = w(5) do j = m, l + m - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l j4 = j3 + l j5 = j4 + l j6 = j5 + l j7 = j6 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) y0r = x0r + x2r y0i = x0i + x2i y2r = x0r - x2r y2i = x0i - x2i y1r = x1r - x3i y1i = x1i + x3r y3r = x1r + x3i y3i = x1i - x3r x0r = a(j4) + a(j5) x0i = a(j4 + 1) + a(j5 + 1) x1r = a(j4) - a(j5) x1i = a(j4 + 1) - a(j5 + 1) x2r = a(j6) + a(j7) x2i = a(j6 + 1) + a(j7 + 1) x3r = a(j6) - a(j7) x3i = a(j6 + 1) - a(j7 + 1) y4r = x0r + x2r y4i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r x2r = x1r + x3i x2i = x3r - x1i y5r = wk1i * x0r - wk1r * x0i y5i = wk1i * x0i + wk1r * x0r y7r = wk1r * x2r + wk1i * x2i y7i = wk1r * x2i - wk1i * x2r x0r = wk1r * y1r - wk1i * y1i x0i = wk1r * y1i + wk1i * y1r a(j1) = x0r + y5r a(j1 + 1) = x0i + y5i a(j5) = y5i - x0i a(j5 + 1) = x0r - y5r x0r = wk1i * y3r - wk1r * y3i x0i = wk1i * y3i + wk1r * y3r a(j3) = x0r - y7r a(j3 + 1) = x0i + y7i a(j7) = y7i - x0i a(j7 + 1) = x0r + y7r a(j) = y0r + y4r a(j + 1) = y0i + y4i a(j4) = y4i - y0i a(j4 + 1) = y0r - y4r x0r = y2r - y6i x0i = y2i + y6r a(j2) = wn4r * (x0r - x0i) a(j2 + 1) = wn4r * (x0i + x0r) x0r = y6r - y2i x0i = y2r + y6i a(j6) = wn4r * (x0r - x0i) a(j6 + 1) = wn4r * (x0i + x0r) end do k1 = 4 do k = 2 * m, n - m, m k1 = k1 + 4 wk1r = w(k1) wk1i = w(k1 + 1) wk2r = w(k1 + 2) wk2i = w(k1 + 3) wtmp = 2 * wk2i wk3r = wk1r - wtmp * wk1i wk3i = wtmp * wk1r - wk1i wk4r = 1 - wtmp * wk2i wk4i = wtmp * wk2r wtmp = 2 * wk4i wk5r = wk3r - wtmp * wk1i wk5i = wtmp * wk1r - wk3i wk6r = wk2r - wtmp * wk2i wk6i = wtmp * wk2r - wk2i wk7r = wk1r - wtmp * wk3i wk7i = wtmp * wk3r - wk1i do j = k, l + k - 2, 2 j1 = j + l j2 = j1 + l j3 = j2 + l j4 = j3 + l j5 = j4 + l j6 = j5 + l j7 = j6 + l x0r = a(j) + a(j1) x0i = a(j + 1) + a(j1 + 1) x1r = a(j) - a(j1) x1i = a(j + 1) - a(j1 + 1) x2r = a(j2) + a(j3) x2i = a(j2 + 1) + a(j3 + 1) x3r = a(j2) - a(j3) x3i = a(j2 + 1) - a(j3 + 1) y0r = x0r + x2r y0i = x0i + x2i y2r = x0r - x2r y2i = x0i - x2i y1r = x1r - x3i y1i = x1i + x3r y3r = x1r + x3i y3i = x1i - x3r x0r = a(j4) + a(j5) x0i = a(j4 + 1) + a(j5 + 1) x1r = a(j4) - a(j5) x1i = a(j4 + 1) - a(j5 + 1) x2r = a(j6) + a(j7) x2i = a(j6 + 1) + a(j7 + 1) x3r = a(j6) - a(j7) x3i = a(j6 + 1) - a(j7 + 1) y4r = x0r + x2r y4i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r x2r = x1r + x3i x2i = x1i - x3r y5r = wn4r * (x0r - x0i) y5i = wn4r * (x0r + x0i) y7r = wn4r * (x2r - x2i) y7i = wn4r * (x2r + x2i) x0r = y1r + y5r x0i = y1i + y5i a(j1) = wk1r * x0r - wk1i * x0i a(j1 + 1) = wk1r * x0i + wk1i * x0r x0r = y1r - y5r x0i = y1i - y5i a(j5) = wk5r * x0r - wk5i * x0i a(j5 + 1) = wk5r * x0i + wk5i * x0r x0r = y3r - y7i x0i = y3i + y7r a(j3) = wk3r * x0r - wk3i * x0i a(j3 + 1) = wk3r * x0i + wk3i * x0r x0r = y3r + y7i x0i = y3i - y7r a(j7) = wk7r * x0r - wk7i * x0i a(j7 + 1) = wk7r * x0i + wk7i * x0r a(j) = y0r + y4r a(j + 1) = y0i + y4i x0r = y0r - y4r x0i = y0i - y4i a(j4) = wk4r * x0r - wk4i * x0i a(j4 + 1) = wk4r * x0i + wk4i * x0r x0r = y2r - y6i x0i = y2i + y6r a(j2) = wk2r * x0r - wk2i * x0i a(j2 + 1) = wk2r * x0i + wk2i * x0r x0r = y2r + y6i x0i = y2i - y6r a(j6) = wk6r * x0r - wk6i * x0i a(j6 + 1) = wk6r * x0i + wk6i * x0r end do end do end if end ! subroutine rftfsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi m = n / 2 ks = 2 * nc / m kk = 0 do j = 2, m - 2, 2 k = n - j kk = kk + ks wkr = 0.5d0 - c(nc - kk) wki = c(kk) xr = a(j) - a(k) xi = a(j + 1) + a(k + 1) yr = wkr * xr - wki * xi yi = wkr * xi + wki * xr a(j) = a(j) - yr a(j + 1) = a(j + 1) - yi a(k) = a(k) + yr a(k + 1) = a(k + 1) - yi end do end ! subroutine rftbsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi a(1) = -a(1) m = n / 2 ks = 2 * nc / m kk = 0 do j = 2, m - 2, 2 k = n - j kk = kk + ks wkr = 0.5d0 - c(nc - kk) wki = c(kk) xr = a(j) - a(k) xi = a(j + 1) + a(k + 1) yr = wkr * xr + wki * xi yi = wkr * xi - wki * xr a(j) = a(j) - yr a(j + 1) = yi - a(j + 1) a(k) = a(k) + yr a(k + 1) = yi - a(k + 1) end do a(m + 1) = -a(m + 1) end ! subroutine dctsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr m = n / 2 ks = nc / n kk = 0 do j = 1, m - 1 k = n - j kk = kk + ks wkr = c(kk) - c(nc - kk) wki = c(kk) + c(nc - kk) xr = wki * a(j) - wkr * a(k) a(j) = wkr * a(j) + wki * a(k) a(k) = xr end do a(m) = c(0) * a(m) end ! subroutine dstsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr m = n / 2 ks = nc / n kk = 0 do j = 1, m - 1 k = n - j kk = kk + ks wkr = c(kk) - c(nc - kk) wki = c(kk) + c(nc - kk) xr = wki * a(k) - wkr * a(j) a(k) = wkr * a(k) + wki * a(j) a(j) = xr end do a(m) = c(0) * a(m) end ! OouraFFT-1.0/fft8g_h.c000066400000000000000000001310051246725775500145140ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :one data length :power of 2 decimation :frequency radix :8, 4, 2 data :inplace table :not use functions cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, double *); void rdft(int, int, double *); void ddct(int, int, double *); void ddst(int, int, double *); void dfct(int, double *); void dfst(int, double *); -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k cdft(2*n, 1, a); cdft(2*n, -1, a); [parameters] 2*n :data length (int) n >= 1, n = power of 2 a[0...2*n-1] :input/output data (double *) input data a[2*j] = Re(x[j]), a[2*j+1] = Im(x[j]), 0<=j RDFT R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k rdft(n, 1, a); rdft(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[2*k] = R[k], 0<=k input data a[2*j] = R[j], 0<=j IDCT (excluding scale) C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k ddct(n, 1, a); ddct(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = C[k], 0<=k IDST (excluding scale) S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 ddst(n, 1, a); ddst(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) input data a[j] = A[j], 0 output data a[k] = S[k], 0= 2, n = power of 2 a[0...n] :input/output data (double *) output data a[k] = C[k], 0<=k<=n [remark] Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); is a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); for (j = 0; j <= n; j++) { a[j] *= 2.0 / n; } . -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- [definition] S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = S[k], 0 4) { if (isgn >= 0) { bitrv2(n, a); cftfsub(n, a); } else { bitrv2conj(n, a); cftbsub(n, a); } } else if (n == 4) { cftfsub(n, a); } } void rdft(int n, int isgn, double *a) { void bitrv2(int n, double *a); void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); double xi; if (isgn >= 0) { if (n > 4) { bitrv2(n, a); cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a); bitrv2(n, a); cftbsub(n, a); } else if (n == 4) { cftfsub(n, a); } } } void ddct(int n, int isgn, double *a) { void bitrv2(int n, double *a); void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); void dctsub(int n, double *a); void dctsub4(int n, double *a); int j; double xr; if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = a[j] - a[j - 1]; a[j] += a[j - 1]; } a[1] = a[0] - xr; a[0] += xr; if (n > 4) { rftbsub(n, a); bitrv2(n, a); cftbsub(n, a); } else if (n == 4) { cftfsub(n, a); } } if (n > 4) { dctsub(n, a); } else { dctsub4(n, a); } if (isgn >= 0) { if (n > 4) { bitrv2(n, a); cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = a[j] - a[j + 1]; a[j] += a[j + 1]; } a[n - 1] = xr; } } void ddst(int n, int isgn, double *a) { void bitrv2(int n, double *a); void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); void dstsub(int n, double *a); void dstsub4(int n, double *a); int j; double xr; if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = -a[j] - a[j - 1]; a[j] -= a[j - 1]; } a[1] = a[0] + xr; a[0] -= xr; if (n > 4) { rftbsub(n, a); bitrv2(n, a); cftbsub(n, a); } else if (n == 4) { cftfsub(n, a); } } if (n > 4) { dstsub(n, a); } else { dstsub4(n, a); } if (isgn >= 0) { if (n > 4) { bitrv2(n, a); cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = -a[j] - a[j + 1]; a[j] -= a[j + 1]; } a[n - 1] = -xr; } } void dfct(int n, double *a) { void ddct(int n, int isgn, double *a); void bitrv1(int n, double *a); int j, k, m, mh; double xr, xi, yr, yi, an; m = n >> 1; for (j = 0; j < m; j++) { k = n - j; xr = a[j] + a[k]; a[j] -= a[k]; a[k] = xr; } an = a[n]; while (m >= 2) { ddct(m, 1, a); bitrv1(m, a); mh = m >> 1; xi = a[m]; a[m] = a[0]; a[0] = an - xi; an += xi; for (j = 1; j < mh; j++) { k = m - j; xr = a[m + k]; xi = a[m + j]; yr = a[j]; yi = a[k]; a[m + j] = yr; a[m + k] = yi; a[j] = xr - xi; a[k] = xr + xi; } xr = a[mh]; a[mh] = a[m + mh]; a[m + mh] = xr; m = mh; } xi = a[1]; a[1] = a[0]; a[0] = an + xi; a[n] = an - xi; bitrv1(n, a); } void dfst(int n, double *a) { void ddst(int n, int isgn, double *a); void bitrv1(int n, double *a); int j, k, m, mh; double xr, xi, yr, yi; m = n >> 1; for (j = 1; j < m; j++) { k = n - j; xr = a[j] - a[k]; a[j] += a[k]; a[k] = xr; } a[0] = a[m]; while (m >= 2) { ddst(m, 1, a); bitrv1(m, a); mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[m + k]; xi = a[m + j]; yr = a[j]; yi = a[k]; a[m + j] = yr; a[m + k] = yi; a[j] = xr + xi; a[k] = xr - xi; } a[m] = a[0]; a[0] = a[m + mh]; a[m + mh] = a[mh]; m = mh; } a[1] = a[0]; a[0] = 0; bitrv1(n, a); } /* -------- child routines -------- */ #include #ifndef M_PI_2 #define M_PI_2 1.570796326794896619231321691639751442098584699687 #endif #ifndef WR5000 /* cos(M_PI_2*0.5000) */ #define WR5000 0.707106781186547524400844362104849039284835937688 #endif #ifndef WR2500 /* cos(M_PI_2*0.2500) */ #define WR2500 0.923879532511286756128183189396788286822416625863 #endif #ifndef WI2500 /* sin(M_PI_2*0.2500) */ #define WI2500 0.382683432365089771728459984030398866761344562485 #endif #ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */ #define RDFT_LOOP_DIV 64 #endif #ifndef DCST_LOOP_DIV /* control of the DCT,DST's speed & tolerance */ #define DCST_LOOP_DIV 64 #endif void bitrv2(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k; double xr, xi, yr, yi; l = n >> 2; m = 2; while (m < l) { l >>= 1; m <<= 1; } if (m == l) { j0 = 0; for (k0 = 0; k0 < m; k0 += 2) { k = k0; for (j = j0; j < j0 + k0; j += 2) { xr = a[j]; xi = a[j + 1]; yr = a[k]; yi = a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (k ^= i); i >>= 1); } j1 = j0 + k0 + m; k1 = j1 + m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (j0 ^= i); i >>= 1); } } else { j0 = 0; for (k0 = 2; k0 < m; k0 += 2) { for (i = n >> 1; i > (j0 ^= i); i >>= 1); k = k0; for (j = j0; j < j0 + k0; j += 2) { xr = a[j]; xi = a[j + 1]; yr = a[k]; yi = a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (k ^= i); i >>= 1); } } } } void bitrv2conj(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k; double xr, xi, yr, yi; l = n >> 2; m = 2; while (m < l) { l >>= 1; m <<= 1; } if (m == l) { j0 = 0; for (k0 = 0; k0 < m; k0 += 2) { k = k0; for (j = j0; j < j0 + k0; j += 2) { xr = a[j]; xi = -a[j + 1]; yr = a[k]; yi = -a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; a[k1 + 1] = -a[k1 + 1]; j1 = k1 + m; k1 = j1 + m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; k1 += m; a[k1 + 1] = -a[k1 + 1]; for (i = n >> 1; i > (j0 ^= i); i >>= 1); } } else { a[1] = -a[1]; a[m + 1] = -a[m + 1]; j0 = 0; for (k0 = 2; k0 < m; k0 += 2) { for (i = n >> 1; i > (j0 ^= i); i >>= 1); k = k0; for (j = j0; j < j0 + k0; j += 2) { xr = a[j]; xi = -a[j + 1]; yr = a[k]; yi = -a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = n >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; a[k1 + 1] = -a[k1 + 1]; a[k1 + m + 1] = -a[k1 + m + 1]; } } } void bitrv1(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k; double x; l = n >> 2; m = 1; while (m < l) { l >>= 1; m <<= 1; } if (m == l) { j0 = 0; for (k0 = 0; k0 < m; k0++) { k = k0; for (j = j0; j < j0 + k0; j++) { x = a[j]; a[j] = a[k]; a[k] = x; j1 = j + m; k1 = k + 2 * m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 += m; k1 -= m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 += m; k1 += 2 * m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; for (i = n >> 1; i > (k ^= i); i >>= 1); } j1 = j0 + k0 + m; k1 = j1 + m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; for (i = n >> 1; i > (j0 ^= i); i >>= 1); } } else { j0 = 0; for (k0 = 1; k0 < m; k0++) { for (i = n >> 1; i > (j0 ^= i); i >>= 1); k = k0; for (j = j0; j < j0 + k0; j++) { x = a[j]; a[j] = a[k]; a[k] = x; j1 = j + m; k1 = k + m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; for (i = n >> 1; i > (k ^= i); i >>= 1); } } } } void cftfsub(int n, double *a) { void cft1st(int n, double *a); void cftmdl(int n, int l, double *a); int j, j1, j2, j3, l; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; if (n >= 16) { cft1st(n, a); l = 16; while ((l << 3) <= n) { cftmdl(n, l, a); l <<= 3; } } if ((l << 1) < n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i - x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; } } else if ((l << 1) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = a[j + 1] - a[j1 + 1]; a[j] += a[j1]; a[j + 1] += a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cftbsub(int n, double *a) { void cft1st(int n, double *a); void cftmdl(int n, int l, double *a); int j, j1, j2, j3, j4, j5, j6, j7, l; double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; l = 2; if (n > 16) { cft1st(n, a); l = 16; while ((l << 3) < n) { cftmdl(n, l, a); l <<= 3; } } if ((l << 2) < n) { wn4r = WR5000; for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; j4 = j3 + l; j5 = j4 + l; j6 = j5 + l; j7 = j6 + l; x0r = a[j] + a[j1]; x0i = -a[j + 1] - a[j1 + 1]; x1r = a[j] - a[j1]; x1i = -a[j + 1] + a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; y0r = x0r + x2r; y0i = x0i - x2i; y2r = x0r - x2r; y2i = x0i + x2i; y1r = x1r - x3i; y1i = x1i - x3r; y3r = x1r + x3i; y3i = x1i + x3r; x0r = a[j4] + a[j5]; x0i = a[j4 + 1] + a[j5 + 1]; x1r = a[j4] - a[j5]; x1i = a[j4 + 1] - a[j5 + 1]; x2r = a[j6] + a[j7]; x2i = a[j6 + 1] + a[j7 + 1]; x3r = a[j6] - a[j7]; x3i = a[j6 + 1] - a[j7 + 1]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[j1] = y1r + y5r; a[j1 + 1] = y1i - y5i; a[j5] = y1r - y5r; a[j5 + 1] = y1i + y5i; a[j3] = y3r - y7i; a[j3 + 1] = y3i - y7r; a[j7] = y3r + y7i; a[j7 + 1] = y3i + y7r; a[j] = y0r + y4r; a[j + 1] = y0i - y4i; a[j4] = y0r - y4r; a[j4 + 1] = y0i + y4i; a[j2] = y2r - y6i; a[j2 + 1] = y2i - y6r; a[j6] = y2r + y6i; a[j6 + 1] = y2i + y6r; } } else if ((l << 2) == n) { for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; x0r = a[j] + a[j1]; x0i = -a[j + 1] - a[j1 + 1]; x1r = a[j] - a[j1]; x1i = -a[j + 1] + a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j2] = x0r - x2r; a[j2 + 1] = x0i + x2i; a[j1] = x1r - x3i; a[j1 + 1] = x1i - x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i + x3r; } } else { for (j = 0; j < l; j += 2) { j1 = j + l; x0r = a[j] - a[j1]; x0i = -a[j + 1] + a[j1 + 1]; a[j] += a[j1]; a[j + 1] = -a[j + 1] - a[j1 + 1]; a[j1] = x0r; a[j1 + 1] = x0i; } } } void cft1st(int n, double *a) { int j, kj, kr; double ew, wn4r, wtmp, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, wk4r, wk4i, wk5r, wk5i, wk6r, wk6i, wk7r, wk7i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = WR5000; x0r = a[0] + a[2]; x0i = a[1] + a[3]; x1r = a[0] - a[2]; x1i = a[1] - a[3]; x2r = a[4] + a[6]; x2i = a[5] + a[7]; x3r = a[4] - a[6]; x3i = a[5] - a[7]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[8] + a[10]; x0i = a[9] + a[11]; x1r = a[8] - a[10]; x1i = a[9] - a[11]; x2r = a[12] + a[14]; x2i = a[13] + a[15]; x3r = a[12] - a[14]; x3i = a[13] - a[15]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[2] = y1r + y5r; a[3] = y1i + y5i; a[10] = y1r - y5r; a[11] = y1i - y5i; a[6] = y3r - y7i; a[7] = y3i + y7r; a[14] = y3r + y7i; a[15] = y3i - y7r; a[0] = y0r + y4r; a[1] = y0i + y4i; a[8] = y0r - y4r; a[9] = y0i - y4i; a[4] = y2r - y6i; a[5] = y2i + y6r; a[12] = y2r + y6i; a[13] = y2i - y6r; if (n > 16) { wk1r = WR2500; wk1i = WI2500; x0r = a[16] + a[18]; x0i = a[17] + a[19]; x1r = a[16] - a[18]; x1i = a[17] - a[19]; x2r = a[20] + a[22]; x2i = a[21] + a[23]; x3r = a[20] - a[22]; x3i = a[21] - a[23]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[24] + a[26]; x0i = a[25] + a[27]; x1r = a[24] - a[26]; x1i = a[25] - a[27]; x2r = a[28] + a[30]; x2i = a[29] + a[31]; x3r = a[28] - a[30]; x3i = a[29] - a[31]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x3r - x1i; y5r = wk1i * x0r - wk1r * x0i; y5i = wk1i * x0i + wk1r * x0r; y7r = wk1r * x2r + wk1i * x2i; y7i = wk1r * x2i - wk1i * x2r; x0r = wk1r * y1r - wk1i * y1i; x0i = wk1r * y1i + wk1i * y1r; a[18] = x0r + y5r; a[19] = x0i + y5i; a[26] = y5i - x0i; a[27] = x0r - y5r; x0r = wk1i * y3r - wk1r * y3i; x0i = wk1i * y3i + wk1r * y3r; a[22] = x0r - y7r; a[23] = x0i + y7i; a[30] = y7i - x0i; a[31] = x0r + y7r; a[16] = y0r + y4r; a[17] = y0i + y4i; a[24] = y4i - y0i; a[25] = y0r - y4r; x0r = y2r - y6i; x0i = y2i + y6r; a[20] = wn4r * (x0r - x0i); a[21] = wn4r * (x0i + x0r); x0r = y6r - y2i; x0i = y2r + y6i; a[28] = wn4r * (x0r - x0i); a[29] = wn4r * (x0i + x0r); ew = M_PI_2 / n; kr = n >> 2; for (j = 32; j < n; j += 16) { for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1); wk1r = cos(ew * kr); wk1i = sin(ew * kr); wk2r = 1 - 2 * wk1i * wk1i; wk2i = 2 * wk1i * wk1r; wtmp = 2 * wk2i; wk3r = wk1r - wtmp * wk1i; wk3i = wtmp * wk1r - wk1i; wk4r = 1 - wtmp * wk2i; wk4i = wtmp * wk2r; wtmp = 2 * wk4i; wk5r = wk3r - wtmp * wk1i; wk5i = wtmp * wk1r - wk3i; wk6r = wk2r - wtmp * wk2i; wk6i = wtmp * wk2r - wk2i; wk7r = wk1r - wtmp * wk3i; wk7i = wtmp * wk3r - wk1i; x0r = a[j] + a[j + 2]; x0i = a[j + 1] + a[j + 3]; x1r = a[j] - a[j + 2]; x1i = a[j + 1] - a[j + 3]; x2r = a[j + 4] + a[j + 6]; x2i = a[j + 5] + a[j + 7]; x3r = a[j + 4] - a[j + 6]; x3i = a[j + 5] - a[j + 7]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[j + 8] + a[j + 10]; x0i = a[j + 9] + a[j + 11]; x1r = a[j + 8] - a[j + 10]; x1i = a[j + 9] - a[j + 11]; x2r = a[j + 12] + a[j + 14]; x2i = a[j + 13] + a[j + 15]; x3r = a[j + 12] - a[j + 14]; x3i = a[j + 13] - a[j + 15]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); x0r = y1r + y5r; x0i = y1i + y5i; a[j + 2] = wk1r * x0r - wk1i * x0i; a[j + 3] = wk1r * x0i + wk1i * x0r; x0r = y1r - y5r; x0i = y1i - y5i; a[j + 10] = wk5r * x0r - wk5i * x0i; a[j + 11] = wk5r * x0i + wk5i * x0r; x0r = y3r - y7i; x0i = y3i + y7r; a[j + 6] = wk3r * x0r - wk3i * x0i; a[j + 7] = wk3r * x0i + wk3i * x0r; x0r = y3r + y7i; x0i = y3i - y7r; a[j + 14] = wk7r * x0r - wk7i * x0i; a[j + 15] = wk7r * x0i + wk7i * x0r; a[j] = y0r + y4r; a[j + 1] = y0i + y4i; x0r = y0r - y4r; x0i = y0i - y4i; a[j + 8] = wk4r * x0r - wk4i * x0i; a[j + 9] = wk4r * x0i + wk4i * x0r; x0r = y2r - y6i; x0i = y2i + y6r; a[j + 4] = wk2r * x0r - wk2i * x0i; a[j + 5] = wk2r * x0i + wk2i * x0r; x0r = y2r + y6i; x0i = y2i - y6r; a[j + 12] = wk6r * x0r - wk6i * x0i; a[j + 13] = wk6r * x0i + wk6i * x0r; } } } void cftmdl(int n, int l, double *a) { int j, j1, j2, j3, j4, j5, j6, j7, k, kj, kr, m; double ew, wn4r, wtmp, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, wk4r, wk4i, wk5r, wk5i, wk6r, wk6i, wk7r, wk7i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; m = l << 3; wn4r = WR5000; for (j = 0; j < l; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; j4 = j3 + l; j5 = j4 + l; j6 = j5 + l; j7 = j6 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[j4] + a[j5]; x0i = a[j4 + 1] + a[j5 + 1]; x1r = a[j4] - a[j5]; x1i = a[j4 + 1] - a[j5 + 1]; x2r = a[j6] + a[j7]; x2i = a[j6 + 1] + a[j7 + 1]; x3r = a[j6] - a[j7]; x3i = a[j6 + 1] - a[j7 + 1]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[j1] = y1r + y5r; a[j1 + 1] = y1i + y5i; a[j5] = y1r - y5r; a[j5 + 1] = y1i - y5i; a[j3] = y3r - y7i; a[j3 + 1] = y3i + y7r; a[j7] = y3r + y7i; a[j7 + 1] = y3i - y7r; a[j] = y0r + y4r; a[j + 1] = y0i + y4i; a[j4] = y0r - y4r; a[j4 + 1] = y0i - y4i; a[j2] = y2r - y6i; a[j2 + 1] = y2i + y6r; a[j6] = y2r + y6i; a[j6 + 1] = y2i - y6r; } if (m < n) { wk1r = WR2500; wk1i = WI2500; for (j = m; j < l + m; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; j4 = j3 + l; j5 = j4 + l; j6 = j5 + l; j7 = j6 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[j4] + a[j5]; x0i = a[j4 + 1] + a[j5 + 1]; x1r = a[j4] - a[j5]; x1i = a[j4 + 1] - a[j5 + 1]; x2r = a[j6] + a[j7]; x2i = a[j6 + 1] + a[j7 + 1]; x3r = a[j6] - a[j7]; x3i = a[j6 + 1] - a[j7 + 1]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x3r - x1i; y5r = wk1i * x0r - wk1r * x0i; y5i = wk1i * x0i + wk1r * x0r; y7r = wk1r * x2r + wk1i * x2i; y7i = wk1r * x2i - wk1i * x2r; x0r = wk1r * y1r - wk1i * y1i; x0i = wk1r * y1i + wk1i * y1r; a[j1] = x0r + y5r; a[j1 + 1] = x0i + y5i; a[j5] = y5i - x0i; a[j5 + 1] = x0r - y5r; x0r = wk1i * y3r - wk1r * y3i; x0i = wk1i * y3i + wk1r * y3r; a[j3] = x0r - y7r; a[j3 + 1] = x0i + y7i; a[j7] = y7i - x0i; a[j7 + 1] = x0r + y7r; a[j] = y0r + y4r; a[j + 1] = y0i + y4i; a[j4] = y4i - y0i; a[j4 + 1] = y0r - y4r; x0r = y2r - y6i; x0i = y2i + y6r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = y6r - y2i; x0i = y2r + y6i; a[j6] = wn4r * (x0r - x0i); a[j6 + 1] = wn4r * (x0i + x0r); } ew = M_PI_2 / n; kr = n >> 2; for (k = 2 * m; k < n; k += m) { for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1); wk1r = cos(ew * kr); wk1i = sin(ew * kr); wk2r = 1 - 2 * wk1i * wk1i; wk2i = 2 * wk1i * wk1r; wtmp = 2 * wk2i; wk3r = wk1r - wtmp * wk1i; wk3i = wtmp * wk1r - wk1i; wk4r = 1 - wtmp * wk2i; wk4i = wtmp * wk2r; wtmp = 2 * wk4i; wk5r = wk3r - wtmp * wk1i; wk5i = wtmp * wk1r - wk3i; wk6r = wk2r - wtmp * wk2i; wk6i = wtmp * wk2r - wk2i; wk7r = wk1r - wtmp * wk3i; wk7i = wtmp * wk3r - wk1i; for (j = k; j < l + k; j += 2) { j1 = j + l; j2 = j1 + l; j3 = j2 + l; j4 = j3 + l; j5 = j4 + l; j6 = j5 + l; j7 = j6 + l; x0r = a[j] + a[j1]; x0i = a[j + 1] + a[j1 + 1]; x1r = a[j] - a[j1]; x1i = a[j + 1] - a[j1 + 1]; x2r = a[j2] + a[j3]; x2i = a[j2 + 1] + a[j3 + 1]; x3r = a[j2] - a[j3]; x3i = a[j2 + 1] - a[j3 + 1]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[j4] + a[j5]; x0i = a[j4 + 1] + a[j5 + 1]; x1r = a[j4] - a[j5]; x1i = a[j4 + 1] - a[j5 + 1]; x2r = a[j6] + a[j7]; x2i = a[j6 + 1] + a[j7 + 1]; x3r = a[j6] - a[j7]; x3i = a[j6 + 1] - a[j7 + 1]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); x0r = y1r + y5r; x0i = y1i + y5i; a[j1] = wk1r * x0r - wk1i * x0i; a[j1 + 1] = wk1r * x0i + wk1i * x0r; x0r = y1r - y5r; x0i = y1i - y5i; a[j5] = wk5r * x0r - wk5i * x0i; a[j5 + 1] = wk5r * x0i + wk5i * x0r; x0r = y3r - y7i; x0i = y3i + y7r; a[j3] = wk3r * x0r - wk3i * x0i; a[j3 + 1] = wk3r * x0i + wk3i * x0r; x0r = y3r + y7i; x0i = y3i - y7r; a[j7] = wk7r * x0r - wk7i * x0i; a[j7 + 1] = wk7r * x0i + wk7i * x0r; a[j] = y0r + y4r; a[j + 1] = y0i + y4i; x0r = y0r - y4r; x0i = y0i - y4i; a[j4] = wk4r * x0r - wk4i * x0i; a[j4 + 1] = wk4r * x0i + wk4i * x0r; x0r = y2r - y6i; x0i = y2i + y6r; a[j2] = wk2r * x0r - wk2i * x0i; a[j2 + 1] = wk2r * x0i + wk2i * x0r; x0r = y2r + y6i; x0i = y2i - y6r; a[j6] = wk6r * x0r - wk6i * x0i; a[j6 + 1] = wk6r * x0i + wk6i * x0r; } } } } void rftfsub(int n, double *a) { int i, i0, j, k; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = 2 * M_PI_2 / n; wkr = 0; wki = 0; wdi = cos(ec); wdr = sin(ec); wdi *= wdr; wdr *= wdr; w1r = 1 - 2 * wdr; w1i = 2 * wdi; ss = 2 * w1i; i = n >> 1; for (;;) { i0 = i - 4 * RDFT_LOOP_DIV; if (i0 < 4) { i0 = 4; } for (j = i - 4; j >= i0; j -= 4) { k = n - j; xr = a[j + 2] - a[k - 2]; xi = a[j + 3] + a[k - 1]; yr = wdr * xr - wdi * xi; yi = wdr * xi + wdi * xr; a[j + 2] -= yr; a[j + 3] -= yi; a[k - 2] += yr; a[k - 1] -= yi; wkr += ss * wdi; wki += ss * (0.5 - wdr); xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; wdr += ss * wki; wdi += ss * (0.5 - wkr); } if (i0 == 4) { break; } wkr = 0.5 * sin(ec * i0); wki = 0.5 * cos(ec * i0); wdr = 0.5 - (wkr * w1r - wki * w1i); wdi = wkr * w1i + wki * w1r; wkr = 0.5 - wkr; i = i0; } xr = a[2] - a[n - 2]; xi = a[3] + a[n - 1]; yr = wdr * xr - wdi * xi; yi = wdr * xi + wdi * xr; a[2] -= yr; a[3] -= yi; a[n - 2] += yr; a[n - 1] -= yi; } void rftbsub(int n, double *a) { int i, i0, j, k; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = 2 * M_PI_2 / n; wkr = 0; wki = 0; wdi = cos(ec); wdr = sin(ec); wdi *= wdr; wdr *= wdr; w1r = 1 - 2 * wdr; w1i = 2 * wdi; ss = 2 * w1i; i = n >> 1; a[i + 1] = -a[i + 1]; for (;;) { i0 = i - 4 * RDFT_LOOP_DIV; if (i0 < 4) { i0 = 4; } for (j = i - 4; j >= i0; j -= 4) { k = n - j; xr = a[j + 2] - a[k - 2]; xi = a[j + 3] + a[k - 1]; yr = wdr * xr + wdi * xi; yi = wdr * xi - wdi * xr; a[j + 2] -= yr; a[j + 3] = yi - a[j + 3]; a[k - 2] += yr; a[k - 1] = yi - a[k - 1]; wkr += ss * wdi; wki += ss * (0.5 - wdr); xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] = yi - a[j + 1]; a[k] += yr; a[k + 1] = yi - a[k + 1]; wdr += ss * wki; wdi += ss * (0.5 - wkr); } if (i0 == 4) { break; } wkr = 0.5 * sin(ec * i0); wki = 0.5 * cos(ec * i0); wdr = 0.5 - (wkr * w1r - wki * w1i); wdi = wkr * w1i + wki * w1r; wkr = 0.5 - wkr; i = i0; } xr = a[2] - a[n - 2]; xi = a[3] + a[n - 1]; yr = wdr * xr + wdi * xi; yi = wdr * xi - wdi * xr; a[2] -= yr; a[3] = yi - a[3]; a[n - 2] += yr; a[n - 1] = yi - a[n - 1]; a[1] = -a[1]; } void dctsub(int n, double *a) { int i, i0, j, k, m; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = M_PI_2 / n; wkr = 0.5; wki = 0.5; w1r = cos(ec); w1i = sin(ec); wdr = 0.5 * (w1r - w1i); wdi = 0.5 * (w1r + w1i); ss = 2 * w1i; m = n >> 1; i = 0; for (;;) { i0 = i + 2 * DCST_LOOP_DIV; if (i0 > m - 2) { i0 = m - 2; } for (j = i + 2; j <= i0; j += 2) { k = n - j; xr = wdi * a[j - 1] - wdr * a[k + 1]; xi = wdr * a[j - 1] + wdi * a[k + 1]; wkr -= ss * wdi; wki += ss * wdr; yr = wki * a[j] - wkr * a[k]; yi = wkr * a[j] + wki * a[k]; wdr -= ss * wki; wdi += ss * wkr; a[k + 1] = xr; a[k] = yr; a[j - 1] = xi; a[j] = yi; } if (i0 == m - 2) { break; } wdr = cos(ec * i0); wdi = sin(ec * i0); wkr = 0.5 * (wdr - wdi); wki = 0.5 * (wdr + wdi); wdr = wkr * w1r - wki * w1i; wdi = wkr * w1i + wki * w1r; i = i0; } xr = wdi * a[m - 1] - wdr * a[m + 1]; a[m - 1] = wdr * a[m - 1] + wdi * a[m + 1]; a[m + 1] = xr; a[m] *= wki + ss * wdr; } void dstsub(int n, double *a) { int i, i0, j, k, m; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = M_PI_2 / n; wkr = 0.5; wki = 0.5; w1r = cos(ec); w1i = sin(ec); wdr = 0.5 * (w1r - w1i); wdi = 0.5 * (w1r + w1i); ss = 2 * w1i; m = n >> 1; i = 0; for (;;) { i0 = i + 2 * DCST_LOOP_DIV; if (i0 > m - 2) { i0 = m - 2; } for (j = i + 2; j <= i0; j += 2) { k = n - j; xr = wdi * a[k + 1] - wdr * a[j - 1]; xi = wdr * a[k + 1] + wdi * a[j - 1]; wkr -= ss * wdi; wki += ss * wdr; yr = wki * a[k] - wkr * a[j]; yi = wkr * a[k] + wki * a[j]; wdr -= ss * wki; wdi += ss * wkr; a[j - 1] = xr; a[j] = yr; a[k + 1] = xi; a[k] = yi; } if (i0 == m - 2) { break; } wdr = cos(ec * i0); wdi = sin(ec * i0); wkr = 0.5 * (wdr - wdi); wki = 0.5 * (wdr + wdi); wdr = wkr * w1r - wki * w1i; wdi = wkr * w1i + wki * w1r; i = i0; } xr = wdi * a[m + 1] - wdr * a[m - 1]; a[m + 1] = wdr * a[m + 1] + wdi * a[m - 1]; a[m - 1] = xr; a[m] *= wki + ss * wdr; } void dctsub4(int n, double *a) { int m; double wki, wdr, wdi, xr; wki = WR5000; m = n >> 1; if (m == 2) { wdr = wki * WI2500; wdi = wki * WR2500; xr = wdi * a[1] - wdr * a[3]; a[1] = wdr * a[1] + wdi * a[3]; a[3] = xr; } a[m] *= wki; } void dstsub4(int n, double *a) { int m; double wki, wdr, wdi, xr; wki = WR5000; m = n >> 1; if (m == 2) { wdr = wki * WI2500; wdi = wki * WR2500; xr = wdi * a[3] - wdr * a[1]; a[3] = wdr * a[3] + wdi * a[1]; a[1] = xr; } a[m] *= wki; } OouraFFT-1.0/fftsg.c000066400000000000000000002561051246725775500143110ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :one data length :power of 2 decimation :frequency radix :split-radix data :inplace table :use functions cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, double *, int *, double *); void rdft(int, int, double *, int *, double *); void ddct(int, int, double *, int *, double *); void ddst(int, int, double *, int *, double *); void dfct(int, double *, double *, int *, double *); void dfst(int, double *, double *, int *, double *); macro definitions USE_CDFT_PTHREADS : default=not defined CDFT_THREADS_BEGIN_N : must be >= 512, default=8192 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536 USE_CDFT_WINTHREADS : default=not defined CDFT_THREADS_BEGIN_N : must be >= 512, default=32768 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288 -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k ip[0] = 0; // first time only cdft(2*n, 1, a, ip, w); ip[0] = 0; // first time only cdft(2*n, -1, a, ip, w); [parameters] 2*n :data length (int) n >= 1, n = power of 2 a[0...2*n-1] :input/output data (double *) input data a[2*j] = Re(x[j]), a[2*j+1] = Im(x[j]), 0<=j= 2+sqrt(n) strictly, length of ip >= 2+(1<<(int)(log(n+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of cdft(2*n, -1, a, ip, w); is cdft(2*n, 1, a, ip, w); for (j = 0; j <= 2 * n - 1; j++) { a[j] *= 1.0 / n; } . -------- Real DFT / Inverse of Real DFT -------- [definition] RDFT R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k ip[0] = 0; // first time only rdft(n, 1, a, ip, w); ip[0] = 0; // first time only rdft(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[2*k] = R[k], 0<=k input data a[2*j] = R[j], 0<=j= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n/2-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of rdft(n, 1, a, ip, w); is rdft(n, -1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] IDCT (excluding scale) C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k ip[0] = 0; // first time only ddct(n, 1, a, ip, w); ip[0] = 0; // first time only ddct(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = C[k], 0<=k= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddct(n, -1, a, ip, w); is a[0] *= 0.5; ddct(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- DST (Discrete Sine Transform) / Inverse of DST -------- [definition] IDST (excluding scale) S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 ip[0] = 0; // first time only ddst(n, 1, a, ip, w); ip[0] = 0; // first time only ddst(n, -1, a, ip, w); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) input data a[j] = A[j], 0 output data a[k] = S[k], 0= 2+sqrt(n/2) strictly, length of ip >= 2+(1<<(int)(log(n/2+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/4-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddst(n, -1, a, ip, w); is a[0] *= 0.5; ddst(n, 1, a, ip, w); for (j = 0; j <= n - 1; j++) { a[j] *= 2.0 / n; } . -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- [definition] C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n [usage] ip[0] = 0; // first time only dfct(n, a, t, ip, w); [parameters] n :data length - 1 (int) n >= 2, n = power of 2 a[0...n] :input/output data (double *) output data a[k] = C[k], 0<=k<=n t[0...n/2] :work area (double *) ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); is a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); for (j = 0; j <= n; j++) { a[j] *= 2.0 / n; } . -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- [definition] S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = S[k], 0= 2+sqrt(n/4) strictly, length of ip >= 2+(1<<(int)(log(n/4+0.5)/log(2))/2). ip[0],ip[1] are pointers of the cos/sin table. w[0...n*5/8-1] :cos/sin table (double *) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of dfst(n, a, t, ip, w); is dfst(n, a, t, ip, w); for (j = 1; j <= n - 1; j++) { a[j] *= 2.0 / n; } . Appendix : The cos/sin table is recalculated when the larger table required. w[] and ip[] are compatible with all routines. */ void cdft(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void cftfsub(int n, double *a, int *ip, int nw, double *w); void cftbsub(int n, double *a, int *ip, int nw, double *w); int nw; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } if (isgn >= 0) { cftfsub(n, a, ip, nw, w); } else { cftbsub(n, a, ip, nw, w); } } void rdft(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void cftfsub(int n, double *a, int *ip, int nw, double *w); void cftbsub(int n, double *a, int *ip, int nw, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); int nw, nc; double xi; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 2)) { nc = n >> 2; makect(nc, ip, w + nw); } if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } } void ddct(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void cftfsub(int n, double *a, int *ip, int nw, double *w); void cftbsub(int n, double *a, int *ip, int nw, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); void dctsub(int n, double *a, int nc, double *c); int j, nw, nc; double xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = a[j] - a[j - 1]; a[j] += a[j - 1]; } a[1] = a[0] - xr; a[0] += xr; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } dctsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = a[j] - a[j + 1]; a[j] += a[j + 1]; } a[n - 1] = xr; } } void ddst(int n, int isgn, double *a, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void cftfsub(int n, double *a, int *ip, int nw, double *w); void cftbsub(int n, double *a, int *ip, int nw, double *w); void rftfsub(int n, double *a, int nc, double *c); void rftbsub(int n, double *a, int nc, double *c); void dstsub(int n, double *a, int nc, double *c); int j, nw, nc; double xr; nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = -a[j] - a[j - 1]; a[j] -= a[j - 1]; } a[1] = a[0] + xr; a[0] -= xr; if (n > 4) { rftbsub(n, a, nc, w + nw); cftbsub(n, a, ip, nw, w); } else if (n == 4) { cftbsub(n, a, ip, nw, w); } } dstsub(n, a, nc, w + nw); if (isgn >= 0) { if (n > 4) { cftfsub(n, a, ip, nw, w); rftfsub(n, a, nc, w + nw); } else if (n == 4) { cftfsub(n, a, ip, nw, w); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = -a[j] - a[j + 1]; a[j] -= a[j + 1]; } a[n - 1] = -xr; } } void dfct(int n, double *a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void cftfsub(int n, double *a, int *ip, int nw, double *w); void rftfsub(int n, double *a, int nc, double *c); void dctsub(int n, double *a, int nc, double *c); int j, k, l, m, mh, nw, nc; double xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } m = n >> 1; yi = a[m]; xi = a[0] + a[n]; a[0] -= a[n]; t[0] = xi - yi; t[m] = xi + yi; if (n > 2) { mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] - a[n - j]; xi = a[j] + a[n - j]; yr = a[k] - a[n - k]; yi = a[k] + a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi - yi; t[k] = xi + yi; } t[mh] = a[mh] + a[n - mh]; a[mh] -= a[n - mh]; dctsub(m, a, nc, w + nw); if (m > 4) { cftfsub(m, a, ip, nw, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, ip, nw, w); } a[n - 1] = a[0] - a[1]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] + a[j + 1]; a[2 * j - 1] = a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dctsub(m, t, nc, w + nw); if (m > 4) { cftfsub(m, t, ip, nw, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, ip, nw, w); } a[n - l] = t[0] - t[1]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = t[j] - t[j + 1]; a[k + l] = t[j] + t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 0; j < mh; j++) { k = m - j; t[j] = t[m + k] - t[m + j]; t[k] = t[m + k] + t[m + j]; } t[mh] = t[m + mh]; m = mh; } a[l] = t[0]; a[n] = t[2] - t[1]; a[0] = t[2] + t[1]; } else { a[1] = a[0]; a[2] = t[0]; a[0] = t[1]; } } void dfst(int n, double *a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void cftfsub(int n, double *a, int *ip, int nw, double *w); void rftfsub(int n, double *a, int nc, double *c); void dstsub(int n, double *a, int nc, double *c); int j, k, l, m, mh, nw, nc; double xr, xi, yr, yi; nw = ip[0]; if (n > (nw << 3)) { nw = n >> 3; makewt(nw, ip, w); } nc = ip[1]; if (n > (nc << 1)) { nc = n >> 1; makect(nc, ip, w + nw); } if (n > 2) { m = n >> 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[j] + a[n - j]; xi = a[j] - a[n - j]; yr = a[k] + a[n - k]; yi = a[k] - a[n - k]; a[j] = xr; a[k] = yr; t[j] = xi + yi; t[k] = xi - yi; } t[0] = a[mh] - a[n - mh]; a[mh] += a[n - mh]; a[0] = a[m]; dstsub(m, a, nc, w + nw); if (m > 4) { cftfsub(m, a, ip, nw, w); rftfsub(m, a, nc, w + nw); } else if (m == 4) { cftfsub(m, a, ip, nw, w); } a[n - 1] = a[1] - a[0]; a[1] = a[0] + a[1]; for (j = m - 2; j >= 2; j -= 2) { a[2 * j + 1] = a[j] - a[j + 1]; a[2 * j - 1] = -a[j] - a[j + 1]; } l = 2; m = mh; while (m >= 2) { dstsub(m, t, nc, w + nw); if (m > 4) { cftfsub(m, t, ip, nw, w); rftfsub(m, t, nc, w + nw); } else if (m == 4) { cftfsub(m, t, ip, nw, w); } a[n - l] = t[1] - t[0]; a[l] = t[0] + t[1]; k = 0; for (j = 2; j < m; j += 2) { k += l << 2; a[k - l] = -t[j] - t[j + 1]; a[k + l] = t[j] - t[j + 1]; } l <<= 1; mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; t[j] = t[m + k] + t[m + j]; t[k] = t[m + k] - t[m + j]; } t[0] = t[m + mh]; m = mh; } a[l] = t[0]; } a[0] = 0; } /* -------- initializing routines -------- */ #include void makewt(int nw, int *ip, double *w) { void makeipt(int nw, int *ip); int j, nwh, nw0, nw1; double delta, wn4r, wk1r, wk1i, wk3r, wk3i; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; delta = atan(1.0) / nwh; wn4r = cos(delta * nwh); w[0] = 1; w[1] = wn4r; if (nwh == 4) { w[2] = cos(delta * 2); w[3] = sin(delta * 2); } else if (nwh > 4) { makeipt(nw, ip); w[2] = 0.5 / cos(delta * 2); w[3] = 0.5 / cos(delta * 6); for (j = 4; j < nwh; j += 4) { w[j] = cos(delta * j); w[j + 1] = sin(delta * j); w[j + 2] = cos(3 * delta * j); w[j + 3] = -sin(3 * delta * j); } } nw0 = 0; while (nwh > 2) { nw1 = nw0 + nwh; nwh >>= 1; w[nw1] = 1; w[nw1 + 1] = wn4r; if (nwh == 4) { wk1r = w[nw0 + 4]; wk1i = w[nw0 + 5]; w[nw1 + 2] = wk1r; w[nw1 + 3] = wk1i; } else if (nwh > 4) { wk1r = w[nw0 + 4]; wk3r = w[nw0 + 6]; w[nw1 + 2] = 0.5 / wk1r; w[nw1 + 3] = 0.5 / wk3r; for (j = 4; j < nwh; j += 4) { wk1r = w[nw0 + 2 * j]; wk1i = w[nw0 + 2 * j + 1]; wk3r = w[nw0 + 2 * j + 2]; wk3i = w[nw0 + 2 * j + 3]; w[nw1 + j] = wk1r; w[nw1 + j + 1] = wk1i; w[nw1 + j + 2] = wk3r; w[nw1 + j + 3] = wk3i; } } nw0 = nw1; } } } void makeipt(int nw, int *ip) { int j, l, m, m2, p, q; ip[2] = 0; ip[3] = 16; m = 2; for (l = nw; l > 32; l >>= 2) { m2 = m << 1; q = m2 << 3; for (j = m; j < m2; j++) { p = ip[j] << 2; ip[m + j] = p; ip[m2 + j] = p + q; } m = m2; } } void makect(int nc, int *ip, double *c) { int j, nch; double delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; delta = atan(1.0) / nch; c[0] = cos(delta * nch); c[nch] = 0.5 * c[0]; for (j = 1; j < nch; j++) { c[j] = 0.5 * cos(delta * j); c[nc - j] = 0.5 * sin(delta * j); } } } /* -------- child routines -------- */ #ifdef USE_CDFT_PTHREADS #define USE_CDFT_THREADS #ifndef CDFT_THREADS_BEGIN_N #define CDFT_THREADS_BEGIN_N 8192 #endif #ifndef CDFT_4THREADS_BEGIN_N #define CDFT_4THREADS_BEGIN_N 65536 #endif #include #include #include #define cdft_thread_t pthread_t #define cdft_thread_create(thp,func,argp) { \ if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #define cdft_thread_wait(th) { \ if (pthread_join(th, NULL) != 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #endif /* USE_CDFT_PTHREADS */ #ifdef USE_CDFT_WINTHREADS #define USE_CDFT_THREADS #ifndef CDFT_THREADS_BEGIN_N #define CDFT_THREADS_BEGIN_N 32768 #endif #ifndef CDFT_4THREADS_BEGIN_N #define CDFT_4THREADS_BEGIN_N 524288 #endif #include #include #include #define cdft_thread_t HANDLE #define cdft_thread_create(thp,func,argp) { \ DWORD thid; \ *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \ if (*(thp) == 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #define cdft_thread_wait(th) { \ WaitForSingleObject(th, INFINITE); \ CloseHandle(th); \ } #endif /* USE_CDFT_WINTHREADS */ void cftfsub(int n, double *a, int *ip, int nw, double *w) { void bitrv2(int n, int *ip, double *a); void bitrv216(double *a); void bitrv208(double *a); void cftf1st(int n, double *a, double *w); void cftrec4(int n, double *a, int nw, double *w); void cftleaf(int n, int isplt, double *a, int nw, double *w); void cftfx41(int n, double *a, int nw, double *w); void cftf161(double *a, double *w); void cftf081(double *a, double *w); void cftf040(double *a); void cftx020(double *a); #ifdef USE_CDFT_THREADS void cftrec4_th(int n, double *a, int nw, double *w); #endif /* USE_CDFT_THREADS */ if (n > 8) { if (n > 32) { cftf1st(n, a, &w[nw - (n >> 2)]); #ifdef USE_CDFT_THREADS if (n > CDFT_THREADS_BEGIN_N) { cftrec4_th(n, a, nw, w); } else #endif /* USE_CDFT_THREADS */ if (n > 512) { cftrec4(n, a, nw, w); } else if (n > 128) { cftleaf(n, 1, a, nw, w); } else { cftfx41(n, a, nw, w); } bitrv2(n, ip, a); } else if (n == 32) { cftf161(a, &w[nw - 8]); bitrv216(a); } else { cftf081(a, w); bitrv208(a); } } else if (n == 8) { cftf040(a); } else if (n == 4) { cftx020(a); } } void cftbsub(int n, double *a, int *ip, int nw, double *w) { void bitrv2conj(int n, int *ip, double *a); void bitrv216neg(double *a); void bitrv208neg(double *a); void cftb1st(int n, double *a, double *w); void cftrec4(int n, double *a, int nw, double *w); void cftleaf(int n, int isplt, double *a, int nw, double *w); void cftfx41(int n, double *a, int nw, double *w); void cftf161(double *a, double *w); void cftf081(double *a, double *w); void cftb040(double *a); void cftx020(double *a); #ifdef USE_CDFT_THREADS void cftrec4_th(int n, double *a, int nw, double *w); #endif /* USE_CDFT_THREADS */ if (n > 8) { if (n > 32) { cftb1st(n, a, &w[nw - (n >> 2)]); #ifdef USE_CDFT_THREADS if (n > CDFT_THREADS_BEGIN_N) { cftrec4_th(n, a, nw, w); } else #endif /* USE_CDFT_THREADS */ if (n > 512) { cftrec4(n, a, nw, w); } else if (n > 128) { cftleaf(n, 1, a, nw, w); } else { cftfx41(n, a, nw, w); } bitrv2conj(n, ip, a); } else if (n == 32) { cftf161(a, &w[nw - 8]); bitrv216neg(a); } else { cftf081(a, w); bitrv208neg(a); } } else if (n == 8) { cftb040(a); } else if (n == 4) { cftx020(a); } } void bitrv2(int n, int *ip, double *a) { int j, j1, k, k1, l, m, nh, nm; double xr, xi, yr, yi; m = 1; for (l = n >> 2; l > 8; l >>= 2) { m <<= 1; } nh = n >> 1; nm = 4 * m; if (l == 8) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + 2 * ip[m + k]; k1 = 4 * k + 2 * ip[m + j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + 2 * ip[m + k]; j1 = k1 + 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= 2; k1 -= nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh + 2; k1 += nh + 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh - nm; k1 += 2 * nm - 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } else { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + ip[m + k]; k1 = 4 * k + ip[m + j]; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + ip[m + k]; j1 = k1 + 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } } } void bitrv2conj(int n, int *ip, double *a) { int j, j1, k, k1, l, m, nh, nm; double xr, xi, yr, yi; m = 1; for (l = n >> 2; l > 8; l >>= 2) { m <<= 1; } nh = n >> 1; nm = 4 * m; if (l == 8) { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + 2 * ip[m + k]; k1 = 4 * k + 2 * ip[m + j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + 2 * ip[m + k]; j1 = k1 + 2; k1 += nh; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; j1 += nm; k1 += 2 * nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= 2; k1 -= nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh + 2; k1 += nh + 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh - nm; k1 += 2 * nm - 2; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; } } else { for (k = 0; k < m; k++) { for (j = 0; j < k; j++) { j1 = 4 * j + ip[m + k]; k1 = 4 * k + ip[m + j]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nm; k1 += nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nm; k1 -= nm; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; } k1 = 4 * k + ip[m + k]; j1 = k1 + 2; k1 += nh; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; j1 += nm; k1 += nm; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; } } } void bitrv216(double *a) { double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x7r = a[14]; x7i = a[15]; x8r = a[16]; x8i = a[17]; x10r = a[20]; x10i = a[21]; x11r = a[22]; x11i = a[23]; x12r = a[24]; x12i = a[25]; x13r = a[26]; x13i = a[27]; x14r = a[28]; x14i = a[29]; a[2] = x8r; a[3] = x8i; a[4] = x4r; a[5] = x4i; a[6] = x12r; a[7] = x12i; a[8] = x2r; a[9] = x2i; a[10] = x10r; a[11] = x10i; a[14] = x14r; a[15] = x14i; a[16] = x1r; a[17] = x1i; a[20] = x5r; a[21] = x5i; a[22] = x13r; a[23] = x13i; a[24] = x3r; a[25] = x3i; a[26] = x11r; a[27] = x11i; a[28] = x7r; a[29] = x7i; } void bitrv216neg(double *a) { double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i, x15r, x15i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x6r = a[12]; x6i = a[13]; x7r = a[14]; x7i = a[15]; x8r = a[16]; x8i = a[17]; x9r = a[18]; x9i = a[19]; x10r = a[20]; x10i = a[21]; x11r = a[22]; x11i = a[23]; x12r = a[24]; x12i = a[25]; x13r = a[26]; x13i = a[27]; x14r = a[28]; x14i = a[29]; x15r = a[30]; x15i = a[31]; a[2] = x15r; a[3] = x15i; a[4] = x7r; a[5] = x7i; a[6] = x11r; a[7] = x11i; a[8] = x3r; a[9] = x3i; a[10] = x13r; a[11] = x13i; a[12] = x5r; a[13] = x5i; a[14] = x9r; a[15] = x9i; a[16] = x1r; a[17] = x1i; a[18] = x14r; a[19] = x14i; a[20] = x6r; a[21] = x6i; a[22] = x10r; a[23] = x10i; a[24] = x2r; a[25] = x2i; a[26] = x12r; a[27] = x12i; a[28] = x4r; a[29] = x4i; a[30] = x8r; a[31] = x8i; } void bitrv208(double *a) { double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; x1r = a[2]; x1i = a[3]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x6r = a[12]; x6i = a[13]; a[2] = x4r; a[3] = x4i; a[6] = x6r; a[7] = x6i; a[8] = x1r; a[9] = x1i; a[12] = x3r; a[13] = x3i; } void bitrv208neg(double *a) { double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x6r = a[12]; x6i = a[13]; x7r = a[14]; x7i = a[15]; a[2] = x7r; a[3] = x7i; a[4] = x3r; a[5] = x3i; a[6] = x5r; a[7] = x5i; a[8] = x1r; a[9] = x1i; a[10] = x6r; a[11] = x6i; a[12] = x2r; a[13] = x2i; a[14] = x4r; a[15] = x4i; } void cftf1st(int n, double *a, double *w) { int j, j0, j1, j2, j3, k, m, mh; double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = a[1] + a[j2 + 1]; x1r = a[0] - a[j2]; x1i = a[1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j2] = x1r - x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; csc1 = w[2]; csc3 = w[3]; wd1r = 1; wd1i = 0; wd3r = 1; wd3i = 0; k = 0; for (j = 2; j < mh - 2; j += 4) { k += 4; wk1r = csc1 * (wd1r + w[k]); wk1i = csc1 * (wd1i + w[k + 1]); wk3r = csc3 * (wd3r + w[k + 2]); wk3i = csc3 * (wd3i + w[k + 3]); wd1r = w[k]; wd1i = w[k + 1]; wd3r = w[k + 2]; wd3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = a[j + 1] + a[j2 + 1]; x1r = a[j] - a[j2]; x1i = a[j + 1] - a[j2 + 1]; y0r = a[j + 2] + a[j2 + 2]; y0i = a[j + 3] + a[j2 + 3]; y1r = a[j + 2] - a[j2 + 2]; y1i = a[j + 3] - a[j2 + 3]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 + 2] + a[j3 + 2]; y2i = a[j1 + 3] + a[j3 + 3]; y3r = a[j1 + 2] - a[j3 + 2]; y3i = a[j1 + 3] - a[j3 + 3]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j + 2] = y0r + y2r; a[j + 3] = y0i + y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j1 + 2] = y0r - y2r; a[j1 + 3] = y0i - y2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = y1r - y3i; x0i = y1i + y3r; a[j2 + 2] = wd1r * x0r - wd1i * x0i; a[j2 + 3] = wd1r * x0i + wd1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; x0r = y1r + y3i; x0i = y1i - y3r; a[j3 + 2] = wd3r * x0r + wd3i * x0i; a[j3 + 3] = wd3r * x0i - wd3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; y0r = a[j0 - 2] + a[j2 - 2]; y0i = a[j0 - 1] + a[j2 - 1]; y1r = a[j0 - 2] - a[j2 - 2]; y1i = a[j0 - 1] - a[j2 - 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 - 2] + a[j3 - 2]; y2i = a[j1 - 1] + a[j3 - 1]; y3r = a[j1 - 2] - a[j3 - 2]; y3i = a[j1 - 1] - a[j3 - 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j0 - 2] = y0r + y2r; a[j0 - 1] = y0i + y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j1 - 2] = y0r - y2r; a[j1 - 1] = y0i - y2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = y1r - y3i; x0i = y1i + y3r; a[j2 - 2] = wd1i * x0r - wd1r * x0i; a[j2 - 1] = wd1i * x0i + wd1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; x0r = y1r + y3i; x0i = y1i - y3r; a[j3 - 2] = wd3i * x0r + wd3r * x0i; a[j3 - 1] = wd3i * x0i - wd3r * x0r; } wk1r = csc1 * (wd1r + wn4r); wk1i = csc1 * (wd1i + wn4r); wk3r = csc3 * (wd3r - wn4r); wk3i = csc3 * (wd3i - wn4r); j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] + a[j2 - 2]; x0i = a[j0 - 1] + a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = a[j0 - 1] - a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i + x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 - 2] = wk1r * x0r - wk1i * x0i; a[j2 - 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 - 2] = wk3r * x0r + wk3i * x0i; a[j3 - 1] = wk3r * x0i - wk3i * x0r; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); x0r = a[j0 + 2] + a[j2 + 2]; x0i = a[j0 + 3] + a[j2 + 3]; x1r = a[j0 + 2] - a[j2 + 2]; x1i = a[j0 + 3] - a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j0 + 2] = x0r + x2r; a[j0 + 3] = x0i + x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 + 2] = wk1i * x0r - wk1r * x0i; a[j2 + 3] = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 + 2] = wk3i * x0r + wk3r * x0i; a[j3 + 3] = wk3i * x0i - wk3r * x0r; } void cftb1st(int n, double *a, double *w) { int j, j0, j1, j2, j3, k, m, mh; double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = -a[1] - a[j2 + 1]; x1r = a[0] - a[j2]; x1i = -a[1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j2] = x1r + x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r - x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; csc1 = w[2]; csc3 = w[3]; wd1r = 1; wd1i = 0; wd3r = 1; wd3i = 0; k = 0; for (j = 2; j < mh - 2; j += 4) { k += 4; wk1r = csc1 * (wd1r + w[k]); wk1i = csc1 * (wd1i + w[k + 1]); wk3r = csc3 * (wd3r + w[k + 2]); wk3i = csc3 * (wd3i + w[k + 3]); wd1r = w[k]; wd1i = w[k + 1]; wd3r = w[k + 2]; wd3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = -a[j + 1] - a[j2 + 1]; x1r = a[j] - a[j2]; x1i = -a[j + 1] + a[j2 + 1]; y0r = a[j + 2] + a[j2 + 2]; y0i = -a[j + 3] - a[j2 + 3]; y1r = a[j + 2] - a[j2 + 2]; y1i = -a[j + 3] + a[j2 + 3]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 + 2] + a[j3 + 2]; y2i = a[j1 + 3] + a[j3 + 3]; y3r = a[j1 + 2] - a[j3 + 2]; y3i = a[j1 + 3] - a[j3 + 3]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j + 2] = y0r + y2r; a[j + 3] = y0i - y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j1 + 2] = y0r - y2r; a[j1 + 3] = y0i + y2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = y1r + y3i; x0i = y1i + y3r; a[j2 + 2] = wd1r * x0r - wd1i * x0i; a[j2 + 3] = wd1r * x0i + wd1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; x0r = y1r - y3i; x0i = y1i - y3r; a[j3 + 2] = wd3r * x0r + wd3i * x0i; a[j3 + 3] = wd3r * x0i - wd3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = -a[j0 + 1] - a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = -a[j0 + 1] + a[j2 + 1]; y0r = a[j0 - 2] + a[j2 - 2]; y0i = -a[j0 - 1] - a[j2 - 1]; y1r = a[j0 - 2] - a[j2 - 2]; y1i = -a[j0 - 1] + a[j2 - 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; y2r = a[j1 - 2] + a[j3 - 2]; y2i = a[j1 - 1] + a[j3 - 1]; y3r = a[j1 - 2] - a[j3 - 2]; y3i = a[j1 - 1] - a[j3 - 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i - x2i; a[j0 - 2] = y0r + y2r; a[j0 - 1] = y0i - y2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j1 - 2] = y0r - y2r; a[j1 - 1] = y0i + y2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = y1r + y3i; x0i = y1i + y3r; a[j2 - 2] = wd1i * x0r - wd1r * x0i; a[j2 - 1] = wd1i * x0i + wd1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; x0r = y1r - y3i; x0i = y1i - y3r; a[j3 - 2] = wd3i * x0r + wd3r * x0i; a[j3 - 1] = wd3i * x0i - wd3r * x0r; } wk1r = csc1 * (wd1r + wn4r); wk1i = csc1 * (wd1i + wn4r); wk3r = csc3 * (wd3r - wn4r); wk3i = csc3 * (wd3i - wn4r); j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] + a[j2 - 2]; x0i = -a[j0 - 1] - a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = -a[j0 - 1] + a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i - x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 - 2] = wk1r * x0r - wk1i * x0i; a[j2 - 1] = wk1r * x0i + wk1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 - 2] = wk3r * x0r + wk3i * x0i; a[j3 - 1] = wk3r * x0i - wk3i * x0r; x0r = a[j0] + a[j2]; x0i = -a[j0 + 1] - a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = -a[j0 + 1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); x0r = a[j0 + 2] + a[j2 + 2]; x0i = -a[j0 + 3] - a[j2 + 3]; x1r = a[j0 + 2] - a[j2 + 2]; x1i = -a[j0 + 3] + a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j0 + 2] = x0r + x2r; a[j0 + 3] = x0i - x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 + 2] = wk1i * x0r - wk1r * x0i; a[j2 + 3] = wk1i * x0i + wk1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 + 2] = wk3i * x0r + wk3r * x0i; a[j3 + 3] = wk3i * x0i - wk3r * x0r; } #ifdef USE_CDFT_THREADS struct cdft_arg_st { int n0; int n; double *a; int nw; double *w; }; typedef struct cdft_arg_st cdft_arg_t; void cftrec4_th(int n, double *a, int nw, double *w) { void *cftrec1_th(void *p); void *cftrec2_th(void *p); int i, idiv4, m, nthread; cdft_thread_t th[4]; cdft_arg_t ag[4]; nthread = 2; idiv4 = 0; m = n >> 1; if (n > CDFT_4THREADS_BEGIN_N) { nthread = 4; idiv4 = 1; m >>= 1; } for (i = 0; i < nthread; i++) { ag[i].n0 = n; ag[i].n = m; ag[i].a = &a[i * m]; ag[i].nw = nw; ag[i].w = w; if (i != idiv4) { cdft_thread_create(&th[i], cftrec1_th, &ag[i]); } else { cdft_thread_create(&th[i], cftrec2_th, &ag[i]); } } for (i = 0; i < nthread; i++) { cdft_thread_wait(th[i]); } } void *cftrec1_th(void *p) { int cfttree(int n, int j, int k, double *a, int nw, double *w); void cftleaf(int n, int isplt, double *a, int nw, double *w); void cftmdl1(int n, double *a, double *w); int isplt, j, k, m, n, n0, nw; double *a, *w; n0 = ((cdft_arg_t *) p)->n0; n = ((cdft_arg_t *) p)->n; a = ((cdft_arg_t *) p)->a; nw = ((cdft_arg_t *) p)->nw; w = ((cdft_arg_t *) p)->w; m = n0; while (m > 512) { m >>= 2; cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); } cftleaf(m, 1, &a[n - m], nw, w); k = 0; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } return (void *) 0; } void *cftrec2_th(void *p) { int cfttree(int n, int j, int k, double *a, int nw, double *w); void cftleaf(int n, int isplt, double *a, int nw, double *w); void cftmdl2(int n, double *a, double *w); int isplt, j, k, m, n, n0, nw; double *a, *w; n0 = ((cdft_arg_t *) p)->n0; n = ((cdft_arg_t *) p)->n; a = ((cdft_arg_t *) p)->a; nw = ((cdft_arg_t *) p)->nw; w = ((cdft_arg_t *) p)->w; k = 1; m = n0; while (m > 512) { m >>= 2; k <<= 2; cftmdl2(m, &a[n - m], &w[nw - m]); } cftleaf(m, 0, &a[n - m], nw, w); k >>= 1; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } return (void *) 0; } #endif /* USE_CDFT_THREADS */ void cftrec4(int n, double *a, int nw, double *w) { int cfttree(int n, int j, int k, double *a, int nw, double *w); void cftleaf(int n, int isplt, double *a, int nw, double *w); void cftmdl1(int n, double *a, double *w); int isplt, j, k, m; m = n; while (m > 512) { m >>= 2; cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]); } cftleaf(m, 1, &a[n - m], nw, w); k = 0; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a, nw, w); cftleaf(m, isplt, &a[j - m], nw, w); } } int cfttree(int n, int j, int k, double *a, int nw, double *w) { void cftmdl1(int n, double *a, double *w); void cftmdl2(int n, double *a, double *w); int i, isplt, m; if ((k & 3) != 0) { isplt = k & 1; if (isplt != 0) { cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]); } else { cftmdl2(n, &a[j - n], &w[nw - n]); } } else { m = n; for (i = k; (i & 3) == 0; i >>= 2) { m <<= 2; } isplt = i & 1; if (isplt != 0) { while (m > 128) { cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]); m >>= 2; } } else { while (m > 128) { cftmdl2(m, &a[j - m], &w[nw - m]); m >>= 2; } } } return isplt; } void cftleaf(int n, int isplt, double *a, int nw, double *w) { void cftmdl1(int n, double *a, double *w); void cftmdl2(int n, double *a, double *w); void cftf161(double *a, double *w); void cftf162(double *a, double *w); void cftf081(double *a, double *w); void cftf082(double *a, double *w); if (n == 512) { cftmdl1(128, a, &w[nw - 64]); cftf161(a, &w[nw - 8]); cftf162(&a[32], &w[nw - 32]); cftf161(&a[64], &w[nw - 8]); cftf161(&a[96], &w[nw - 8]); cftmdl2(128, &a[128], &w[nw - 128]); cftf161(&a[128], &w[nw - 8]); cftf162(&a[160], &w[nw - 32]); cftf161(&a[192], &w[nw - 8]); cftf162(&a[224], &w[nw - 32]); cftmdl1(128, &a[256], &w[nw - 64]); cftf161(&a[256], &w[nw - 8]); cftf162(&a[288], &w[nw - 32]); cftf161(&a[320], &w[nw - 8]); cftf161(&a[352], &w[nw - 8]); if (isplt != 0) { cftmdl1(128, &a[384], &w[nw - 64]); cftf161(&a[480], &w[nw - 8]); } else { cftmdl2(128, &a[384], &w[nw - 128]); cftf162(&a[480], &w[nw - 32]); } cftf161(&a[384], &w[nw - 8]); cftf162(&a[416], &w[nw - 32]); cftf161(&a[448], &w[nw - 8]); } else { cftmdl1(64, a, &w[nw - 32]); cftf081(a, &w[nw - 8]); cftf082(&a[16], &w[nw - 8]); cftf081(&a[32], &w[nw - 8]); cftf081(&a[48], &w[nw - 8]); cftmdl2(64, &a[64], &w[nw - 64]); cftf081(&a[64], &w[nw - 8]); cftf082(&a[80], &w[nw - 8]); cftf081(&a[96], &w[nw - 8]); cftf082(&a[112], &w[nw - 8]); cftmdl1(64, &a[128], &w[nw - 32]); cftf081(&a[128], &w[nw - 8]); cftf082(&a[144], &w[nw - 8]); cftf081(&a[160], &w[nw - 8]); cftf081(&a[176], &w[nw - 8]); if (isplt != 0) { cftmdl1(64, &a[192], &w[nw - 32]); cftf081(&a[240], &w[nw - 8]); } else { cftmdl2(64, &a[192], &w[nw - 64]); cftf082(&a[240], &w[nw - 8]); } cftf081(&a[192], &w[nw - 8]); cftf082(&a[208], &w[nw - 8]); cftf081(&a[224], &w[nw - 8]); } } void cftmdl1(int n, double *a, double *w) { int j, j0, j1, j2, j3, k, m, mh; double wn4r, wk1r, wk1i, wk3r, wk3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = a[1] + a[j2 + 1]; x1r = a[0] - a[j2]; x1i = a[1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j2] = x1r - x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; wn4r = w[1]; k = 0; for (j = 2; j < mh; j += 2) { k += 4; wk1r = w[k]; wk1i = w[k + 1]; wk3r = w[k + 2]; wk3i = w[k + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = a[j + 1] + a[j2 + 1]; x1r = a[j] - a[j2]; x1i = a[j + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; } j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wn4r * (x0r - x0i); a[j2 + 1] = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = -wn4r * (x0r + x0i); a[j3 + 1] = -wn4r * (x0i - x0r); } void cftmdl2(int n, double *a, double *w) { int j, j0, j1, j2, j3, k, kr, m, mh; double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; mh = n >> 3; m = 2 * mh; wn4r = w[1]; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] - a[j2 + 1]; x0i = a[1] + a[j2]; x1r = a[0] + a[j2 + 1]; x1i = a[1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wn4r * (x2r - x2i); y0i = wn4r * (x2i + x2r); a[0] = x0r + y0r; a[1] = x0i + y0i; a[j1] = x0r - y0r; a[j1 + 1] = x0i - y0i; y0r = wn4r * (x3r - x3i); y0i = wn4r * (x3i + x3r); a[j2] = x1r - y0i; a[j2 + 1] = x1i + y0r; a[j3] = x1r + y0i; a[j3 + 1] = x1i - y0r; k = 0; kr = 2 * m; for (j = 2; j < mh; j += 2) { k += 4; wk1r = w[k]; wk1i = w[k + 1]; wk3r = w[k + 2]; wk3i = w[k + 3]; kr -= 4; wd1i = w[kr]; wd1r = w[kr + 1]; wd3i = w[kr + 2]; wd3r = w[kr + 3]; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] - a[j2 + 1]; x0i = a[j + 1] + a[j2]; x1r = a[j] + a[j2 + 1]; x1i = a[j + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wk1r * x0r - wk1i * x0i; y0i = wk1r * x0i + wk1i * x0r; y2r = wd1r * x2r - wd1i * x2i; y2i = wd1r * x2i + wd1i * x2r; a[j] = y0r + y2r; a[j + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wk3r * x1r + wk3i * x1i; y0i = wk3r * x1i - wk3i * x1r; y2r = wd3r * x3r + wd3i * x3i; y2i = wd3r * x3i - wd3i * x3r; a[j2] = y0r + y2r; a[j2 + 1] = y0i + y2i; a[j3] = y0r - y2r; a[j3 + 1] = y0i - y2i; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] - a[j2 + 1]; x0i = a[j0 + 1] + a[j2]; x1r = a[j0] + a[j2 + 1]; x1i = a[j0 + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wd1i * x0r - wd1r * x0i; y0i = wd1i * x0i + wd1r * x0r; y2r = wk1i * x2r - wk1r * x2i; y2i = wk1i * x2i + wk1r * x2r; a[j0] = y0r + y2r; a[j0 + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wd3i * x1r + wd3r * x1i; y0i = wd3i * x1i - wd3r * x1r; y2r = wk3i * x3r + wk3r * x3i; y2i = wk3i * x3i - wk3r * x3r; a[j2] = y0r + y2r; a[j2 + 1] = y0i + y2i; a[j3] = y0r - y2r; a[j3 + 1] = y0i - y2i; } wk1r = w[m]; wk1i = w[m + 1]; j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] - a[j2 + 1]; x0i = a[j0 + 1] + a[j2]; x1r = a[j0] + a[j2 + 1]; x1i = a[j0 + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wk1r * x0r - wk1i * x0i; y0i = wk1r * x0i + wk1i * x0r; y2r = wk1i * x2r - wk1r * x2i; y2i = wk1i * x2i + wk1r * x2r; a[j0] = y0r + y2r; a[j0 + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wk1i * x1r - wk1r * x1i; y0i = wk1i * x1i + wk1r * x1r; y2r = wk1r * x3r - wk1i * x3i; y2i = wk1r * x3i + wk1i * x3r; a[j2] = y0r - y2r; a[j2 + 1] = y0i - y2i; a[j3] = y0r + y2r; a[j3 + 1] = y0i + y2i; } void cftfx41(int n, double *a, int nw, double *w) { void cftf161(double *a, double *w); void cftf162(double *a, double *w); void cftf081(double *a, double *w); void cftf082(double *a, double *w); if (n == 128) { cftf161(a, &w[nw - 8]); cftf162(&a[32], &w[nw - 32]); cftf161(&a[64], &w[nw - 8]); cftf161(&a[96], &w[nw - 8]); } else { cftf081(a, &w[nw - 8]); cftf082(&a[16], &w[nw - 8]); cftf081(&a[32], &w[nw - 8]); cftf081(&a[48], &w[nw - 8]); } } void cftf161(double *a, double *w) { double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; wn4r = w[1]; wk1r = w[2]; wk1i = w[3]; x0r = a[0] + a[16]; x0i = a[1] + a[17]; x1r = a[0] - a[16]; x1i = a[1] - a[17]; x2r = a[8] + a[24]; x2i = a[9] + a[25]; x3r = a[8] - a[24]; x3i = a[9] - a[25]; y0r = x0r + x2r; y0i = x0i + x2i; y4r = x0r - x2r; y4i = x0i - x2i; y8r = x1r - x3i; y8i = x1i + x3r; y12r = x1r + x3i; y12i = x1i - x3r; x0r = a[2] + a[18]; x0i = a[3] + a[19]; x1r = a[2] - a[18]; x1i = a[3] - a[19]; x2r = a[10] + a[26]; x2i = a[11] + a[27]; x3r = a[10] - a[26]; x3i = a[11] - a[27]; y1r = x0r + x2r; y1i = x0i + x2i; y5r = x0r - x2r; y5i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y9r = wk1r * x0r - wk1i * x0i; y9i = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; y13r = wk1i * x0r - wk1r * x0i; y13i = wk1i * x0i + wk1r * x0r; x0r = a[4] + a[20]; x0i = a[5] + a[21]; x1r = a[4] - a[20]; x1i = a[5] - a[21]; x2r = a[12] + a[28]; x2i = a[13] + a[29]; x3r = a[12] - a[28]; x3i = a[13] - a[29]; y2r = x0r + x2r; y2i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y10r = wn4r * (x0r - x0i); y10i = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; y14r = wn4r * (x0r + x0i); y14i = wn4r * (x0i - x0r); x0r = a[6] + a[22]; x0i = a[7] + a[23]; x1r = a[6] - a[22]; x1i = a[7] - a[23]; x2r = a[14] + a[30]; x2i = a[15] + a[31]; x3r = a[14] - a[30]; x3i = a[15] - a[31]; y3r = x0r + x2r; y3i = x0i + x2i; y7r = x0r - x2r; y7i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y11r = wk1i * x0r - wk1r * x0i; y11i = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; y15r = wk1r * x0r - wk1i * x0i; y15i = wk1r * x0i + wk1i * x0r; x0r = y12r - y14r; x0i = y12i - y14i; x1r = y12r + y14r; x1i = y12i + y14i; x2r = y13r - y15r; x2i = y13i - y15i; x3r = y13r + y15r; x3i = y13i + y15i; a[24] = x0r + x2r; a[25] = x0i + x2i; a[26] = x0r - x2r; a[27] = x0i - x2i; a[28] = x1r - x3i; a[29] = x1i + x3r; a[30] = x1r + x3i; a[31] = x1i - x3r; x0r = y8r + y10r; x0i = y8i + y10i; x1r = y8r - y10r; x1i = y8i - y10i; x2r = y9r + y11r; x2i = y9i + y11i; x3r = y9r - y11r; x3i = y9i - y11i; a[16] = x0r + x2r; a[17] = x0i + x2i; a[18] = x0r - x2r; a[19] = x0i - x2i; a[20] = x1r - x3i; a[21] = x1i + x3r; a[22] = x1r + x3i; a[23] = x1i - x3r; x0r = y5r - y7i; x0i = y5i + y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); x0r = y5r + y7i; x0i = y5i - y7r; x3r = wn4r * (x0r - x0i); x3i = wn4r * (x0i + x0r); x0r = y4r - y6i; x0i = y4i + y6r; x1r = y4r + y6i; x1i = y4i - y6r; a[8] = x0r + x2r; a[9] = x0i + x2i; a[10] = x0r - x2r; a[11] = x0i - x2i; a[12] = x1r - x3i; a[13] = x1i + x3r; a[14] = x1r + x3i; a[15] = x1i - x3r; x0r = y0r + y2r; x0i = y0i + y2i; x1r = y0r - y2r; x1i = y0i - y2i; x2r = y1r + y3r; x2i = y1i + y3i; x3r = y1r - y3r; x3i = y1i - y3i; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x0r - x2r; a[3] = x0i - x2i; a[4] = x1r - x3i; a[5] = x1i + x3r; a[6] = x1r + x3i; a[7] = x1i - x3r; } void cftf162(double *a, double *w) { double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r, x2i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; wn4r = w[1]; wk1r = w[4]; wk1i = w[5]; wk3r = w[6]; wk3i = -w[7]; wk2r = w[8]; wk2i = w[9]; x1r = a[0] - a[17]; x1i = a[1] + a[16]; x0r = a[8] - a[25]; x0i = a[9] + a[24]; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); y0r = x1r + x2r; y0i = x1i + x2i; y4r = x1r - x2r; y4i = x1i - x2i; x1r = a[0] + a[17]; x1i = a[1] - a[16]; x0r = a[8] + a[25]; x0i = a[9] - a[24]; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); y8r = x1r - x2i; y8i = x1i + x2r; y12r = x1r + x2i; y12i = x1i - x2r; x0r = a[2] - a[19]; x0i = a[3] + a[18]; x1r = wk1r * x0r - wk1i * x0i; x1i = wk1r * x0i + wk1i * x0r; x0r = a[10] - a[27]; x0i = a[11] + a[26]; x2r = wk3i * x0r - wk3r * x0i; x2i = wk3i * x0i + wk3r * x0r; y1r = x1r + x2r; y1i = x1i + x2i; y5r = x1r - x2r; y5i = x1i - x2i; x0r = a[2] + a[19]; x0i = a[3] - a[18]; x1r = wk3r * x0r - wk3i * x0i; x1i = wk3r * x0i + wk3i * x0r; x0r = a[10] + a[27]; x0i = a[11] - a[26]; x2r = wk1r * x0r + wk1i * x0i; x2i = wk1r * x0i - wk1i * x0r; y9r = x1r - x2r; y9i = x1i - x2i; y13r = x1r + x2r; y13i = x1i + x2i; x0r = a[4] - a[21]; x0i = a[5] + a[20]; x1r = wk2r * x0r - wk2i * x0i; x1i = wk2r * x0i + wk2i * x0r; x0r = a[12] - a[29]; x0i = a[13] + a[28]; x2r = wk2i * x0r - wk2r * x0i; x2i = wk2i * x0i + wk2r * x0r; y2r = x1r + x2r; y2i = x1i + x2i; y6r = x1r - x2r; y6i = x1i - x2i; x0r = a[4] + a[21]; x0i = a[5] - a[20]; x1r = wk2i * x0r - wk2r * x0i; x1i = wk2i * x0i + wk2r * x0r; x0r = a[12] + a[29]; x0i = a[13] - a[28]; x2r = wk2r * x0r - wk2i * x0i; x2i = wk2r * x0i + wk2i * x0r; y10r = x1r - x2r; y10i = x1i - x2i; y14r = x1r + x2r; y14i = x1i + x2i; x0r = a[6] - a[23]; x0i = a[7] + a[22]; x1r = wk3r * x0r - wk3i * x0i; x1i = wk3r * x0i + wk3i * x0r; x0r = a[14] - a[31]; x0i = a[15] + a[30]; x2r = wk1i * x0r - wk1r * x0i; x2i = wk1i * x0i + wk1r * x0r; y3r = x1r + x2r; y3i = x1i + x2i; y7r = x1r - x2r; y7i = x1i - x2i; x0r = a[6] + a[23]; x0i = a[7] - a[22]; x1r = wk1i * x0r + wk1r * x0i; x1i = wk1i * x0i - wk1r * x0r; x0r = a[14] + a[31]; x0i = a[15] - a[30]; x2r = wk3i * x0r - wk3r * x0i; x2i = wk3i * x0i + wk3r * x0r; y11r = x1r + x2r; y11i = x1i + x2i; y15r = x1r - x2r; y15i = x1i - x2i; x1r = y0r + y2r; x1i = y0i + y2i; x2r = y1r + y3r; x2i = y1i + y3i; a[0] = x1r + x2r; a[1] = x1i + x2i; a[2] = x1r - x2r; a[3] = x1i - x2i; x1r = y0r - y2r; x1i = y0i - y2i; x2r = y1r - y3r; x2i = y1i - y3i; a[4] = x1r - x2i; a[5] = x1i + x2r; a[6] = x1r + x2i; a[7] = x1i - x2r; x1r = y4r - y6i; x1i = y4i + y6r; x0r = y5r - y7i; x0i = y5i + y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[8] = x1r + x2r; a[9] = x1i + x2i; a[10] = x1r - x2r; a[11] = x1i - x2i; x1r = y4r + y6i; x1i = y4i - y6r; x0r = y5r + y7i; x0i = y5i - y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[12] = x1r - x2i; a[13] = x1i + x2r; a[14] = x1r + x2i; a[15] = x1i - x2r; x1r = y8r + y10r; x1i = y8i + y10i; x2r = y9r - y11r; x2i = y9i - y11i; a[16] = x1r + x2r; a[17] = x1i + x2i; a[18] = x1r - x2r; a[19] = x1i - x2i; x1r = y8r - y10r; x1i = y8i - y10i; x2r = y9r + y11r; x2i = y9i + y11i; a[20] = x1r - x2i; a[21] = x1i + x2r; a[22] = x1r + x2i; a[23] = x1i - x2r; x1r = y12r - y14i; x1i = y12i + y14r; x0r = y13r + y15i; x0i = y13i - y15r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[24] = x1r + x2r; a[25] = x1i + x2i; a[26] = x1r - x2r; a[27] = x1i - x2i; x1r = y12r + y14i; x1i = y12i - y14r; x0r = y13r - y15i; x0i = y13i + y15r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[28] = x1r - x2i; a[29] = x1i + x2r; a[30] = x1r + x2i; a[31] = x1i - x2r; } void cftf081(double *a, double *w) { double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = w[1]; x0r = a[0] + a[8]; x0i = a[1] + a[9]; x1r = a[0] - a[8]; x1i = a[1] - a[9]; x2r = a[4] + a[12]; x2i = a[5] + a[13]; x3r = a[4] - a[12]; x3i = a[5] - a[13]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[2] + a[10]; x0i = a[3] + a[11]; x1r = a[2] - a[10]; x1i = a[3] - a[11]; x2r = a[6] + a[14]; x2i = a[7] + a[15]; x3r = a[6] - a[14]; x3i = a[7] - a[15]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[8] = y1r + y5r; a[9] = y1i + y5i; a[10] = y1r - y5r; a[11] = y1i - y5i; a[12] = y3r - y7i; a[13] = y3i + y7r; a[14] = y3r + y7i; a[15] = y3i - y7r; a[0] = y0r + y4r; a[1] = y0i + y4i; a[2] = y0r - y4r; a[3] = y0i - y4i; a[4] = y2r - y6i; a[5] = y2i + y6r; a[6] = y2r + y6i; a[7] = y2i - y6r; } void cftf082(double *a, double *w) { double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = w[1]; wk1r = w[2]; wk1i = w[3]; y0r = a[0] - a[9]; y0i = a[1] + a[8]; y1r = a[0] + a[9]; y1i = a[1] - a[8]; x0r = a[4] - a[13]; x0i = a[5] + a[12]; y2r = wn4r * (x0r - x0i); y2i = wn4r * (x0i + x0r); x0r = a[4] + a[13]; x0i = a[5] - a[12]; y3r = wn4r * (x0r - x0i); y3i = wn4r * (x0i + x0r); x0r = a[2] - a[11]; x0i = a[3] + a[10]; y4r = wk1r * x0r - wk1i * x0i; y4i = wk1r * x0i + wk1i * x0r; x0r = a[2] + a[11]; x0i = a[3] - a[10]; y5r = wk1i * x0r - wk1r * x0i; y5i = wk1i * x0i + wk1r * x0r; x0r = a[6] - a[15]; x0i = a[7] + a[14]; y6r = wk1i * x0r - wk1r * x0i; y6i = wk1i * x0i + wk1r * x0r; x0r = a[6] + a[15]; x0i = a[7] - a[14]; y7r = wk1r * x0r - wk1i * x0i; y7i = wk1r * x0i + wk1i * x0r; x0r = y0r + y2r; x0i = y0i + y2i; x1r = y4r + y6r; x1i = y4i + y6i; a[0] = x0r + x1r; a[1] = x0i + x1i; a[2] = x0r - x1r; a[3] = x0i - x1i; x0r = y0r - y2r; x0i = y0i - y2i; x1r = y4r - y6r; x1i = y4i - y6i; a[4] = x0r - x1i; a[5] = x0i + x1r; a[6] = x0r + x1i; a[7] = x0i - x1r; x0r = y1r - y3i; x0i = y1i + y3r; x1r = y5r - y7r; x1i = y5i - y7i; a[8] = x0r + x1r; a[9] = x0i + x1i; a[10] = x0r - x1r; a[11] = x0i - x1i; x0r = y1r + y3i; x0i = y1i - y3r; x1r = y5r + y7r; x1i = y5i + y7i; a[12] = x0r - x1i; a[13] = x0i + x1r; a[14] = x0r + x1i; a[15] = x0i - x1r; } void cftf040(double *a) { double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[4]; x0i = a[1] + a[5]; x1r = a[0] - a[4]; x1i = a[1] - a[5]; x2r = a[2] + a[6]; x2i = a[3] + a[7]; x3r = a[2] - a[6]; x3i = a[3] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x1r - x3i; a[3] = x1i + x3r; a[4] = x0r - x2r; a[5] = x0i - x2i; a[6] = x1r + x3i; a[7] = x1i - x3r; } void cftb040(double *a) { double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[4]; x0i = a[1] + a[5]; x1r = a[0] - a[4]; x1i = a[1] - a[5]; x2r = a[2] + a[6]; x2i = a[3] + a[7]; x3r = a[2] - a[6]; x3i = a[3] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x1r + x3i; a[3] = x1i - x3r; a[4] = x0r - x2r; a[5] = x0i - x2i; a[6] = x1r - x3i; a[7] = x1i + x3r; } void cftx020(double *a) { double x0r, x0i; x0r = a[0] - a[2]; x0i = a[1] - a[3]; a[0] += a[2]; a[1] += a[3]; a[2] = x0r; a[3] = x0i; } void rftfsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } void rftbsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr, xi, yr, yi; m = n >> 1; ks = 2 * nc / m; kk = 0; for (j = 2; j < m; j += 2) { k = n - j; kk += ks; wkr = 0.5 - c[nc - kk]; wki = c[kk]; xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; } } void dctsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[j] - wkr * a[k]; a[j] = wkr * a[j] + wki * a[k]; a[k] = xr; } a[m] *= c[0]; } void dstsub(int n, double *a, int nc, double *c) { int j, k, kk, ks, m; double wkr, wki, xr; m = n >> 1; ks = nc / n; kk = 0; for (j = 1; j < m; j++) { k = n - j; kk += ks; wkr = c[kk] - c[nc - kk]; wki = c[kk] + c[nc - kk]; xr = wki * a[k] - wkr * a[j]; a[k] = wkr * a[k] + wki * a[j]; a[j] = xr; } a[m] *= c[0]; } OouraFFT-1.0/fftsg.f000066400000000000000000002511471246725775500143150ustar00rootroot00000000000000! Fast Fourier/Cosine/Sine Transform ! dimension :one ! data length :power of 2 ! decimation :frequency ! radix :split-radix ! data :inplace ! table :use ! subroutines ! cdft: Complex Discrete Fourier Transform ! rdft: Real Discrete Fourier Transform ! ddct: Discrete Cosine Transform ! ddst: Discrete Sine Transform ! dfct: Cosine Transform of RDFT (Real Symmetric DFT) ! dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) ! ! ! -------- Complex DFT (Discrete Fourier Transform) -------- ! [definition] ! ! X(k) = sum_j=0^n-1 x(j)*exp(2*pi*i*j*k/n), 0<=k ! X(k) = sum_j=0^n-1 x(j)*exp(-2*pi*i*j*k/n), 0<=k ! ip(0) = 0 ! first time only ! call cdft(2*n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call cdft(2*n, -1, a, ip, w) ! [parameters] ! 2*n :data length (integer) ! n >= 1, n = power of 2 ! a(0:2*n-1) :input/output data (real*8) ! input data ! a(2*j) = Re(x(j)), ! a(2*j+1) = Im(x(j)), 0<=j= 2+sqrt(n) ! strictly, ! length of ip >= ! 2+2**(int(log(n+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n/2-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call cdft(2*n, -1, a, ip, w) ! is ! call cdft(2*n, 1, a, ip, w) ! do j = 0, 2 * n - 1 ! a(j) = a(j) / n ! end do ! . ! ! ! -------- Real DFT / Inverse of Real DFT -------- ! [definition] ! RDFT ! R(k) = sum_j=0^n-1 a(j)*cos(2*pi*j*k/n), 0<=k<=n/2 ! I(k) = sum_j=0^n-1 a(j)*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) ! a(k) = (R(0) + R(n/2)*cos(pi*k))/2 + ! sum_j=1^n/2-1 R(j)*cos(2*pi*j*k/n) + ! sum_j=1^n/2-1 I(j)*sin(2*pi*j*k/n), 0<=k ! ip(0) = 0 ! first time only ! call rdft(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call rdft(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! ! output data ! a(2*k) = R(k), 0<=k ! input data ! a(2*j) = R(j), 0<=j= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n/2-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call rdft(n, 1, a, ip, w) ! is ! call rdft(n, -1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- ! [definition] ! IDCT (excluding scale) ! C(k) = sum_j=0^n-1 a(j)*cos(pi*j*(k+1/2)/n), 0<=k DCT ! C(k) = sum_j=0^n-1 a(j)*cos(pi*(j+1/2)*k/n), 0<=k ! ip(0) = 0 ! first time only ! call ddct(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call ddct(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! output data ! a(k) = C(k), 0<=k= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/4-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddct(n, -1, a, ip, w) ! is ! a(0) = a(0) / 2 ! call ddct(n, 1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- DST (Discrete Sine Transform) / Inverse of DST -------- ! [definition] ! IDST (excluding scale) ! S(k) = sum_j=1^n A(j)*sin(pi*j*(k+1/2)/n), 0<=k DST ! S(k) = sum_j=0^n-1 a(j)*sin(pi*(j+1/2)*k/n), 0 ! ip(0) = 0 ! first time only ! call ddst(n, 1, a, ip, w) ! ! ip(0) = 0 ! first time only ! call ddst(n, -1, a, ip, w) ! [parameters] ! n :data length (integer) ! n >= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! ! input data ! a(j) = A(j), 0 ! output data ! a(k) = S(k), 0= 2+sqrt(n/2) ! strictly, ! length of ip >= ! 2+2**(int(log(n/2+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/4-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddst(n, -1, a, ip, w) ! is ! a(0) = a(0) / 2 ! call ddst(n, 1, a, ip, w) ! do j = 0, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- Cosine Transform of RDFT (Real Symmetric DFT) -------- ! [definition] ! C(k) = sum_j=0^n a(j)*cos(pi*j*k/n), 0<=k<=n ! [usage] ! ip(0) = 0 ! first time only ! call dfct(n, a, t, ip, w) ! [parameters] ! n :data length - 1 (integer) ! n >= 2, n = power of 2 ! a(0:n) :input/output data (real*8) ! output data ! a(k) = C(k), 0<=k<=n ! t(0:n/2) :work area (real*8) ! ip(0:*) :work area for bit reversal (integer) ! length of ip >= 2+sqrt(n/4) ! strictly, ! length of ip >= ! 2+2**(int(log(n/4+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/8-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! a(0) = a(0) / 2 ! a(n) = a(n) / 2 ! call dfct(n, a, t, ip, w) ! is ! a(0) = a(0) / 2 ! a(n) = a(n) / 2 ! call dfct(n, a, t, ip, w) ! do j = 0, n ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- ! [definition] ! S(k) = sum_j=1^n-1 a(j)*sin(pi*j*k/n), 0= 2, n = power of 2 ! a(0:n-1) :input/output data (real*8) ! output data ! a(k) = S(k), 0= 2+sqrt(n/4) ! strictly, ! length of ip >= ! 2+2**(int(log(n/4+0.5)/log(2.0))/2). ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:n*5/8-1) :cos/sin table (real*8) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call dfst(n, a, t, ip, w) ! is ! call dfst(n, a, t, ip, w) ! do j = 1, n - 1 ! a(j) = a(j) * 2 / n ! end do ! . ! ! ! Appendix : ! The cos/sin table is recalculated when the larger table required. ! w() and ip() are compatible with all routines. ! ! subroutine cdft(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), nw real*8 a(0 : n - 1), w(0 : *) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if if (isgn .ge. 0) then call cftfsub(n, a, ip, nw, w) else call cftbsub(n, a, ip, nw, w) end if end ! subroutine rdft(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), nw, nc real*8 a(0 : n - 1), w(0 : *), xi nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 4 * nc) then nc = n / 4 call makect(nc, ip, w(nw)) end if if (isgn .ge. 0) then if (n .gt. 4) then call cftfsub(n, a, ip, nw, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, ip, nw, w) end if xi = a(0) - a(1) a(0) = a(0) + a(1) a(1) = xi else a(1) = 0.5d0 * (a(0) - a(1)) a(0) = a(0) - a(1) if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call cftbsub(n, a, ip, nw, w) else if (n .eq. 4) then call cftbsub(n, a, ip, nw, w) end if end if end ! subroutine ddct(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), j, nw, nc real*8 a(0 : n - 1), w(0 : *), xr nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if if (isgn .lt. 0) then xr = a(n - 1) do j = n - 2, 2, -2 a(j + 1) = a(j) - a(j - 1) a(j) = a(j) + a(j - 1) end do a(1) = a(0) - xr a(0) = a(0) + xr if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call cftbsub(n, a, ip, nw, w) else if (n .eq. 4) then call cftbsub(n, a, ip, nw, w) end if end if call dctsub(n, a, nc, w(nw)) if (isgn .ge. 0) then if (n .gt. 4) then call cftfsub(n, a, ip, nw, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, ip, nw, w) end if xr = a(0) - a(1) a(0) = a(0) + a(1) do j = 2, n - 2, 2 a(j - 1) = a(j) - a(j + 1) a(j) = a(j) + a(j + 1) end do a(n - 1) = xr end if end ! subroutine ddst(n, isgn, a, ip, w) integer n, isgn, ip(0 : *), j, nw, nc real*8 a(0 : n - 1), w(0 : *), xr nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if if (isgn .lt. 0) then xr = a(n - 1) do j = n - 2, 2, -2 a(j + 1) = -a(j) - a(j - 1) a(j) = a(j) - a(j - 1) end do a(1) = a(0) + xr a(0) = a(0) - xr if (n .gt. 4) then call rftbsub(n, a, nc, w(nw)) call cftbsub(n, a, ip, nw, w) else if (n .eq. 4) then call cftbsub(n, a, ip, nw, w) end if end if call dstsub(n, a, nc, w(nw)) if (isgn .ge. 0) then if (n .gt. 4) then call cftfsub(n, a, ip, nw, w) call rftfsub(n, a, nc, w(nw)) else if (n .eq. 4) then call cftfsub(n, a, ip, nw, w) end if xr = a(0) - a(1) a(0) = a(0) + a(1) do j = 2, n - 2, 2 a(j - 1) = -a(j) - a(j + 1) a(j) = a(j) - a(j + 1) end do a(n - 1) = -xr end if end ! subroutine dfct(n, a, t, ip, w) integer n, ip(0 : *), j, k, l, m, mh, nw, nc real*8 a(0 : n), t(0 : n / 2), w(0 : *), xr, xi, yr, yi nw = ip(0) if (n .gt. 8 * nw) then nw = n / 8 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 2 * nc) then nc = n / 2 call makect(nc, ip, w(nw)) end if m = n / 2 yi = a(m) xi = a(0) + a(n) a(0) = a(0) - a(n) t(0) = xi - yi t(m) = xi + yi if (n .gt. 2) then mh = m / 2 do j = 1, mh - 1 k = m - j xr = a(j) - a(n - j) xi = a(j) + a(n - j) yr = a(k) - a(n - k) yi = a(k) + a(n - k) a(j) = xr a(k) = yr t(j) = xi - yi t(k) = xi + yi end do t(mh) = a(mh) + a(n - mh) a(mh) = a(mh) - a(n - mh) call dctsub(m, a, nc, w(nw)) if (m .gt. 4) then call cftfsub(m, a, ip, nw, w) call rftfsub(m, a, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, a, ip, nw, w) end if a(n - 1) = a(0) - a(1) a(1) = a(0) + a(1) do j = m - 2, 2, -2 a(2 * j + 1) = a(j) + a(j + 1) a(2 * j - 1) = a(j) - a(j + 1) end do l = 2 m = mh do while (m .ge. 2) call dctsub(m, t, nc, w(nw)) if (m .gt. 4) then call cftfsub(m, t, ip, nw, w) call rftfsub(m, t, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, t, ip, nw, w) end if a(n - l) = t(0) - t(1) a(l) = t(0) + t(1) k = 0 do j = 2, m - 2, 2 k = k + 4 * l a(k - l) = t(j) - t(j + 1) a(k + l) = t(j) + t(j + 1) end do l = 2 * l mh = m / 2 do j = 0, mh - 1 k = m - j t(j) = t(m + k) - t(m + j) t(k) = t(m + k) + t(m + j) end do t(mh) = t(m + mh) m = mh end do a(l) = t(0) a(n) = t(2) - t(1) a(0) = t(2) + t(1) else a(1) = a(0) a(2) = t(0) a(0) = t(1) end if end ! subroutine dfst(n, a, t, ip, w) integer n, ip(0 : *), j, k, l, m, mh, nw, nc real*8 a(0 : n - 1), t(0 : n / 2 - 1), w(0 : *), xr, xi, yr, yi nw = ip(0) if (n .gt. 8 * nw) then nw = n / 8 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. 2 * nc) then nc = n / 2 call makect(nc, ip, w(nw)) end if if (n .gt. 2) then m = n / 2 mh = m / 2 do j = 1, mh - 1 k = m - j xr = a(j) + a(n - j) xi = a(j) - a(n - j) yr = a(k) + a(n - k) yi = a(k) - a(n - k) a(j) = xr a(k) = yr t(j) = xi + yi t(k) = xi - yi end do t(0) = a(mh) - a(n - mh) a(mh) = a(mh) + a(n - mh) a(0) = a(m) call dstsub(m, a, nc, w(nw)) if (m .gt. 4) then call cftfsub(m, a, ip, nw, w) call rftfsub(m, a, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, a, ip, nw, w) end if a(n - 1) = a(1) - a(0) a(1) = a(0) + a(1) do j = m - 2, 2, -2 a(2 * j + 1) = a(j) - a(j + 1) a(2 * j - 1) = -a(j) - a(j + 1) end do l = 2 m = mh do while (m .ge. 2) call dstsub(m, t, nc, w(nw)) if (m .gt. 4) then call cftfsub(m, t, ip, nw, w) call rftfsub(m, t, nc, w(nw)) else if (m .eq. 4) then call cftfsub(m, t, ip, nw, w) end if a(n - l) = t(1) - t(0) a(l) = t(0) + t(1) k = 0 do j = 2, m - 2, 2 k = k + 4 * l a(k - l) = -t(j) - t(j + 1) a(k + l) = t(j) - t(j + 1) end do l = 2 * l mh = m / 2 do j = 1, mh - 1 k = m - j t(j) = t(m + k) + t(m + j) t(k) = t(m + k) - t(m + j) end do t(0) = t(m + mh) m = mh end do a(l) = t(0) end if a(0) = 0 end ! ! -------- initializing routines -------- ! subroutine makewt(nw, ip, w) integer nw, ip(0 : *), j, nwh, nw0, nw1 real*8 w(0 : nw - 1), delta, wn4r, wk1r, wk1i, wk3r, wk3i ip(0) = nw ip(1) = 1 if (nw .gt. 2) then nwh = nw / 2 delta = atan(1.0d0) / nwh wn4r = cos(delta * nwh) w(0) = 1 w(1) = wn4r if (nwh .eq. 4) then w(2) = cos(delta * 2) w(3) = sin(delta * 2) else if (nwh .gt. 4) then call makeipt(nw, ip) w(2) = 0.5d0 / cos(delta * 2) w(3) = 0.5d0 / cos(delta * 6) do j = 4, nwh - 4, 4 w(j) = cos(delta * j) w(j + 1) = sin(delta * j) w(j + 2) = cos(3 * delta * j) w(j + 3) = -sin(3 * delta * j) end do end if nw0 = 0 do while (nwh .gt. 2) nw1 = nw0 + nwh nwh = nwh / 2 w(nw1) = 1 w(nw1 + 1) = wn4r if (nwh .eq. 4) then wk1r = w(nw0 + 4) wk1i = w(nw0 + 5) w(nw1 + 2) = wk1r w(nw1 + 3) = wk1i else if (nwh .gt. 4) then wk1r = w(nw0 + 4) wk3r = w(nw0 + 6) w(nw1 + 2) = 0.5d0 / wk1r w(nw1 + 3) = 0.5d0 / wk3r do j = 4, nwh - 4, 4 wk1r = w(nw0 + 2 * j) wk1i = w(nw0 + 2 * j + 1) wk3r = w(nw0 + 2 * j + 2) wk3i = w(nw0 + 2 * j + 3) w(nw1 + j) = wk1r w(nw1 + j + 1) = wk1i w(nw1 + j + 2) = wk3r w(nw1 + j + 3) = wk3i end do end if nw0 = nw1 end do end if end ! subroutine makeipt(nw, ip) integer nw, ip(0 : *), j, l, m, m2, p, q ip(2) = 0 ip(3) = 16 m = 2 l = nw do while (l .gt. 32) m2 = 2 * m q = 8 * m2 do j = m, m2 - 1 p = 4 * ip(j) ip(m + j) = p ip(m2 + j) = p + q end do m = m2 l = l / 4 end do end ! subroutine makect(nc, ip, c) integer nc, ip(0 : *), j, nch real*8 c(0 : nc - 1), delta ip(1) = nc if (nc .gt. 1) then nch = nc / 2 delta = atan(1.0d0) / nch c(0) = cos(delta * nch) c(nch) = 0.5d0 * c(0) do j = 1, nch - 1 c(j) = 0.5d0 * cos(delta * j) c(nc - j) = 0.5d0 * sin(delta * j) end do end if end ! ! -------- child routines -------- ! subroutine cftfsub(n, a, ip, nw, w) integer n, ip(0 : *), nw real*8 a(0 : n - 1), w(0 : nw - 1) if (n .gt. 8) then if (n .gt. 32) then call cftf1st(n, a, w(nw - n / 4)) if (n .gt. 512) then call cftrec4(n, a, nw, w) else if (n .gt. 128) then call cftleaf(n, 1, a, nw, w) else call cftfx41(n, a, nw, w) end if call bitrv2(n, ip, a) else if (n .eq. 32) then call cftf161(a, w(nw - 8)) call bitrv216(a) else call cftf081(a, w) call bitrv208(a) end if else if (n .eq. 8) then call cftf040(a) else if (n .eq. 4) then call cftx020(a) end if end ! subroutine cftbsub(n, a, ip, nw, w) integer n, ip(0 : *), nw real*8 a(0 : n - 1), w(0 : nw - 1) if (n .gt. 8) then if (n .gt. 32) then call cftb1st(n, a, w(nw - n / 4)) if (n .gt. 512) then call cftrec4(n, a, nw, w) else if (n .gt. 128) then call cftleaf(n, 1, a, nw, w) else call cftfx41(n, a, nw, w) end if call bitrv2conj(n, ip, a) else if (n .eq. 32) then call cftf161(a, w(nw - 8)) call bitrv216neg(a) else call cftf081(a, w) call bitrv208neg(a) end if else if (n .eq. 8) then call cftb040(a) else if (n .eq. 4) then call cftx020(a) end if end ! subroutine bitrv2(n, ip, a) integer n, ip(0 : *), j, j1, k, k1, l, m, nh, nm real*8 a(0 : n - 1), xr, xi, yr, yi m = 1 l = n / 4 do while (l .gt. 8) m = m * 2 l = l / 4 end do nh = n / 2 nm = 4 * m if (l .eq. 8) then do k = 0, m - 1 do j = 0, k - 1 j1 = 4 * j + 2 * ip(m + k) k1 = 4 * k + 2 * ip(m + j) xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 - nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nh k1 = k1 + 2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 + nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + 2 k1 = k1 + nh xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 - nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nh k1 = k1 - 2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 + nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do k1 = 4 * k + 2 * ip(m + k) j1 = k1 + 2 k1 = k1 + nh xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 - nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - 2 k1 = k1 - nh xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nh + 2 k1 = k1 + nh + 2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nh + nm k1 = k1 + 2 * nm - 2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do else do k = 0, m - 1 do j = 0, k - 1 j1 = 4 * j + ip(m + k) k1 = 4 * k + ip(m + j) xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nh k1 = k1 + 2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + 2 k1 = k1 + nh xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nh k1 = k1 - 2 xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do k1 = 4 * k + ip(m + k) j1 = k1 + 2 k1 = k1 + nh xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + nm xr = a(j1) xi = a(j1 + 1) yr = a(k1) yi = a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do end if end ! subroutine bitrv2conj(n, ip, a) integer n, ip(0 : *), j, j1, k, k1, l, m, nh, nm real*8 a(0 : n - 1), xr, xi, yr, yi m = 1 l = n / 4 do while (l .gt. 8) m = m * 2 l = l / 4 end do nh = n / 2 nm = 4 * m if (l .eq. 8) then do k = 0, m - 1 do j = 0, k - 1 j1 = 4 * j + 2 * ip(m + k) k1 = 4 * k + 2 * ip(m + j) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 - nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nh k1 = k1 + 2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 + nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + 2 k1 = k1 + nh xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 - nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nh k1 = k1 - 2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 + nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do k1 = 4 * k + 2 * ip(m + k) j1 = k1 + 2 k1 = k1 + nh a(j1 - 1) = -a(j1 - 1) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi a(k1 + 3) = -a(k1 + 3) j1 = j1 + nm k1 = k1 + 2 * nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 - nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - 2 k1 = k1 - nh xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nh + 2 k1 = k1 + nh + 2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nh + nm k1 = k1 + 2 * nm - 2 a(j1 - 1) = -a(j1 - 1) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi a(k1 + 3) = -a(k1 + 3) end do else do k = 0, m - 1 do j = 0, k - 1 j1 = 4 * j + ip(m + k) k1 = 4 * k + ip(m + j) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nh k1 = k1 + 2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + 2 k1 = k1 + nh xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 + nm k1 = k1 + nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nh k1 = k1 - 2 xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi j1 = j1 - nm k1 = k1 - nm xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi end do k1 = 4 * k + ip(m + k) j1 = k1 + 2 k1 = k1 + nh a(j1 - 1) = -a(j1 - 1) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi a(k1 + 3) = -a(k1 + 3) j1 = j1 + nm k1 = k1 + nm a(j1 - 1) = -a(j1 - 1) xr = a(j1) xi = -a(j1 + 1) yr = a(k1) yi = -a(k1 + 1) a(j1) = yr a(j1 + 1) = yi a(k1) = xr a(k1 + 1) = xi a(k1 + 3) = -a(k1 + 3) end do end if end ! subroutine bitrv216(a) real*8 a(0 : 31), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i real*8 x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i real*8 x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i x1r = a(2) x1i = a(3) x2r = a(4) x2i = a(5) x3r = a(6) x3i = a(7) x4r = a(8) x4i = a(9) x5r = a(10) x5i = a(11) x7r = a(14) x7i = a(15) x8r = a(16) x8i = a(17) x10r = a(20) x10i = a(21) x11r = a(22) x11i = a(23) x12r = a(24) x12i = a(25) x13r = a(26) x13i = a(27) x14r = a(28) x14i = a(29) a(2) = x8r a(3) = x8i a(4) = x4r a(5) = x4i a(6) = x12r a(7) = x12i a(8) = x2r a(9) = x2i a(10) = x10r a(11) = x10i a(14) = x14r a(15) = x14i a(16) = x1r a(17) = x1i a(20) = x5r a(21) = x5i a(22) = x13r a(23) = x13i a(24) = x3r a(25) = x3i a(26) = x11r a(27) = x11i a(28) = x7r a(29) = x7i end ! subroutine bitrv216neg(a) real*8 a(0 : 31), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i real*8 x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i real*8 x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i real*8 x13r, x13i, x14r, x14i, x15r, x15i x1r = a(2) x1i = a(3) x2r = a(4) x2i = a(5) x3r = a(6) x3i = a(7) x4r = a(8) x4i = a(9) x5r = a(10) x5i = a(11) x6r = a(12) x6i = a(13) x7r = a(14) x7i = a(15) x8r = a(16) x8i = a(17) x9r = a(18) x9i = a(19) x10r = a(20) x10i = a(21) x11r = a(22) x11i = a(23) x12r = a(24) x12i = a(25) x13r = a(26) x13i = a(27) x14r = a(28) x14i = a(29) x15r = a(30) x15i = a(31) a(2) = x15r a(3) = x15i a(4) = x7r a(5) = x7i a(6) = x11r a(7) = x11i a(8) = x3r a(9) = x3i a(10) = x13r a(11) = x13i a(12) = x5r a(13) = x5i a(14) = x9r a(15) = x9i a(16) = x1r a(17) = x1i a(18) = x14r a(19) = x14i a(20) = x6r a(21) = x6i a(22) = x10r a(23) = x10i a(24) = x2r a(25) = x2i a(26) = x12r a(27) = x12i a(28) = x4r a(29) = x4i a(30) = x8r a(31) = x8i end ! subroutine bitrv208(a) real*8 a(0 : 15), x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i x1r = a(2) x1i = a(3) x3r = a(6) x3i = a(7) x4r = a(8) x4i = a(9) x6r = a(12) x6i = a(13) a(2) = x4r a(3) = x4i a(6) = x6r a(7) = x6i a(8) = x1r a(9) = x1i a(12) = x3r a(13) = x3i end ! subroutine bitrv208neg(a) real*8 a(0 : 15), x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i real*8 x5r, x5i, x6r, x6i, x7r, x7i x1r = a(2) x1i = a(3) x2r = a(4) x2i = a(5) x3r = a(6) x3i = a(7) x4r = a(8) x4i = a(9) x5r = a(10) x5i = a(11) x6r = a(12) x6i = a(13) x7r = a(14) x7i = a(15) a(2) = x7r a(3) = x7i a(4) = x3r a(5) = x3i a(6) = x5r a(7) = x5i a(8) = x1r a(9) = x1i a(10) = x6r a(11) = x6i a(12) = x2r a(13) = x2i a(14) = x4r a(15) = x4i end ! subroutine cftf1st(n, a, w) integer n, j, j0, j1, j2, j3, k, m, mh real*8 a(0 : n - 1), w(0 : *) real*8 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i real*8 wd1r, wd1i, wd3r, wd3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i mh = n / 8 m = 2 * mh j1 = m j2 = j1 + m j3 = j2 + m x0r = a(0) + a(j2) x0i = a(1) + a(j2 + 1) x1r = a(0) - a(j2) x1i = a(1) - a(j2 + 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) a(0) = x0r + x2r a(1) = x0i + x2i a(j1) = x0r - x2r a(j1 + 1) = x0i - x2i a(j2) = x1r - x3i a(j2 + 1) = x1i + x3r a(j3) = x1r + x3i a(j3 + 1) = x1i - x3r wn4r = w(1) csc1 = w(2) csc3 = w(3) wd1r = 1 wd1i = 0 wd3r = 1 wd3i = 0 k = 0 do j = 2, mh - 6, 4 k = k + 4 wk1r = csc1 * (wd1r + w(k)) wk1i = csc1 * (wd1i + w(k + 1)) wk3r = csc3 * (wd3r + w(k + 2)) wk3i = csc3 * (wd3i + w(k + 3)) wd1r = w(k) wd1i = w(k + 1) wd3r = w(k + 2) wd3i = w(k + 3) j1 = j + m j2 = j1 + m j3 = j2 + m x0r = a(j) + a(j2) x0i = a(j + 1) + a(j2 + 1) x1r = a(j) - a(j2) x1i = a(j + 1) - a(j2 + 1) y0r = a(j + 2) + a(j2 + 2) y0i = a(j + 3) + a(j2 + 3) y1r = a(j + 2) - a(j2 + 2) y1i = a(j + 3) - a(j2 + 3) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) y2r = a(j1 + 2) + a(j3 + 2) y2i = a(j1 + 3) + a(j3 + 3) y3r = a(j1 + 2) - a(j3 + 2) y3i = a(j1 + 3) - a(j3 + 3) a(j) = x0r + x2r a(j + 1) = x0i + x2i a(j + 2) = y0r + y2r a(j + 3) = y0i + y2i a(j1) = x0r - x2r a(j1 + 1) = x0i - x2i a(j1 + 2) = y0r - y2r a(j1 + 3) = y0i - y2i x0r = x1r - x3i x0i = x1i + x3r a(j2) = wk1r * x0r - wk1i * x0i a(j2 + 1) = wk1r * x0i + wk1i * x0r x0r = y1r - y3i x0i = y1i + y3r a(j2 + 2) = wd1r * x0r - wd1i * x0i a(j2 + 3) = wd1r * x0i + wd1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3) = wk3r * x0r + wk3i * x0i a(j3 + 1) = wk3r * x0i - wk3i * x0r x0r = y1r + y3i x0i = y1i - y3r a(j3 + 2) = wd3r * x0r + wd3i * x0i a(j3 + 3) = wd3r * x0i - wd3i * x0r j0 = m - j j1 = j0 + m j2 = j1 + m j3 = j2 + m x0r = a(j0) + a(j2) x0i = a(j0 + 1) + a(j2 + 1) x1r = a(j0) - a(j2) x1i = a(j0 + 1) - a(j2 + 1) y0r = a(j0 - 2) + a(j2 - 2) y0i = a(j0 - 1) + a(j2 - 1) y1r = a(j0 - 2) - a(j2 - 2) y1i = a(j0 - 1) - a(j2 - 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) y2r = a(j1 - 2) + a(j3 - 2) y2i = a(j1 - 1) + a(j3 - 1) y3r = a(j1 - 2) - a(j3 - 2) y3i = a(j1 - 1) - a(j3 - 1) a(j0) = x0r + x2r a(j0 + 1) = x0i + x2i a(j0 - 2) = y0r + y2r a(j0 - 1) = y0i + y2i a(j1) = x0r - x2r a(j1 + 1) = x0i - x2i a(j1 - 2) = y0r - y2r a(j1 - 1) = y0i - y2i x0r = x1r - x3i x0i = x1i + x3r a(j2) = wk1i * x0r - wk1r * x0i a(j2 + 1) = wk1i * x0i + wk1r * x0r x0r = y1r - y3i x0i = y1i + y3r a(j2 - 2) = wd1i * x0r - wd1r * x0i a(j2 - 1) = wd1i * x0i + wd1r * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3) = wk3i * x0r + wk3r * x0i a(j3 + 1) = wk3i * x0i - wk3r * x0r x0r = y1r + y3i x0i = y1i - y3r a(j3 - 2) = wd3i * x0r + wd3r * x0i a(j3 - 1) = wd3i * x0i - wd3r * x0r end do wk1r = csc1 * (wd1r + wn4r) wk1i = csc1 * (wd1i + wn4r) wk3r = csc3 * (wd3r - wn4r) wk3i = csc3 * (wd3i - wn4r) j0 = mh j1 = j0 + m j2 = j1 + m j3 = j2 + m x0r = a(j0 - 2) + a(j2 - 2) x0i = a(j0 - 1) + a(j2 - 1) x1r = a(j0 - 2) - a(j2 - 2) x1i = a(j0 - 1) - a(j2 - 1) x2r = a(j1 - 2) + a(j3 - 2) x2i = a(j1 - 1) + a(j3 - 1) x3r = a(j1 - 2) - a(j3 - 2) x3i = a(j1 - 1) - a(j3 - 1) a(j0 - 2) = x0r + x2r a(j0 - 1) = x0i + x2i a(j1 - 2) = x0r - x2r a(j1 - 1) = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r a(j2 - 2) = wk1r * x0r - wk1i * x0i a(j2 - 1) = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3 - 2) = wk3r * x0r + wk3i * x0i a(j3 - 1) = wk3r * x0i - wk3i * x0r x0r = a(j0) + a(j2) x0i = a(j0 + 1) + a(j2 + 1) x1r = a(j0) - a(j2) x1i = a(j0 + 1) - a(j2 + 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) a(j0) = x0r + x2r a(j0 + 1) = x0i + x2i a(j1) = x0r - x2r a(j1 + 1) = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r a(j2) = wn4r * (x0r - x0i) a(j2 + 1) = wn4r * (x0i + x0r) x0r = x1r + x3i x0i = x1i - x3r a(j3) = -wn4r * (x0r + x0i) a(j3 + 1) = -wn4r * (x0i - x0r) x0r = a(j0 + 2) + a(j2 + 2) x0i = a(j0 + 3) + a(j2 + 3) x1r = a(j0 + 2) - a(j2 + 2) x1i = a(j0 + 3) - a(j2 + 3) x2r = a(j1 + 2) + a(j3 + 2) x2i = a(j1 + 3) + a(j3 + 3) x3r = a(j1 + 2) - a(j3 + 2) x3i = a(j1 + 3) - a(j3 + 3) a(j0 + 2) = x0r + x2r a(j0 + 3) = x0i + x2i a(j1 + 2) = x0r - x2r a(j1 + 3) = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r a(j2 + 2) = wk1i * x0r - wk1r * x0i a(j2 + 3) = wk1i * x0i + wk1r * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3 + 2) = wk3i * x0r + wk3r * x0i a(j3 + 3) = wk3i * x0i - wk3r * x0r end ! subroutine cftb1st(n, a, w) integer n, j, j0, j1, j2, j3, k, m, mh real*8 a(0 : n - 1), w(0 : *) real*8 wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i real*8 wd1r, wd1i, wd3r, wd3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i mh = n / 8 m = 2 * mh j1 = m j2 = j1 + m j3 = j2 + m x0r = a(0) + a(j2) x0i = -a(1) - a(j2 + 1) x1r = a(0) - a(j2) x1i = -a(1) + a(j2 + 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) a(0) = x0r + x2r a(1) = x0i - x2i a(j1) = x0r - x2r a(j1 + 1) = x0i + x2i a(j2) = x1r + x3i a(j2 + 1) = x1i + x3r a(j3) = x1r - x3i a(j3 + 1) = x1i - x3r wn4r = w(1) csc1 = w(2) csc3 = w(3) wd1r = 1 wd1i = 0 wd3r = 1 wd3i = 0 k = 0 do j = 2, mh - 6, 4 k = k + 4 wk1r = csc1 * (wd1r + w(k)) wk1i = csc1 * (wd1i + w(k + 1)) wk3r = csc3 * (wd3r + w(k + 2)) wk3i = csc3 * (wd3i + w(k + 3)) wd1r = w(k) wd1i = w(k + 1) wd3r = w(k + 2) wd3i = w(k + 3) j1 = j + m j2 = j1 + m j3 = j2 + m x0r = a(j) + a(j2) x0i = -a(j + 1) - a(j2 + 1) x1r = a(j) - a(j2) x1i = -a(j + 1) + a(j2 + 1) y0r = a(j + 2) + a(j2 + 2) y0i = -a(j + 3) - a(j2 + 3) y1r = a(j + 2) - a(j2 + 2) y1i = -a(j + 3) + a(j2 + 3) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) y2r = a(j1 + 2) + a(j3 + 2) y2i = a(j1 + 3) + a(j3 + 3) y3r = a(j1 + 2) - a(j3 + 2) y3i = a(j1 + 3) - a(j3 + 3) a(j) = x0r + x2r a(j + 1) = x0i - x2i a(j + 2) = y0r + y2r a(j + 3) = y0i - y2i a(j1) = x0r - x2r a(j1 + 1) = x0i + x2i a(j1 + 2) = y0r - y2r a(j1 + 3) = y0i + y2i x0r = x1r + x3i x0i = x1i + x3r a(j2) = wk1r * x0r - wk1i * x0i a(j2 + 1) = wk1r * x0i + wk1i * x0r x0r = y1r + y3i x0i = y1i + y3r a(j2 + 2) = wd1r * x0r - wd1i * x0i a(j2 + 3) = wd1r * x0i + wd1i * x0r x0r = x1r - x3i x0i = x1i - x3r a(j3) = wk3r * x0r + wk3i * x0i a(j3 + 1) = wk3r * x0i - wk3i * x0r x0r = y1r - y3i x0i = y1i - y3r a(j3 + 2) = wd3r * x0r + wd3i * x0i a(j3 + 3) = wd3r * x0i - wd3i * x0r j0 = m - j j1 = j0 + m j2 = j1 + m j3 = j2 + m x0r = a(j0) + a(j2) x0i = -a(j0 + 1) - a(j2 + 1) x1r = a(j0) - a(j2) x1i = -a(j0 + 1) + a(j2 + 1) y0r = a(j0 - 2) + a(j2 - 2) y0i = -a(j0 - 1) - a(j2 - 1) y1r = a(j0 - 2) - a(j2 - 2) y1i = -a(j0 - 1) + a(j2 - 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) y2r = a(j1 - 2) + a(j3 - 2) y2i = a(j1 - 1) + a(j3 - 1) y3r = a(j1 - 2) - a(j3 - 2) y3i = a(j1 - 1) - a(j3 - 1) a(j0) = x0r + x2r a(j0 + 1) = x0i - x2i a(j0 - 2) = y0r + y2r a(j0 - 1) = y0i - y2i a(j1) = x0r - x2r a(j1 + 1) = x0i + x2i a(j1 - 2) = y0r - y2r a(j1 - 1) = y0i + y2i x0r = x1r + x3i x0i = x1i + x3r a(j2) = wk1i * x0r - wk1r * x0i a(j2 + 1) = wk1i * x0i + wk1r * x0r x0r = y1r + y3i x0i = y1i + y3r a(j2 - 2) = wd1i * x0r - wd1r * x0i a(j2 - 1) = wd1i * x0i + wd1r * x0r x0r = x1r - x3i x0i = x1i - x3r a(j3) = wk3i * x0r + wk3r * x0i a(j3 + 1) = wk3i * x0i - wk3r * x0r x0r = y1r - y3i x0i = y1i - y3r a(j3 - 2) = wd3i * x0r + wd3r * x0i a(j3 - 1) = wd3i * x0i - wd3r * x0r end do wk1r = csc1 * (wd1r + wn4r) wk1i = csc1 * (wd1i + wn4r) wk3r = csc3 * (wd3r - wn4r) wk3i = csc3 * (wd3i - wn4r) j0 = mh j1 = j0 + m j2 = j1 + m j3 = j2 + m x0r = a(j0 - 2) + a(j2 - 2) x0i = -a(j0 - 1) - a(j2 - 1) x1r = a(j0 - 2) - a(j2 - 2) x1i = -a(j0 - 1) + a(j2 - 1) x2r = a(j1 - 2) + a(j3 - 2) x2i = a(j1 - 1) + a(j3 - 1) x3r = a(j1 - 2) - a(j3 - 2) x3i = a(j1 - 1) - a(j3 - 1) a(j0 - 2) = x0r + x2r a(j0 - 1) = x0i - x2i a(j1 - 2) = x0r - x2r a(j1 - 1) = x0i + x2i x0r = x1r + x3i x0i = x1i + x3r a(j2 - 2) = wk1r * x0r - wk1i * x0i a(j2 - 1) = wk1r * x0i + wk1i * x0r x0r = x1r - x3i x0i = x1i - x3r a(j3 - 2) = wk3r * x0r + wk3i * x0i a(j3 - 1) = wk3r * x0i - wk3i * x0r x0r = a(j0) + a(j2) x0i = -a(j0 + 1) - a(j2 + 1) x1r = a(j0) - a(j2) x1i = -a(j0 + 1) + a(j2 + 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) a(j0) = x0r + x2r a(j0 + 1) = x0i - x2i a(j1) = x0r - x2r a(j1 + 1) = x0i + x2i x0r = x1r + x3i x0i = x1i + x3r a(j2) = wn4r * (x0r - x0i) a(j2 + 1) = wn4r * (x0i + x0r) x0r = x1r - x3i x0i = x1i - x3r a(j3) = -wn4r * (x0r + x0i) a(j3 + 1) = -wn4r * (x0i - x0r) x0r = a(j0 + 2) + a(j2 + 2) x0i = -a(j0 + 3) - a(j2 + 3) x1r = a(j0 + 2) - a(j2 + 2) x1i = -a(j0 + 3) + a(j2 + 3) x2r = a(j1 + 2) + a(j3 + 2) x2i = a(j1 + 3) + a(j3 + 3) x3r = a(j1 + 2) - a(j3 + 2) x3i = a(j1 + 3) - a(j3 + 3) a(j0 + 2) = x0r + x2r a(j0 + 3) = x0i - x2i a(j1 + 2) = x0r - x2r a(j1 + 3) = x0i + x2i x0r = x1r + x3i x0i = x1i + x3r a(j2 + 2) = wk1i * x0r - wk1r * x0i a(j2 + 3) = wk1i * x0i + wk1r * x0r x0r = x1r - x3i x0i = x1i - x3r a(j3 + 2) = wk3i * x0r + wk3r * x0i a(j3 + 3) = wk3i * x0i - wk3r * x0r end ! subroutine cftrec4(n, a, nw, w) integer n, nw, cfttree, isplt, j, k, m real*8 a(0 : n - 1), w(0 : nw - 1) m = n do while (m .gt. 512) m = m / 4 call cftmdl1(m, a(n - m), w(nw - m / 2)) end do call cftleaf(m, 1, a(n - m), nw, w) k = 0 do j = n - m, m, -m k = k + 1 isplt = cfttree(m, j, k, a, nw, w) call cftleaf(m, isplt, a(j - m), nw, w) end do end ! integer function cfttree(n, j, k, a, nw, w) integer n, j, k, nw, i, isplt, m real*8 a(0 : j - 1), w(0 : nw - 1) if (mod(k, 4) .ne. 0) then isplt = mod(k, 2) if (isplt .ne. 0) then call cftmdl1(n, a(j - n), w(nw - n / 2)) else call cftmdl2(n, a(j - n), w(nw - n)) end if else m = n i = k do while (mod(i, 4) .eq. 0) m = m * 4 i = i / 4 end do isplt = mod(i, 2) if (isplt .ne. 0) then do while (m .gt. 128) call cftmdl1(m, a(j - m), w(nw - m / 2)) m = m / 4 end do else do while (m .gt. 128) call cftmdl2(m, a(j - m), w(nw - m)) m = m / 4 end do end if end if cfttree = isplt end ! subroutine cftleaf(n, isplt, a, nw, w) integer n, isplt, nw real*8 a(0 : n - 1), w(0 : nw - 1) if (n .eq. 512) then call cftmdl1(128, a, w(nw - 64)) call cftf161(a, w(nw - 8)) call cftf162(a(32), w(nw - 32)) call cftf161(a(64), w(nw - 8)) call cftf161(a(96), w(nw - 8)) call cftmdl2(128, a(128), w(nw - 128)) call cftf161(a(128), w(nw - 8)) call cftf162(a(160), w(nw - 32)) call cftf161(a(192), w(nw - 8)) call cftf162(a(224), w(nw - 32)) call cftmdl1(128, a(256), w(nw - 64)) call cftf161(a(256), w(nw - 8)) call cftf162(a(288), w(nw - 32)) call cftf161(a(320), w(nw - 8)) call cftf161(a(352), w(nw - 8)) if (isplt .ne. 0) then call cftmdl1(128, a(384), w(nw - 64)) call cftf161(a(480), w(nw - 8)) else call cftmdl2(128, a(384), w(nw - 128)) call cftf162(a(480), w(nw - 32)) end if call cftf161(a(384), w(nw - 8)) call cftf162(a(416), w(nw - 32)) call cftf161(a(448), w(nw - 8)) else call cftmdl1(64, a, w(nw - 32)) call cftf081(a, w(nw - 8)) call cftf082(a(16), w(nw - 8)) call cftf081(a(32), w(nw - 8)) call cftf081(a(48), w(nw - 8)) call cftmdl2(64, a(64), w(nw - 64)) call cftf081(a(64), w(nw - 8)) call cftf082(a(80), w(nw - 8)) call cftf081(a(96), w(nw - 8)) call cftf082(a(112), w(nw - 8)) call cftmdl1(64, a(128), w(nw - 32)) call cftf081(a(128), w(nw - 8)) call cftf082(a(144), w(nw - 8)) call cftf081(a(160), w(nw - 8)) call cftf081(a(176), w(nw - 8)) if (isplt .ne. 0) then call cftmdl1(64, a(192), w(nw - 32)) call cftf081(a(240), w(nw - 8)) else call cftmdl2(64, a(192), w(nw - 64)) call cftf082(a(240), w(nw - 8)) end if call cftf081(a(192), w(nw - 8)) call cftf082(a(208), w(nw - 8)) call cftf081(a(224), w(nw - 8)) end if end ! subroutine cftmdl1(n, a, w) integer n, j, j0, j1, j2, j3, k, m, mh real*8 a(0 : n - 1), w(0 : *) real*8 wn4r, wk1r, wk1i, wk3r, wk3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i mh = n / 8 m = 2 * mh j1 = m j2 = j1 + m j3 = j2 + m x0r = a(0) + a(j2) x0i = a(1) + a(j2 + 1) x1r = a(0) - a(j2) x1i = a(1) - a(j2 + 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) a(0) = x0r + x2r a(1) = x0i + x2i a(j1) = x0r - x2r a(j1 + 1) = x0i - x2i a(j2) = x1r - x3i a(j2 + 1) = x1i + x3r a(j3) = x1r + x3i a(j3 + 1) = x1i - x3r wn4r = w(1) k = 0 do j = 2, mh - 2, 2 k = k + 4 wk1r = w(k) wk1i = w(k + 1) wk3r = w(k + 2) wk3i = w(k + 3) j1 = j + m j2 = j1 + m j3 = j2 + m x0r = a(j) + a(j2) x0i = a(j + 1) + a(j2 + 1) x1r = a(j) - a(j2) x1i = a(j + 1) - a(j2 + 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) a(j) = x0r + x2r a(j + 1) = x0i + x2i a(j1) = x0r - x2r a(j1 + 1) = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r a(j2) = wk1r * x0r - wk1i * x0i a(j2 + 1) = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3) = wk3r * x0r + wk3i * x0i a(j3 + 1) = wk3r * x0i - wk3i * x0r j0 = m - j j1 = j0 + m j2 = j1 + m j3 = j2 + m x0r = a(j0) + a(j2) x0i = a(j0 + 1) + a(j2 + 1) x1r = a(j0) - a(j2) x1i = a(j0 + 1) - a(j2 + 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) a(j0) = x0r + x2r a(j0 + 1) = x0i + x2i a(j1) = x0r - x2r a(j1 + 1) = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r a(j2) = wk1i * x0r - wk1r * x0i a(j2 + 1) = wk1i * x0i + wk1r * x0r x0r = x1r + x3i x0i = x1i - x3r a(j3) = wk3i * x0r + wk3r * x0i a(j3 + 1) = wk3i * x0i - wk3r * x0r end do j0 = mh j1 = j0 + m j2 = j1 + m j3 = j2 + m x0r = a(j0) + a(j2) x0i = a(j0 + 1) + a(j2 + 1) x1r = a(j0) - a(j2) x1i = a(j0 + 1) - a(j2 + 1) x2r = a(j1) + a(j3) x2i = a(j1 + 1) + a(j3 + 1) x3r = a(j1) - a(j3) x3i = a(j1 + 1) - a(j3 + 1) a(j0) = x0r + x2r a(j0 + 1) = x0i + x2i a(j1) = x0r - x2r a(j1 + 1) = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r a(j2) = wn4r * (x0r - x0i) a(j2 + 1) = wn4r * (x0i + x0r) x0r = x1r + x3i x0i = x1i - x3r a(j3) = -wn4r * (x0r + x0i) a(j3 + 1) = -wn4r * (x0i - x0r) end ! subroutine cftmdl2(n, a, w) integer n, j, j0, j1, j2, j3, k, kr, m, mh real*8 a(0 : n - 1), w(0 : *) real*8 wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i real*8 y0r, y0i, y2r, y2i mh = n / 8 m = 2 * mh wn4r = w(1) j1 = m j2 = j1 + m j3 = j2 + m x0r = a(0) - a(j2 + 1) x0i = a(1) + a(j2) x1r = a(0) + a(j2 + 1) x1i = a(1) - a(j2) x2r = a(j1) - a(j3 + 1) x2i = a(j1 + 1) + a(j3) x3r = a(j1) + a(j3 + 1) x3i = a(j1 + 1) - a(j3) y0r = wn4r * (x2r - x2i) y0i = wn4r * (x2i + x2r) a(0) = x0r + y0r a(1) = x0i + y0i a(j1) = x0r - y0r a(j1 + 1) = x0i - y0i y0r = wn4r * (x3r - x3i) y0i = wn4r * (x3i + x3r) a(j2) = x1r - y0i a(j2 + 1) = x1i + y0r a(j3) = x1r + y0i a(j3 + 1) = x1i - y0r k = 0 kr = 2 * m do j = 2, mh - 2, 2 k = k + 4 wk1r = w(k) wk1i = w(k + 1) wk3r = w(k + 2) wk3i = w(k + 3) kr = kr - 4 wd1i = w(kr) wd1r = w(kr + 1) wd3i = w(kr + 2) wd3r = w(kr + 3) j1 = j + m j2 = j1 + m j3 = j2 + m x0r = a(j) - a(j2 + 1) x0i = a(j + 1) + a(j2) x1r = a(j) + a(j2 + 1) x1i = a(j + 1) - a(j2) x2r = a(j1) - a(j3 + 1) x2i = a(j1 + 1) + a(j3) x3r = a(j1) + a(j3 + 1) x3i = a(j1 + 1) - a(j3) y0r = wk1r * x0r - wk1i * x0i y0i = wk1r * x0i + wk1i * x0r y2r = wd1r * x2r - wd1i * x2i y2i = wd1r * x2i + wd1i * x2r a(j) = y0r + y2r a(j + 1) = y0i + y2i a(j1) = y0r - y2r a(j1 + 1) = y0i - y2i y0r = wk3r * x1r + wk3i * x1i y0i = wk3r * x1i - wk3i * x1r y2r = wd3r * x3r + wd3i * x3i y2i = wd3r * x3i - wd3i * x3r a(j2) = y0r + y2r a(j2 + 1) = y0i + y2i a(j3) = y0r - y2r a(j3 + 1) = y0i - y2i j0 = m - j j1 = j0 + m j2 = j1 + m j3 = j2 + m x0r = a(j0) - a(j2 + 1) x0i = a(j0 + 1) + a(j2) x1r = a(j0) + a(j2 + 1) x1i = a(j0 + 1) - a(j2) x2r = a(j1) - a(j3 + 1) x2i = a(j1 + 1) + a(j3) x3r = a(j1) + a(j3 + 1) x3i = a(j1 + 1) - a(j3) y0r = wd1i * x0r - wd1r * x0i y0i = wd1i * x0i + wd1r * x0r y2r = wk1i * x2r - wk1r * x2i y2i = wk1i * x2i + wk1r * x2r a(j0) = y0r + y2r a(j0 + 1) = y0i + y2i a(j1) = y0r - y2r a(j1 + 1) = y0i - y2i y0r = wd3i * x1r + wd3r * x1i y0i = wd3i * x1i - wd3r * x1r y2r = wk3i * x3r + wk3r * x3i y2i = wk3i * x3i - wk3r * x3r a(j2) = y0r + y2r a(j2 + 1) = y0i + y2i a(j3) = y0r - y2r a(j3 + 1) = y0i - y2i end do wk1r = w(m) wk1i = w(m + 1) j0 = mh j1 = j0 + m j2 = j1 + m j3 = j2 + m x0r = a(j0) - a(j2 + 1) x0i = a(j0 + 1) + a(j2) x1r = a(j0) + a(j2 + 1) x1i = a(j0 + 1) - a(j2) x2r = a(j1) - a(j3 + 1) x2i = a(j1 + 1) + a(j3) x3r = a(j1) + a(j3 + 1) x3i = a(j1 + 1) - a(j3) y0r = wk1r * x0r - wk1i * x0i y0i = wk1r * x0i + wk1i * x0r y2r = wk1i * x2r - wk1r * x2i y2i = wk1i * x2i + wk1r * x2r a(j0) = y0r + y2r a(j0 + 1) = y0i + y2i a(j1) = y0r - y2r a(j1 + 1) = y0i - y2i y0r = wk1i * x1r - wk1r * x1i y0i = wk1i * x1i + wk1r * x1r y2r = wk1r * x3r - wk1i * x3i y2i = wk1r * x3i + wk1i * x3r a(j2) = y0r - y2r a(j2 + 1) = y0i - y2i a(j3) = y0r + y2r a(j3 + 1) = y0i + y2i end ! subroutine cftfx41(n, a, nw, w) integer n, nw real*8 a(0 : n - 1), w(0 : nw - 1) if (n .eq. 128) then call cftf161(a, w(nw - 8)) call cftf162(a(32), w(nw - 32)) call cftf161(a(64), w(nw - 8)) call cftf161(a(96), w(nw - 8)) else call cftf081(a, w(nw - 8)) call cftf082(a(16), w(nw - 8)) call cftf081(a(32), w(nw - 8)) call cftf081(a(48), w(nw - 8)) end if end ! subroutine cftf161(a, w) real*8 a(0 : 31), w(0 : *), wn4r, wk1r, wk1i real*8 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i real*8 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i real*8 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i wn4r = w(1) wk1r = w(2) wk1i = w(3) x0r = a(0) + a(16) x0i = a(1) + a(17) x1r = a(0) - a(16) x1i = a(1) - a(17) x2r = a(8) + a(24) x2i = a(9) + a(25) x3r = a(8) - a(24) x3i = a(9) - a(25) y0r = x0r + x2r y0i = x0i + x2i y4r = x0r - x2r y4i = x0i - x2i y8r = x1r - x3i y8i = x1i + x3r y12r = x1r + x3i y12i = x1i - x3r x0r = a(2) + a(18) x0i = a(3) + a(19) x1r = a(2) - a(18) x1i = a(3) - a(19) x2r = a(10) + a(26) x2i = a(11) + a(27) x3r = a(10) - a(26) x3i = a(11) - a(27) y1r = x0r + x2r y1i = x0i + x2i y5r = x0r - x2r y5i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r y9r = wk1r * x0r - wk1i * x0i y9i = wk1r * x0i + wk1i * x0r x0r = x1r + x3i x0i = x1i - x3r y13r = wk1i * x0r - wk1r * x0i y13i = wk1i * x0i + wk1r * x0r x0r = a(4) + a(20) x0i = a(5) + a(21) x1r = a(4) - a(20) x1i = a(5) - a(21) x2r = a(12) + a(28) x2i = a(13) + a(29) x3r = a(12) - a(28) x3i = a(13) - a(29) y2r = x0r + x2r y2i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r y10r = wn4r * (x0r - x0i) y10i = wn4r * (x0i + x0r) x0r = x1r + x3i x0i = x1i - x3r y14r = wn4r * (x0r + x0i) y14i = wn4r * (x0i - x0r) x0r = a(6) + a(22) x0i = a(7) + a(23) x1r = a(6) - a(22) x1i = a(7) - a(23) x2r = a(14) + a(30) x2i = a(15) + a(31) x3r = a(14) - a(30) x3i = a(15) - a(31) y3r = x0r + x2r y3i = x0i + x2i y7r = x0r - x2r y7i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r y11r = wk1i * x0r - wk1r * x0i y11i = wk1i * x0i + wk1r * x0r x0r = x1r + x3i x0i = x1i - x3r y15r = wk1r * x0r - wk1i * x0i y15i = wk1r * x0i + wk1i * x0r x0r = y12r - y14r x0i = y12i - y14i x1r = y12r + y14r x1i = y12i + y14i x2r = y13r - y15r x2i = y13i - y15i x3r = y13r + y15r x3i = y13i + y15i a(24) = x0r + x2r a(25) = x0i + x2i a(26) = x0r - x2r a(27) = x0i - x2i a(28) = x1r - x3i a(29) = x1i + x3r a(30) = x1r + x3i a(31) = x1i - x3r x0r = y8r + y10r x0i = y8i + y10i x1r = y8r - y10r x1i = y8i - y10i x2r = y9r + y11r x2i = y9i + y11i x3r = y9r - y11r x3i = y9i - y11i a(16) = x0r + x2r a(17) = x0i + x2i a(18) = x0r - x2r a(19) = x0i - x2i a(20) = x1r - x3i a(21) = x1i + x3r a(22) = x1r + x3i a(23) = x1i - x3r x0r = y5r - y7i x0i = y5i + y7r x2r = wn4r * (x0r - x0i) x2i = wn4r * (x0i + x0r) x0r = y5r + y7i x0i = y5i - y7r x3r = wn4r * (x0r - x0i) x3i = wn4r * (x0i + x0r) x0r = y4r - y6i x0i = y4i + y6r x1r = y4r + y6i x1i = y4i - y6r a(8) = x0r + x2r a(9) = x0i + x2i a(10) = x0r - x2r a(11) = x0i - x2i a(12) = x1r - x3i a(13) = x1i + x3r a(14) = x1r + x3i a(15) = x1i - x3r x0r = y0r + y2r x0i = y0i + y2i x1r = y0r - y2r x1i = y0i - y2i x2r = y1r + y3r x2i = y1i + y3i x3r = y1r - y3r x3i = y1i - y3i a(0) = x0r + x2r a(1) = x0i + x2i a(2) = x0r - x2r a(3) = x0i - x2i a(4) = x1r - x3i a(5) = x1i + x3r a(6) = x1r + x3i a(7) = x1i - x3r end ! subroutine cftf162(a, w) real*8 a(0 : 31), w(0 : *) real*8 wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i real*8 x0r, x0i, x1r, x1i, x2r, x2i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i real*8 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i real*8 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i wn4r = w(1) wk1r = w(4) wk1i = w(5) wk3r = w(6) wk3i = -w(7) wk2r = w(8) wk2i = w(9) x1r = a(0) - a(17) x1i = a(1) + a(16) x0r = a(8) - a(25) x0i = a(9) + a(24) x2r = wn4r * (x0r - x0i) x2i = wn4r * (x0i + x0r) y0r = x1r + x2r y0i = x1i + x2i y4r = x1r - x2r y4i = x1i - x2i x1r = a(0) + a(17) x1i = a(1) - a(16) x0r = a(8) + a(25) x0i = a(9) - a(24) x2r = wn4r * (x0r - x0i) x2i = wn4r * (x0i + x0r) y8r = x1r - x2i y8i = x1i + x2r y12r = x1r + x2i y12i = x1i - x2r x0r = a(2) - a(19) x0i = a(3) + a(18) x1r = wk1r * x0r - wk1i * x0i x1i = wk1r * x0i + wk1i * x0r x0r = a(10) - a(27) x0i = a(11) + a(26) x2r = wk3i * x0r - wk3r * x0i x2i = wk3i * x0i + wk3r * x0r y1r = x1r + x2r y1i = x1i + x2i y5r = x1r - x2r y5i = x1i - x2i x0r = a(2) + a(19) x0i = a(3) - a(18) x1r = wk3r * x0r - wk3i * x0i x1i = wk3r * x0i + wk3i * x0r x0r = a(10) + a(27) x0i = a(11) - a(26) x2r = wk1r * x0r + wk1i * x0i x2i = wk1r * x0i - wk1i * x0r y9r = x1r - x2r y9i = x1i - x2i y13r = x1r + x2r y13i = x1i + x2i x0r = a(4) - a(21) x0i = a(5) + a(20) x1r = wk2r * x0r - wk2i * x0i x1i = wk2r * x0i + wk2i * x0r x0r = a(12) - a(29) x0i = a(13) + a(28) x2r = wk2i * x0r - wk2r * x0i x2i = wk2i * x0i + wk2r * x0r y2r = x1r + x2r y2i = x1i + x2i y6r = x1r - x2r y6i = x1i - x2i x0r = a(4) + a(21) x0i = a(5) - a(20) x1r = wk2i * x0r - wk2r * x0i x1i = wk2i * x0i + wk2r * x0r x0r = a(12) + a(29) x0i = a(13) - a(28) x2r = wk2r * x0r - wk2i * x0i x2i = wk2r * x0i + wk2i * x0r y10r = x1r - x2r y10i = x1i - x2i y14r = x1r + x2r y14i = x1i + x2i x0r = a(6) - a(23) x0i = a(7) + a(22) x1r = wk3r * x0r - wk3i * x0i x1i = wk3r * x0i + wk3i * x0r x0r = a(14) - a(31) x0i = a(15) + a(30) x2r = wk1i * x0r - wk1r * x0i x2i = wk1i * x0i + wk1r * x0r y3r = x1r + x2r y3i = x1i + x2i y7r = x1r - x2r y7i = x1i - x2i x0r = a(6) + a(23) x0i = a(7) - a(22) x1r = wk1i * x0r + wk1r * x0i x1i = wk1i * x0i - wk1r * x0r x0r = a(14) + a(31) x0i = a(15) - a(30) x2r = wk3i * x0r - wk3r * x0i x2i = wk3i * x0i + wk3r * x0r y11r = x1r + x2r y11i = x1i + x2i y15r = x1r - x2r y15i = x1i - x2i x1r = y0r + y2r x1i = y0i + y2i x2r = y1r + y3r x2i = y1i + y3i a(0) = x1r + x2r a(1) = x1i + x2i a(2) = x1r - x2r a(3) = x1i - x2i x1r = y0r - y2r x1i = y0i - y2i x2r = y1r - y3r x2i = y1i - y3i a(4) = x1r - x2i a(5) = x1i + x2r a(6) = x1r + x2i a(7) = x1i - x2r x1r = y4r - y6i x1i = y4i + y6r x0r = y5r - y7i x0i = y5i + y7r x2r = wn4r * (x0r - x0i) x2i = wn4r * (x0i + x0r) a(8) = x1r + x2r a(9) = x1i + x2i a(10) = x1r - x2r a(11) = x1i - x2i x1r = y4r + y6i x1i = y4i - y6r x0r = y5r + y7i x0i = y5i - y7r x2r = wn4r * (x0r - x0i) x2i = wn4r * (x0i + x0r) a(12) = x1r - x2i a(13) = x1i + x2r a(14) = x1r + x2i a(15) = x1i - x2r x1r = y8r + y10r x1i = y8i + y10i x2r = y9r - y11r x2i = y9i - y11i a(16) = x1r + x2r a(17) = x1i + x2i a(18) = x1r - x2r a(19) = x1i - x2i x1r = y8r - y10r x1i = y8i - y10i x2r = y9r + y11r x2i = y9i + y11i a(20) = x1r - x2i a(21) = x1i + x2r a(22) = x1r + x2i a(23) = x1i - x2r x1r = y12r - y14i x1i = y12i + y14r x0r = y13r + y15i x0i = y13i - y15r x2r = wn4r * (x0r - x0i) x2i = wn4r * (x0i + x0r) a(24) = x1r + x2r a(25) = x1i + x2i a(26) = x1r - x2r a(27) = x1i - x2i x1r = y12r + y14i x1i = y12i - y14r x0r = y13r - y15i x0i = y13i + y15r x2r = wn4r * (x0r - x0i) x2i = wn4r * (x0i + x0r) a(28) = x1r - x2i a(29) = x1i + x2r a(30) = x1r + x2i a(31) = x1i - x2r end ! subroutine cftf081(a, w) real*8 a(0 : 15), w(0 : *) real*8 wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i wn4r = w(1) x0r = a(0) + a(8) x0i = a(1) + a(9) x1r = a(0) - a(8) x1i = a(1) - a(9) x2r = a(4) + a(12) x2i = a(5) + a(13) x3r = a(4) - a(12) x3i = a(5) - a(13) y0r = x0r + x2r y0i = x0i + x2i y2r = x0r - x2r y2i = x0i - x2i y1r = x1r - x3i y1i = x1i + x3r y3r = x1r + x3i y3i = x1i - x3r x0r = a(2) + a(10) x0i = a(3) + a(11) x1r = a(2) - a(10) x1i = a(3) - a(11) x2r = a(6) + a(14) x2i = a(7) + a(15) x3r = a(6) - a(14) x3i = a(7) - a(15) y4r = x0r + x2r y4i = x0i + x2i y6r = x0r - x2r y6i = x0i - x2i x0r = x1r - x3i x0i = x1i + x3r x2r = x1r + x3i x2i = x1i - x3r y5r = wn4r * (x0r - x0i) y5i = wn4r * (x0r + x0i) y7r = wn4r * (x2r - x2i) y7i = wn4r * (x2r + x2i) a(8) = y1r + y5r a(9) = y1i + y5i a(10) = y1r - y5r a(11) = y1i - y5i a(12) = y3r - y7i a(13) = y3i + y7r a(14) = y3r + y7i a(15) = y3i - y7r a(0) = y0r + y4r a(1) = y0i + y4i a(2) = y0r - y4r a(3) = y0i - y4i a(4) = y2r - y6i a(5) = y2i + y6r a(6) = y2r + y6i a(7) = y2i - y6r end ! subroutine cftf082(a, w) real*8 a(0 : 15), w(0 : *) real*8 wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i real*8 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i real*8 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i wn4r = w(1) wk1r = w(2) wk1i = w(3) y0r = a(0) - a(9) y0i = a(1) + a(8) y1r = a(0) + a(9) y1i = a(1) - a(8) x0r = a(4) - a(13) x0i = a(5) + a(12) y2r = wn4r * (x0r - x0i) y2i = wn4r * (x0i + x0r) x0r = a(4) + a(13) x0i = a(5) - a(12) y3r = wn4r * (x0r - x0i) y3i = wn4r * (x0i + x0r) x0r = a(2) - a(11) x0i = a(3) + a(10) y4r = wk1r * x0r - wk1i * x0i y4i = wk1r * x0i + wk1i * x0r x0r = a(2) + a(11) x0i = a(3) - a(10) y5r = wk1i * x0r - wk1r * x0i y5i = wk1i * x0i + wk1r * x0r x0r = a(6) - a(15) x0i = a(7) + a(14) y6r = wk1i * x0r - wk1r * x0i y6i = wk1i * x0i + wk1r * x0r x0r = a(6) + a(15) x0i = a(7) - a(14) y7r = wk1r * x0r - wk1i * x0i y7i = wk1r * x0i + wk1i * x0r x0r = y0r + y2r x0i = y0i + y2i x1r = y4r + y6r x1i = y4i + y6i a(0) = x0r + x1r a(1) = x0i + x1i a(2) = x0r - x1r a(3) = x0i - x1i x0r = y0r - y2r x0i = y0i - y2i x1r = y4r - y6r x1i = y4i - y6i a(4) = x0r - x1i a(5) = x0i + x1r a(6) = x0r + x1i a(7) = x0i - x1r x0r = y1r - y3i x0i = y1i + y3r x1r = y5r - y7r x1i = y5i - y7i a(8) = x0r + x1r a(9) = x0i + x1i a(10) = x0r - x1r a(11) = x0i - x1i x0r = y1r + y3i x0i = y1i - y3r x1r = y5r + y7r x1i = y5i + y7i a(12) = x0r - x1i a(13) = x0i + x1r a(14) = x0r + x1i a(15) = x0i - x1r end ! subroutine cftf040(a) real*8 a(0 : 7), x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i x0r = a(0) + a(4) x0i = a(1) + a(5) x1r = a(0) - a(4) x1i = a(1) - a(5) x2r = a(2) + a(6) x2i = a(3) + a(7) x3r = a(2) - a(6) x3i = a(3) - a(7) a(0) = x0r + x2r a(1) = x0i + x2i a(2) = x1r - x3i a(3) = x1i + x3r a(4) = x0r - x2r a(5) = x0i - x2i a(6) = x1r + x3i a(7) = x1i - x3r end ! subroutine cftb040(a) real*8 a(0 : 7), x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i x0r = a(0) + a(4) x0i = a(1) + a(5) x1r = a(0) - a(4) x1i = a(1) - a(5) x2r = a(2) + a(6) x2i = a(3) + a(7) x3r = a(2) - a(6) x3i = a(3) - a(7) a(0) = x0r + x2r a(1) = x0i + x2i a(2) = x1r + x3i a(3) = x1i - x3r a(4) = x0r - x2r a(5) = x0i - x2i a(6) = x1r - x3i a(7) = x1i + x3r end ! subroutine cftx020(a) real*8 a(0 : 3), x0r, x0i x0r = a(0) - a(2) x0i = a(1) - a(3) a(0) = a(0) + a(2) a(1) = a(1) + a(3) a(2) = x0r a(3) = x0i end ! subroutine rftfsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi m = n / 2 ks = 2 * nc / m kk = 0 do j = 2, m - 2, 2 k = n - j kk = kk + ks wkr = 0.5d0 - c(nc - kk) wki = c(kk) xr = a(j) - a(k) xi = a(j + 1) + a(k + 1) yr = wkr * xr - wki * xi yi = wkr * xi + wki * xr a(j) = a(j) - yr a(j + 1) = a(j + 1) - yi a(k) = a(k) + yr a(k + 1) = a(k + 1) - yi end do end ! subroutine rftbsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr, xi, yr, yi m = n / 2 ks = 2 * nc / m kk = 0 do j = 2, m - 2, 2 k = n - j kk = kk + ks wkr = 0.5d0 - c(nc - kk) wki = c(kk) xr = a(j) - a(k) xi = a(j + 1) + a(k + 1) yr = wkr * xr + wki * xi yi = wkr * xi - wki * xr a(j) = a(j) - yr a(j + 1) = a(j + 1) - yi a(k) = a(k) + yr a(k + 1) = a(k + 1) - yi end do end ! subroutine dctsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr m = n / 2 ks = nc / n kk = 0 do j = 1, m - 1 k = n - j kk = kk + ks wkr = c(kk) - c(nc - kk) wki = c(kk) + c(nc - kk) xr = wki * a(j) - wkr * a(k) a(j) = wkr * a(j) + wki * a(k) a(k) = xr end do a(m) = c(0) * a(m) end ! subroutine dstsub(n, a, nc, c) integer n, nc, j, k, kk, ks, m real*8 a(0 : n - 1), c(0 : nc - 1), wkr, wki, xr m = n / 2 ks = nc / n kk = 0 do j = 1, m - 1 k = n - j kk = kk + ks wkr = c(kk) - c(nc - kk) wki = c(kk) + c(nc - kk) xr = wki * a(k) - wkr * a(j) a(k) = wkr * a(k) + wki * a(j) a(j) = xr end do a(m) = c(0) * a(m) end ! OouraFFT-1.0/fftsg2d.c000066400000000000000000001111331246725775500145260ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :two data length :power of 2 decimation :frequency radix :split-radix, row-column data :inplace table :use functions cdft2d: Complex Discrete Fourier Transform rdft2d: Real Discrete Fourier Transform ddct2d: Discrete Cosine Transform ddst2d: Discrete Sine Transform function prototypes void cdft2d(int, int, int, double **, double *, int *, double *); void rdft2d(int, int, int, double **, double *, int *, double *); void rdft2dsort(int, int, int, double **); void ddct2d(int, int, int, double **, double *, int *, double *); void ddst2d(int, int, int, double **, double *, int *, double *); necessary package fftsg.c : 1D-FFT package macro definitions USE_FFT2D_PTHREADS : default=not defined FFT2D_MAX_THREADS : must be 2^N, default=4 FFT2D_THREADS_BEGIN_N : default=65536 USE_FFT2D_WINTHREADS : default=not defined FFT2D_MAX_THREADS : must be 2^N, default=4 FFT2D_THREADS_BEGIN_N : default=131072 -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * exp(2*pi*i*j1*k1/n1) * exp(2*pi*i*j2*k2/n2), 0<=k1 X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] * exp(-2*pi*i*j1*k1/n1) * exp(-2*pi*i*j2*k2/n2), 0<=k1 ip[0] = 0; // first time only cdft2d(n1, 2*n2, 1, a, t, ip, w); ip[0] = 0; // first time only cdft2d(n1, 2*n2, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 1, n1 = power of 2 2*n2 :data length (int) n2 >= 1, n2 = power of 2 a[0...n1-1][0...2*n2-1] :input/output data (double **) input data a[j1][2*j2] = Re(x[j1][j2]), a[j1][2*j2+1] = Im(x[j1][j2]), 0<=j1= 8*n1, if single thread, length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads, t is dynamically allocated, if t == NULL. ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) (n = max(n1, n2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1/2, n2/2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of cdft2d(n1, 2*n2, -1, a, t, ip, w); is cdft2d(n1, 2*n2, 1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= 2 * n2 - 1; j2++) { a[j1][j2] *= 1.0 / n1 / n2; } } . -------- Real DFT / Inverse of Real DFT -------- [definition] RDFT R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), 0<=k1 IRDFT (excluding scale) a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 (R[j1][j2] * cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + I[j1][j2] * sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), 0<=k1 ip[0] = 0; // first time only rdft2d(n1, n2, 1, a, t, ip, w); ip[0] = 0; // first time only rdft2d(n1, n2, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 a[0...n1-1][0...n2-1] :input/output data (double **) output data a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2], a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2], 0 input data a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2], a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2], 0= 8*n1, if single thread, length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads, t is dynamically allocated, if t == NULL. ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) (n = max(n1, n2/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1/2, n2/4) + n2/4 w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of rdft2d(n1, n2, 1, a, t, ip, w); is rdft2d(n1, n2, -1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] *= 2.0 / n1 / n2; } } . -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] IDCT (excluding scale) C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * cos(pi*j1*(k1+1/2)/n1) * cos(pi*j2*(k2+1/2)/n2), 0<=k1 DCT C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * cos(pi*(j1+1/2)*k1/n1) * cos(pi*(j2+1/2)*k2/n2), 0<=k1 ip[0] = 0; // first time only ddct2d(n1, n2, 1, a, t, ip, w); ip[0] = 0; // first time only ddct2d(n1, n2, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 a[0...n1-1][0...n2-1] :input/output data (double **) output data a[k1][k2] = C[k1][k2], 0<=k1= 4*n1, if single thread, length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads, t is dynamically allocated, if t == NULL. ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) (n = max(n1/2, n2/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1*3/2, n2*3/2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddct2d(n1, n2, -1, a, t, ip, w); is for (j1 = 0; j1 <= n1 - 1; j1++) { a[j1][0] *= 0.5; } for (j2 = 0; j2 <= n2 - 1; j2++) { a[0][j2] *= 0.5; } ddct2d(n1, n2, 1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] *= 4.0 / n1 / n2; } } . -------- DST (Discrete Sine Transform) / Inverse of DST -------- [definition] IDST (excluding scale) S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] * sin(pi*j1*(k1+1/2)/n1) * sin(pi*j2*(k2+1/2)/n2), 0<=k1 DST S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] * sin(pi*(j1+1/2)*k1/n1) * sin(pi*(j2+1/2)*k2/n2), 0 ip[0] = 0; // first time only ddst2d(n1, n2, 1, a, t, ip, w); ip[0] = 0; // first time only ddst2d(n1, n2, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 a[0...n1-1][0...n2-1] :input/output data (double **) input data a[j1][j2] = A[j1][j2], 0 output data a[k1][k2] = S[k1][k2], 0= 4*n1, if single thread, length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads, t is dynamically allocated, if t == NULL. ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) (n = max(n1/2, n2/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1*3/2, n2*3/2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddst2d(n1, n2, -1, a, t, ip, w); is for (j1 = 0; j1 <= n1 - 1; j1++) { a[j1][0] *= 0.5; } for (j2 = 0; j2 <= n2 - 1; j2++) { a[0][j2] *= 0.5; } ddst2d(n1, n2, 1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] *= 4.0 / n1 / n2; } } . */ #include #include #define fft2d_alloc_error_check(p) { \ if ((p) == NULL) { \ fprintf(stderr, "fft2d memory allocation error\n"); \ exit(1); \ } \ } #ifdef USE_FFT2D_PTHREADS #define USE_FFT2D_THREADS #ifndef FFT2D_MAX_THREADS #define FFT2D_MAX_THREADS 4 #endif #ifndef FFT2D_THREADS_BEGIN_N #define FFT2D_THREADS_BEGIN_N 65536 #endif #include #define fft2d_thread_t pthread_t #define fft2d_thread_create(thp,func,argp) { \ if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \ fprintf(stderr, "fft2d thread error\n"); \ exit(1); \ } \ } #define fft2d_thread_wait(th) { \ if (pthread_join(th, NULL) != 0) { \ fprintf(stderr, "fft2d thread error\n"); \ exit(1); \ } \ } #endif /* USE_FFT2D_PTHREADS */ #ifdef USE_FFT2D_WINTHREADS #define USE_FFT2D_THREADS #ifndef FFT2D_MAX_THREADS #define FFT2D_MAX_THREADS 4 #endif #ifndef FFT2D_THREADS_BEGIN_N #define FFT2D_THREADS_BEGIN_N 131072 #endif #include #define fft2d_thread_t HANDLE #define fft2d_thread_create(thp,func,argp) { \ DWORD thid; \ *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \ if (*(thp) == 0) { \ fprintf(stderr, "fft2d thread error\n"); \ exit(1); \ } \ } #define fft2d_thread_wait(th) { \ WaitForSingleObject(th, INFINITE); \ CloseHandle(th); \ } #endif /* USE_FFT2D_WINTHREADS */ void cdft2d(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void cdft(int n, int isgn, double *a, int *ip, double *w); void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w); #ifdef USE_FFT2D_THREADS void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, int *ip, double *w); void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w); #endif /* USE_FFT2D_THREADS */ int n, itnull, nthread, nt, i; n = n1 << 1; if (n < n2) { n = n2; } if (n > (ip[0] << 2)) { makewt(n >> 2, ip, w); } itnull = 0; if (t == NULL) { itnull = 1; nthread = 1; #ifdef USE_FFT2D_THREADS nthread = FFT2D_MAX_THREADS; #endif /* USE_FFT2D_THREADS */ nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = (double *) malloc(sizeof(double) * nt); fft2d_alloc_error_check(t); } #ifdef USE_FFT2D_THREADS if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { xdft2d0_subth(n1, n2, 0, isgn, a, ip, w); cdft2d_subth(n1, n2, isgn, a, t, ip, w); } else #endif /* USE_FFT2D_THREADS */ { for (i = 0; i < n1; i++) { cdft(n2, isgn, a[i], ip, w); } cdft2d_sub(n1, n2, isgn, a, t, ip, w); } if (itnull != 0) { free(t); } } void rdft2d(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void rdft(int n, int isgn, double *a, int *ip, double *w); void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w); void rdft2d_sub(int n1, int n2, int isgn, double **a); #ifdef USE_FFT2D_THREADS void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, int *ip, double *w); void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w); #endif /* USE_FFT2D_THREADS */ int n, nw, nc, itnull, nthread, nt, i; n = n1 << 1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n2 > (nc << 2)) { nc = n2 >> 2; makect(nc, ip, w + nw); } itnull = 0; if (t == NULL) { itnull = 1; nthread = 1; #ifdef USE_FFT2D_THREADS nthread = FFT2D_MAX_THREADS; #endif /* USE_FFT2D_THREADS */ nt = 8 * nthread * n1; if (n2 == 4 * nthread) { nt >>= 1; } else if (n2 < 4 * nthread) { nt >>= 2; } t = (double *) malloc(sizeof(double) * nt); fft2d_alloc_error_check(t); } #ifdef USE_FFT2D_THREADS if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { if (isgn < 0) { rdft2d_sub(n1, n2, isgn, a); cdft2d_subth(n1, n2, isgn, a, t, ip, w); } xdft2d0_subth(n1, n2, 1, isgn, a, ip, w); if (isgn >= 0) { cdft2d_subth(n1, n2, isgn, a, t, ip, w); rdft2d_sub(n1, n2, isgn, a); } } else #endif /* USE_FFT2D_THREADS */ { if (isgn < 0) { rdft2d_sub(n1, n2, isgn, a); cdft2d_sub(n1, n2, isgn, a, t, ip, w); } for (i = 0; i < n1; i++) { rdft(n2, isgn, a[i], ip, w); } if (isgn >= 0) { cdft2d_sub(n1, n2, isgn, a, t, ip, w); rdft2d_sub(n1, n2, isgn, a); } } if (itnull != 0) { free(t); } } void rdft2dsort(int n1, int n2, int isgn, double **a) { int n1h, i; double x, y; n1h = n1 >> 1; if (isgn < 0) { for (i = n1h + 1; i < n1; i++) { a[i][0] = a[i][n2 + 1]; a[i][1] = a[i][n2]; } a[0][1] = a[0][n2]; a[n1h][1] = a[n1h][n2]; } else { for (i = n1h + 1; i < n1; i++) { y = a[i][0]; x = a[i][1]; a[i][n2] = x; a[i][n2 + 1] = y; a[n1 - i][n2] = x; a[n1 - i][n2 + 1] = -y; a[i][0] = a[n1 - i][0]; a[i][1] = -a[n1 - i][1]; } a[0][n2] = a[0][1]; a[0][n2 + 1] = 0; a[0][1] = 0; a[n1h][n2] = a[n1h][1]; a[n1h][n2 + 1] = 0; a[n1h][1] = 0; } } void ddct2d(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void ddct(int n, int isgn, double *a, int *ip, double *w); void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, double *t, int *ip, double *w); #ifdef USE_FFT2D_THREADS void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, int *ip, double *w); void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, double *t, int *ip, double *w); #endif /* USE_FFT2D_THREADS */ int n, nw, nc, itnull, nthread, nt, i; n = n1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } itnull = 0; if (t == NULL) { itnull = 1; nthread = 1; #ifdef USE_FFT2D_THREADS nthread = FFT2D_MAX_THREADS; #endif /* USE_FFT2D_THREADS */ nt = 4 * nthread * n1; if (n2 == 2 * nthread) { nt >>= 1; } else if (n2 < 2 * nthread) { nt >>= 2; } t = (double *) malloc(sizeof(double) * nt); fft2d_alloc_error_check(t); } #ifdef USE_FFT2D_THREADS if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w); ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w); } else #endif /* USE_FFT2D_THREADS */ { for (i = 0; i < n1; i++) { ddct(n2, isgn, a[i], ip, w); } ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w); } if (itnull != 0) { free(t); } } void ddst2d(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void ddst(int n, int isgn, double *a, int *ip, double *w); void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, double *t, int *ip, double *w); #ifdef USE_FFT2D_THREADS void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, int *ip, double *w); void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, double *t, int *ip, double *w); #endif /* USE_FFT2D_THREADS */ int n, nw, nc, itnull, nthread, nt, i; n = n1; if (n < n2) { n = n2; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } itnull = 0; if (t == NULL) { itnull = 1; nthread = 1; #ifdef USE_FFT2D_THREADS nthread = FFT2D_MAX_THREADS; #endif /* USE_FFT2D_THREADS */ nt = 4 * nthread * n1; if (n2 == 2 * nthread) { nt >>= 1; } else if (n2 < 2 * nthread) { nt >>= 2; } t = (double *) malloc(sizeof(double) * nt); fft2d_alloc_error_check(t); } #ifdef USE_FFT2D_THREADS if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) { ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w); ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w); } else #endif /* USE_FFT2D_THREADS */ { for (i = 0; i < n1; i++) { ddst(n2, isgn, a[i], ip, w); } ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w); } if (itnull != 0) { free(t); } } /* -------- child routines -------- */ void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w) { void cdft(int n, int isgn, double *a, int *ip, double *w); int i, j; if (n2 > 4) { for (j = 0; j < n2; j += 8) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j]; t[2 * i + 1] = a[i][j + 1]; t[2 * n1 + 2 * i] = a[i][j + 2]; t[2 * n1 + 2 * i + 1] = a[i][j + 3]; t[4 * n1 + 2 * i] = a[i][j + 4]; t[4 * n1 + 2 * i + 1] = a[i][j + 5]; t[6 * n1 + 2 * i] = a[i][j + 6]; t[6 * n1 + 2 * i + 1] = a[i][j + 7]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); cdft(2 * n1, isgn, &t[4 * n1], ip, w); cdft(2 * n1, isgn, &t[6 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][j] = t[2 * i]; a[i][j + 1] = t[2 * i + 1]; a[i][j + 2] = t[2 * n1 + 2 * i]; a[i][j + 3] = t[2 * n1 + 2 * i + 1]; a[i][j + 4] = t[4 * n1 + 2 * i]; a[i][j + 5] = t[4 * n1 + 2 * i + 1]; a[i][j + 6] = t[6 * n1 + 2 * i]; a[i][j + 7] = t[6 * n1 + 2 * i + 1]; } } } else if (n2 == 4) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][0]; t[2 * i + 1] = a[i][1]; t[2 * n1 + 2 * i] = a[i][2]; t[2 * n1 + 2 * i + 1] = a[i][3]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][0] = t[2 * i]; a[i][1] = t[2 * i + 1]; a[i][2] = t[2 * n1 + 2 * i]; a[i][3] = t[2 * n1 + 2 * i + 1]; } } else if (n2 == 2) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][0]; t[2 * i + 1] = a[i][1]; } cdft(2 * n1, isgn, t, ip, w); for (i = 0; i < n1; i++) { a[i][0] = t[2 * i]; a[i][1] = t[2 * i + 1]; } } } void rdft2d_sub(int n1, int n2, int isgn, double **a) { int n1h, i, j; double xi; n1h = n1 >> 1; if (isgn < 0) { for (i = 1; i < n1h; i++) { j = n1 - i; xi = a[i][0] - a[j][0]; a[i][0] += a[j][0]; a[j][0] = xi; xi = a[j][1] - a[i][1]; a[i][1] += a[j][1]; a[j][1] = xi; } } else { for (i = 1; i < n1h; i++) { j = n1 - i; a[j][0] = 0.5 * (a[i][0] - a[j][0]); a[i][0] -= a[j][0]; a[j][1] = 0.5 * (a[i][1] + a[j][1]); a[i][1] -= a[j][1]; } } } void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a, double *t, int *ip, double *w) { void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int i, j; if (n2 > 2) { for (j = 0; j < n2; j += 4) { for (i = 0; i < n1; i++) { t[i] = a[i][j]; t[n1 + i] = a[i][j + 1]; t[2 * n1 + i] = a[i][j + 2]; t[3 * n1 + i] = a[i][j + 3]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); ddct(n1, isgn, &t[2 * n1], ip, w); ddct(n1, isgn, &t[3 * n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); ddst(n1, isgn, &t[2 * n1], ip, w); ddst(n1, isgn, &t[3 * n1], ip, w); } for (i = 0; i < n1; i++) { a[i][j] = t[i]; a[i][j + 1] = t[n1 + i]; a[i][j + 2] = t[2 * n1 + i]; a[i][j + 3] = t[3 * n1 + i]; } } } else if (n2 == 2) { for (i = 0; i < n1; i++) { t[i] = a[i][0]; t[n1 + i] = a[i][1]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); } for (i = 0; i < n1; i++) { a[i][0] = t[i]; a[i][1] = t[n1 + i]; } } } #ifdef USE_FFT2D_THREADS struct fft2d_arg_st { int nthread; int n0; int n1; int n2; int ic; int isgn; double **a; double *t; int *ip; double *w; }; typedef struct fft2d_arg_st fft2d_arg_t; void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a, int *ip, double *w) { void *xdft2d0_th(void *p); fft2d_thread_t th[FFT2D_MAX_THREADS]; fft2d_arg_t ag[FFT2D_MAX_THREADS]; int nthread, i; nthread = FFT2D_MAX_THREADS; if (nthread > n1) { nthread = n1; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].ic = icr; ag[i].isgn = isgn; ag[i].a = a; ag[i].ip = ip; ag[i].w = w; fft2d_thread_create(&th[i], xdft2d0_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft2d_thread_wait(th[i]); } } void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t, int *ip, double *w) { void *cdft2d_th(void *p); fft2d_thread_t th[FFT2D_MAX_THREADS]; fft2d_arg_t ag[FFT2D_MAX_THREADS]; int nthread, nt, i; nthread = FFT2D_MAX_THREADS; nt = 8 * n1; if (n2 == 4 * FFT2D_MAX_THREADS) { nt >>= 1; } else if (n2 < 4 * FFT2D_MAX_THREADS) { nthread = n2 >> 1; nt >>= 2; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft2d_thread_create(&th[i], cdft2d_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft2d_thread_wait(th[i]); } } void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a, int *ip, double *w) { void *ddxt2d0_th(void *p); fft2d_thread_t th[FFT2D_MAX_THREADS]; fft2d_arg_t ag[FFT2D_MAX_THREADS]; int nthread, i; nthread = FFT2D_MAX_THREADS; if (nthread > n1) { nthread = n1; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].ic = ics; ag[i].isgn = isgn; ag[i].a = a; ag[i].ip = ip; ag[i].w = w; fft2d_thread_create(&th[i], ddxt2d0_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft2d_thread_wait(th[i]); } } void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a, double *t, int *ip, double *w) { void *ddxt2d_th(void *p); fft2d_thread_t th[FFT2D_MAX_THREADS]; fft2d_arg_t ag[FFT2D_MAX_THREADS]; int nthread, nt, i; nthread = FFT2D_MAX_THREADS; nt = 4 * n1; if (n2 == 2 * FFT2D_MAX_THREADS) { nt >>= 1; } else if (n2 < 2 * FFT2D_MAX_THREADS) { nthread = n2; nt >>= 2; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].ic = ics; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft2d_thread_create(&th[i], ddxt2d_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft2d_thread_wait(th[i]); } } void *xdft2d0_th(void *p) { void cdft(int n, int isgn, double *a, int *ip, double *w); void rdft(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, icr, isgn, *ip, i; double **a, *w; nthread = ((fft2d_arg_t *) p)->nthread; n0 = ((fft2d_arg_t *) p)->n0; n1 = ((fft2d_arg_t *) p)->n1; n2 = ((fft2d_arg_t *) p)->n2; icr = ((fft2d_arg_t *) p)->ic; isgn = ((fft2d_arg_t *) p)->isgn; a = ((fft2d_arg_t *) p)->a; ip = ((fft2d_arg_t *) p)->ip; w = ((fft2d_arg_t *) p)->w; if (icr == 0) { for (i = n0; i < n1; i += nthread) { cdft(n2, isgn, a[i], ip, w); } } else { for (i = n0; i < n1; i += nthread) { rdft(n2, isgn, a[i], ip, w); } } return (void *) 0; } void *cdft2d_th(void *p) { void cdft(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, isgn, *ip, i, j; double **a, *t, *w; nthread = ((fft2d_arg_t *) p)->nthread; n0 = ((fft2d_arg_t *) p)->n0; n1 = ((fft2d_arg_t *) p)->n1; n2 = ((fft2d_arg_t *) p)->n2; isgn = ((fft2d_arg_t *) p)->isgn; a = ((fft2d_arg_t *) p)->a; t = ((fft2d_arg_t *) p)->t; ip = ((fft2d_arg_t *) p)->ip; w = ((fft2d_arg_t *) p)->w; if (n2 > 4 * nthread) { for (j = 8 * n0; j < n2; j += 8 * nthread) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j]; t[2 * i + 1] = a[i][j + 1]; t[2 * n1 + 2 * i] = a[i][j + 2]; t[2 * n1 + 2 * i + 1] = a[i][j + 3]; t[4 * n1 + 2 * i] = a[i][j + 4]; t[4 * n1 + 2 * i + 1] = a[i][j + 5]; t[6 * n1 + 2 * i] = a[i][j + 6]; t[6 * n1 + 2 * i + 1] = a[i][j + 7]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); cdft(2 * n1, isgn, &t[4 * n1], ip, w); cdft(2 * n1, isgn, &t[6 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][j] = t[2 * i]; a[i][j + 1] = t[2 * i + 1]; a[i][j + 2] = t[2 * n1 + 2 * i]; a[i][j + 3] = t[2 * n1 + 2 * i + 1]; a[i][j + 4] = t[4 * n1 + 2 * i]; a[i][j + 5] = t[4 * n1 + 2 * i + 1]; a[i][j + 6] = t[6 * n1 + 2 * i]; a[i][j + 7] = t[6 * n1 + 2 * i + 1]; } } } else if (n2 == 4 * nthread) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][4 * n0]; t[2 * i + 1] = a[i][4 * n0 + 1]; t[2 * n1 + 2 * i] = a[i][4 * n0 + 2]; t[2 * n1 + 2 * i + 1] = a[i][4 * n0 + 3]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][4 * n0] = t[2 * i]; a[i][4 * n0 + 1] = t[2 * i + 1]; a[i][4 * n0 + 2] = t[2 * n1 + 2 * i]; a[i][4 * n0 + 3] = t[2 * n1 + 2 * i + 1]; } } else if (n2 == 2 * nthread) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][2 * n0]; t[2 * i + 1] = a[i][2 * n0 + 1]; } cdft(2 * n1, isgn, t, ip, w); for (i = 0; i < n1; i++) { a[i][2 * n0] = t[2 * i]; a[i][2 * n0 + 1] = t[2 * i + 1]; } } return (void *) 0; } void *ddxt2d0_th(void *p) { void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, ics, isgn, *ip, i; double **a, *w; nthread = ((fft2d_arg_t *) p)->nthread; n0 = ((fft2d_arg_t *) p)->n0; n1 = ((fft2d_arg_t *) p)->n1; n2 = ((fft2d_arg_t *) p)->n2; ics = ((fft2d_arg_t *) p)->ic; isgn = ((fft2d_arg_t *) p)->isgn; a = ((fft2d_arg_t *) p)->a; ip = ((fft2d_arg_t *) p)->ip; w = ((fft2d_arg_t *) p)->w; if (ics == 0) { for (i = n0; i < n1; i += nthread) { ddct(n2, isgn, a[i], ip, w); } } else { for (i = n0; i < n1; i += nthread) { ddst(n2, isgn, a[i], ip, w); } } return (void *) 0; } void *ddxt2d_th(void *p) { void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, ics, isgn, *ip, i, j; double **a, *t, *w; nthread = ((fft2d_arg_t *) p)->nthread; n0 = ((fft2d_arg_t *) p)->n0; n1 = ((fft2d_arg_t *) p)->n1; n2 = ((fft2d_arg_t *) p)->n2; ics = ((fft2d_arg_t *) p)->ic; isgn = ((fft2d_arg_t *) p)->isgn; a = ((fft2d_arg_t *) p)->a; t = ((fft2d_arg_t *) p)->t; ip = ((fft2d_arg_t *) p)->ip; w = ((fft2d_arg_t *) p)->w; if (n2 > 2 * nthread) { for (j = 4 * n0; j < n2; j += 4 * nthread) { for (i = 0; i < n1; i++) { t[i] = a[i][j]; t[n1 + i] = a[i][j + 1]; t[2 * n1 + i] = a[i][j + 2]; t[3 * n1 + i] = a[i][j + 3]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); ddct(n1, isgn, &t[2 * n1], ip, w); ddct(n1, isgn, &t[3 * n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); ddst(n1, isgn, &t[2 * n1], ip, w); ddst(n1, isgn, &t[3 * n1], ip, w); } for (i = 0; i < n1; i++) { a[i][j] = t[i]; a[i][j + 1] = t[n1 + i]; a[i][j + 2] = t[2 * n1 + i]; a[i][j + 3] = t[3 * n1 + i]; } } } else if (n2 == 2 * nthread) { for (i = 0; i < n1; i++) { t[i] = a[i][2 * n0]; t[n1 + i] = a[i][2 * n0 + 1]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); } for (i = 0; i < n1; i++) { a[i][2 * n0] = t[i]; a[i][2 * n0 + 1] = t[n1 + i]; } } else if (n2 == nthread) { for (i = 0; i < n1; i++) { t[i] = a[i][n0]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); } else { ddst(n1, isgn, t, ip, w); } for (i = 0; i < n1; i++) { a[i][n0] = t[i]; } } return (void *) 0; } #endif /* USE_FFT2D_THREADS */ OouraFFT-1.0/fftsg2d.f000066400000000000000000000511251246725775500145350ustar00rootroot00000000000000! Fast Fourier/Cosine/Sine Transform ! dimension :two ! data length :power of 2 ! decimation :frequency ! radix :split-radix, row-column ! data :inplace ! table :use ! subroutines ! cdft2d: Complex Discrete Fourier Transform ! rdft2d: Real Discrete Fourier Transform ! ddct2d: Discrete Cosine Transform ! ddst2d: Discrete Sine Transform ! necessary package ! fftsg.f : 1D-FFT package ! ! ! -------- Complex DFT (Discrete Fourier Transform) -------- ! [definition] ! ! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * ! exp(2*pi*i*j1*k1/n1) * ! exp(2*pi*i*j2*k2/n2), ! 0<=k1 ! X(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 x(j1,j2) * ! exp(-2*pi*i*j1*k1/n1) * ! exp(-2*pi*i*j2*k2/n2), ! 0<=k1 ! ip(0) = 0 ! first time only ! call cdft2d(n1max, 2*n1, n2, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call cdft2d(n1max, 2*n1, n2, -1, a, t, ip, w) ! [parameters] ! n1max :row size of the 2D array (integer) ! 2*n1 :data length (integer) ! n1 >= 1, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 1, n2 = power of 2 ! a(0:2*n1-1,0:n2-1) ! :input/output data (real*8) ! input data ! a(2*j1,j2) = Re(x(j1,j2)), ! a(2*j1+1,j2) = Im(x(j1,j2)), ! 0<=j1= 8*n2 ! ip(0:*):work area for bit reversal (integer) ! length of ip >= 2+sqrt(n) ! (n = max(n1, n2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1/2, n2/2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call cdft2d(n1max, 2*n1, n2, -1, a, t, ip, w) ! is ! call cdft2d(n1max, 2*n1, n2, 1, a, t, ip, w) ! do j2 = 0, n2 - 1 ! do j1 = 0, 2 * n1 - 1 ! a(j1, j2) = a(j1, j2) * (1.0d0 / n1 / n2) ! end do ! end do ! . ! ! ! -------- Real DFT / Inverse of Real DFT -------- ! [definition] ! RDFT ! R(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * ! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2), ! 0<=k1 IRDFT (excluding scale) ! a(k1,k2) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 ! (R(j1,j2) * ! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) + ! I(j1,j2) * ! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)), ! 0<=k1 ! ip(0) = 0 ! first time only ! call rdft2d(n1max, n1, n2, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call rdft2d(n1max, n1, n2, -1, a, t, ip, w) ! [parameters] ! n1max :row size of the 2D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! a(0:n1-1,0:n2-1) ! :input/output data (real*8) ! ! output data ! a(2*k1,k2) = R(k1,k2) = R(n1-k1,n2-k2), ! a(2*k1+1,k2) = I(k1,k2) = -I(n1-k1,n2-k2), ! 0 ! input data ! a(2*j1,j2) = R(j1,j2) = R(n1-j1,n2-j2), ! a(2*j1+1,j2) = I(j1,j2) = -I(n1-j1,n2-j2), ! 0= 8*n2 ! ip(0:*):work area for bit reversal (integer) ! length of ip >= 2+sqrt(n) ! (n = max(n1/2, n2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1/4, n2/2) + n1/4 ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call rdft2d(n1max, n1, n2, 1, a, t, ip, w) ! is ! call rdft2d(n1max, n1, n2, -1, a, t, ip, w) ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1, j2) = a(j1, j2) * (2.0d0 / n1 / n2) ! end do ! end do ! . ! ! ! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- ! [definition] ! IDCT (excluding scale) ! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * ! cos(pi*j1*(k1+1/2)/n1) * ! cos(pi*j2*(k2+1/2)/n2), ! 0<=k1 DCT ! C(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * ! cos(pi*(j1+1/2)*k1/n1) * ! cos(pi*(j2+1/2)*k2/n2), ! 0<=k1 ! ip(0) = 0 ! first time only ! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) ! [parameters] ! n1max :row size of the 2D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! a(0:n1-1,0:n2-1) ! :input/output data (real*8) ! output data ! a(k1,k2) = C(k1,k2), 0<=k1= 4*n2 ! ip(0:*):work area for bit reversal (integer) ! length of ip >= 2+sqrt(n) ! (n = max(n1/2, n2/2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1*3/2, n2*3/2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddct2d(n1max, n1, n2, -1, a, t, ip, w) ! is ! do j1 = 0, n1 - 1 ! a(j1, 0) = a(j1, 0) * 0.5d0 ! end do ! do j2 = 0, n2 - 1 ! a(0, j2) = a(0, j2) * 0.5d0 ! end do ! call ddct2d(n1max, n1, n2, 1, a, t, ip, w) ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1, j2) = a(j1, j2) * (4.0d0 / n1 / n2) ! end do ! end do ! . ! ! ! -------- DST (Discrete Sine Transform) / Inverse of DST -------- ! [definition] ! IDST (excluding scale) ! S(k1,k2) = sum_j1=1^n1 sum_j2=1^n2 A(j1,j2) * ! sin(pi*j1*(k1+1/2)/n1) * ! sin(pi*j2*(k2+1/2)/n2), ! 0<=k1 DST ! S(k1,k2) = sum_j1=0^n1-1 sum_j2=0^n2-1 a(j1,j2) * ! sin(pi*(j1+1/2)*k1/n1) * ! sin(pi*(j2+1/2)*k2/n2), ! 0 ! ip(0) = 0 ! first time only ! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) ! [parameters] ! n1max :row size of the 2D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! a(0:n1-1,0:n2-1) ! :input/output data (real*8) ! ! input data ! a(j1,j2) = A(j1,j2), 0 ! output data ! a(k1,k2) = S(k1,k2), 0= 4*n2 ! ip(0:*):work area for bit reversal (integer) ! length of ip >= 2+sqrt(n) ! (n = max(n1/2, n2/2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1*3/2, n2*3/2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddst2d(n1max, n1, n2, -1, a, t, ip, w) ! is ! do j1 = 0, n1 - 1 ! a(j1, 0) = a(j1, 0) * 0.5d0 ! end do ! do j2 = 0, n2 - 1 ! a(0, j2) = a(0, j2) * 0.5d0 ! end do ! call ddst2d(n1max, n1, n2, 1, a, t, ip, w) ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1, j2) = a(j1, j2) * (4.0d0 / n1 / n2) ! end do ! end do ! . ! ! subroutine cdft2d(n1max, n1, n2, isgn, a, t, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), n, j real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), & w(0 : *) n = max(n1, 2 * n2) if (n .gt. 4 * ip(0)) then call makewt(n / 4, ip, w) end if do j = 0, n2 - 1 call cdft(n1, isgn, a(0, j), ip, w) end do call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) end ! subroutine rdft2d(n1max, n1, n2, isgn, a, t, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), & w(0 : *) n = max(n1, 2 * n2) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n1 .gt. 4 * nc) then nc = n1 / 4 call makect(nc, ip, w(nw)) end if if (isgn .lt. 0) then call rdft2d_sub(n1max, n1, n2, isgn, a) call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) end if do j = 0, n2 - 1 call rdft(n1, isgn, a(0, j), ip, w) end do if (isgn .ge. 0) then call cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) call rdft2d_sub(n1max, n1, n2, isgn, a) end if end ! subroutine rdft2dsort(n1max, n1, n2, isgn, a) integer n1max, n1, n2, isgn, n2h, j real*8 a(0 : n1max - 1, 0 : n2 - 1), x, y n2h = n2 / 2 if (isgn .lt. 0) then do j = n2h + 1, n2 - 1 a(0, j) = a(n1 + 1, j) a(1, j) = a(n1, j) end do a(1, 0) = a(n1, 0) a(1, n2h) = a(n1, n2h) else do j = n2h + 1, n2 - 1 y = a(0, j) x = a(1, j) a(n1, j) = x a(n1 + 1, j) = y a(n1, n2 - j) = x a(n1 + 1, n2 - j) = -y a(0, j) = a(0, n2 - j) a(1, j) = -a(1, n2 - j) end do a(n1, 0) = a(1, 0) a(n1 + 1, 0) = 0 a(1, 0) = 0 a(n1, n2h) = a(1, n2h) a(n1 + 1, n2h) = 0 a(1, n2h) = 0 end if end ! subroutine ddct2d(n1max, n1, n2, isgn, a, t, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), & w(0 : *) n = max(n1, n2) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if do j = 0, n2 - 1 call ddct(n1, isgn, a(0, j), ip, w) end do call ddxt2d_sub(n1max, n1, n2, 0, isgn, a, t, ip, w) end ! subroutine ddst2d(n1max, n1, n2, isgn, a, t, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), n, nw, nc, j real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), & w(0 : *) n = max(n1, n2) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if do j = 0, n2 - 1 call ddst(n1, isgn, a(0, j), ip, w) end do call ddxt2d_sub(n1max, n1, n2, 1, isgn, a, t, ip, w) end ! ! -------- child routines -------- ! subroutine cdft2d_sub(n1max, n1, n2, isgn, a, t, ip, w) integer n1max, n1, n2, isgn, ip(0 : *), i, j real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 8 * n2 - 1), & w(0 : *) if (n1 .gt. 4) then do i = 0, n1 - 8, 8 do j = 0, n2 - 1 t(2 * j) = a(i, j) t(2 * j + 1) = a(i + 1, j) t(2 * n2 + 2 * j) = a(i + 2, j) t(2 * n2 + 2 * j + 1) = a(i + 3, j) t(4 * n2 + 2 * j) = a(i + 4, j) t(4 * n2 + 2 * j + 1) = a(i + 5, j) t(6 * n2 + 2 * j) = a(i + 6, j) t(6 * n2 + 2 * j + 1) = a(i + 7, j) end do call cdft(2 * n2, isgn, t, ip, w) call cdft(2 * n2, isgn, t(2 * n2), ip, w) call cdft(2 * n2, isgn, t(4 * n2), ip, w) call cdft(2 * n2, isgn, t(6 * n2), ip, w) do j = 0, n2 - 1 a(i, j) = t(2 * j) a(i + 1, j) = t(2 * j + 1) a(i + 2, j) = t(2 * n2 + 2 * j) a(i + 3, j) = t(2 * n2 + 2 * j + 1) a(i + 4, j) = t(4 * n2 + 2 * j) a(i + 5, j) = t(4 * n2 + 2 * j + 1) a(i + 6, j) = t(6 * n2 + 2 * j) a(i + 7, j) = t(6 * n2 + 2 * j + 1) end do end do else if (n1 .eq. 4) then do j = 0, n2 - 1 t(2 * j) = a(0, j) t(2 * j + 1) = a(1, j) t(2 * n2 + 2 * j) = a(2, j) t(2 * n2 + 2 * j + 1) = a(3, j) end do call cdft(2 * n2, isgn, t, ip, w) call cdft(2 * n2, isgn, t(2 * n2), ip, w) do j = 0, n2 - 1 a(0, j) = t(2 * j) a(1, j) = t(2 * j + 1) a(2, j) = t(2 * n2 + 2 * j) a(3, j) = t(2 * n2 + 2 * j + 1) end do else if (n1 .eq. 2) then do j = 0, n2 - 1 t(2 * j) = a(0, j) t(2 * j + 1) = a(1, j) end do call cdft(2 * n2, isgn, t, ip, w) do j = 0, n2 - 1 a(0, j) = t(2 * j) a(1, j) = t(2 * j + 1) end do end if end ! subroutine rdft2d_sub(n1max, n1, n2, isgn, a) integer n1max, n1, n2, isgn, n2h, i, j real*8 a(0 : n1max - 1, 0 : n2 - 1), xi n2h = n2 / 2 if (isgn .lt. 0) then do i = 1, n2h - 1 j = n2 - i xi = a(0, i) - a(0, j) a(0, i) = a(0, i) + a(0, j) a(0, j) = xi xi = a(1, j) - a(1, i) a(1, i) = a(1, i) + a(1, j) a(1, j) = xi end do else do i = 1, n2h - 1 j = n2 - i a(0, j) = 0.5d0 * (a(0, i) - a(0, j)) a(0, i) = a(0, i) - a(0, j) a(1, j) = 0.5d0 * (a(1, i) + a(1, j)) a(1, i) = a(1, i) - a(1, j) end do end if end ! subroutine ddxt2d_sub(n1max, n1, n2, ics, isgn, a, t, & ip, w) integer n1max, n1, n2, ics, isgn, ip(0 : *), i, j real*8 a(0 : n1max - 1, 0 : n2 - 1), t(0 : 4 * n2 - 1), & w(0 : *) if (n1 .gt. 2) then do i = 0, n1 - 4, 4 do j = 0, n2 - 1 t(j) = a(i, j) t(n2 + j) = a(i + 1, j) t(2 * n2 + j) = a(i + 2, j) t(3 * n2 + j) = a(i + 3, j) end do if (ics .eq. 0) then call ddct(n2, isgn, t, ip, w) call ddct(n2, isgn, t(n2), ip, w) call ddct(n2, isgn, t(2 * n2), ip, w) call ddct(n2, isgn, t(3 * n2), ip, w) else call ddst(n2, isgn, t, ip, w) call ddst(n2, isgn, t(n2), ip, w) call ddst(n2, isgn, t(2 * n2), ip, w) call ddst(n2, isgn, t(3 * n2), ip, w) end if do j = 0, n2 - 1 a(i, j) = t(j) a(i + 1, j) = t(n2 + j) a(i + 2, j) = t(2 * n2 + j) a(i + 3, j) = t(3 * n2 + j) end do end do else if (n1 .eq. 2) then do j = 0, n2 - 1 t(j) = a(0, j) t(n2 + j) = a(1, j) end do if (ics .eq. 0) then call ddct(n2, isgn, t, ip, w) call ddct(n2, isgn, t(n2), ip, w) else call ddst(n2, isgn, t, ip, w) call ddst(n2, isgn, t(n2), ip, w) end if do j = 0, n2 - 1 a(0, j) = t(j) a(1, j) = t(n2 + j) end do end if end ! OouraFFT-1.0/fftsg3d.c000066400000000000000000001626761246725775500145510ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :three data length :power of 2 decimation :frequency radix :split-radix, row-column data :inplace table :use functions cdft3d: Complex Discrete Fourier Transform rdft3d: Real Discrete Fourier Transform ddct3d: Discrete Cosine Transform ddst3d: Discrete Sine Transform function prototypes void cdft3d(int, int, int, int, double ***, double *, int *, double *); void rdft3d(int, int, int, int, double ***, double *, int *, double *); void rdft3dsort(int, int, int, int, double ***); void ddct3d(int, int, int, int, double ***, double *, int *, double *); void ddst3d(int, int, int, int, double ***, double *, int *, double *); necessary package fftsg.c : 1D-FFT package macro definitions USE_FFT3D_PTHREADS : default=not defined FFT3D_MAX_THREADS : must be 2^N, default=4 FFT3D_THREADS_BEGIN_N : default=65536 USE_FFT3D_WINTHREADS : default=not defined FFT3D_MAX_THREADS : must be 2^N, default=4 FFT3D_THREADS_BEGIN_N : default=131072 -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 x[j1][j2][j3] * exp(2*pi*i*j1*k1/n1) * exp(2*pi*i*j2*k2/n2) * exp(2*pi*i*j3*k3/n3), 0<=k1 X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 x[j1][j2][j3] * exp(-2*pi*i*j1*k1/n1) * exp(-2*pi*i*j2*k2/n2) * exp(-2*pi*i*j3*k3/n3), 0<=k1 ip[0] = 0; // first time only cdft3d(n1, n2, 2*n3, 1, a, t, ip, w); ip[0] = 0; // first time only cdft3d(n1, n2, 2*n3, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 1, n1 = power of 2 n2 :data length (int) n2 >= 1, n2 = power of 2 2*n3 :data length (int) n3 >= 1, n3 = power of 2 a[0...n1-1][0...n2-1][0...2*n3-1] :input/output data (double ***) input data a[j1][j2][2*j3] = Re(x[j1][j2][j3]), a[j1][j2][2*j3+1] = Im(x[j1][j2][j3]), 0<=j1= max(8*n1, 8*n2), if single thread, length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS, if multi threads, t is dynamically allocated, if t == NULL. ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) (n = max(n1, n2, n3)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1/2, n2/2, n3/2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of cdft3d(n1, n2, 2*n3, -1, a, t, ip, w); is cdft3d(n1, n2, 2*n3, 1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { for (j3 = 0; j3 <= 2 * n3 - 1; j3++) { a[j1][j2][j3] *= 1.0 / n1 / n2 / n3; } } } . -------- Real DFT / Inverse of Real DFT -------- [definition] RDFT R[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 a[j1][j2][j3] * cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + 2*pi*j3*k3/n3), 0<=k1 IRDFT (excluding scale) a[k1][k2][k3] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 (R[j1][j2][j3] * cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + 2*pi*j3*k3/n3) + I[j1][j2][j3] * sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + 2*pi*j3*k3/n3)), 0<=k1 ip[0] = 0; // first time only rdft3d(n1, n2, n3, 1, a, t, ip, w); ip[0] = 0; // first time only rdft3d(n1, n2, n3, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 n3 :data length (int) n3 >= 2, n3 = power of 2 a[0...n1-1][0...n2-1][0...n3-1] :input/output data (double ***) output data a[k1][k2][2*k3] = R[k1][k2][k3] = R[(n1-k1)%n1][(n2-k2)%n2][n3-k3], a[k1][k2][2*k3+1] = I[k1][k2][k3] = -I[(n1-k1)%n1][(n2-k2)%n2][n3-k3], 0<=k1 input data a[j1][j2][2*j3] = R[j1][j2][j3] = R[(n1-j1)%n1][(n2-j2)%n2][n3-j3], a[j1][j2][2*j3+1] = I[j1][j2][j3] = -I[(n1-j1)%n1][(n2-j2)%n2][n3-j3], 0<=j1= max(8*n1, 8*n2), if single thread, length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS, if multi threads, t is dynamically allocated, if t == NULL. ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) (n = max(n1, n2, n3/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1/2, n2/2, n3/4) + n3/4 w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of rdft3d(n1, n2, n3, 1, a, t, ip, w); is rdft3d(n1, n2, n3, -1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { for (j3 = 0; j3 <= n3 - 1; j3++) { a[j1][j2][j3] *= 2.0 / n1 / n2 / n3; } } } . -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] IDCT (excluding scale) C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 a[j1][j2][j3] * cos(pi*j1*(k1+1/2)/n1) * cos(pi*j2*(k2+1/2)/n2) * cos(pi*j3*(k3+1/2)/n3), 0<=k1 DCT C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 a[j1][j2][j3] * cos(pi*(j1+1/2)*k1/n1) * cos(pi*(j2+1/2)*k2/n2) * cos(pi*(j3+1/2)*k3/n3), 0<=k1 ip[0] = 0; // first time only ddct3d(n1, n2, n3, 1, a, t, ip, w); ip[0] = 0; // first time only ddct3d(n1, n2, n3, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 n3 :data length (int) n3 >= 2, n3 = power of 2 a[0...n1-1][0...n2-1][0...n3-1] :input/output data (double ***) output data a[k1][k2][k3] = C[k1][k2][k3], 0<=k1= max(4*n1, 4*n2), if single thread, length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS, if multi threads, t is dynamically allocated, if t == NULL. ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) (n = max(n1/2, n2/2, n3/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1*3/2, n2*3/2, n3*3/2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddct3d(n1, n2, n3, -1, a, t, ip, w); is for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2][0] *= 0.5; } for (j3 = 0; j3 <= n3 - 1; j3++) { a[j1][0][j3] *= 0.5; } } for (j2 = 0; j2 <= n2 - 1; j2++) { for (j3 = 0; j3 <= n3 - 1; j3++) { a[0][j2][j3] *= 0.5; } } ddct3d(n1, n2, n3, 1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { for (j3 = 0; j3 <= n3 - 1; j3++) { a[j1][j2][j3] *= 8.0 / n1 / n2 / n3; } } } . -------- DST (Discrete Sine Transform) / Inverse of DST -------- [definition] IDST (excluding scale) S[k1][k2][k3] = sum_j1=1^n1 sum_j2=1^n2 sum_j3=1^n3 A[j1][j2][j3] * sin(pi*j1*(k1+1/2)/n1) * sin(pi*j2*(k2+1/2)/n2) * sin(pi*j3*(k3+1/2)/n3), 0<=k1 DST S[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 a[j1][j2][j3] * sin(pi*(j1+1/2)*k1/n1) * sin(pi*(j2+1/2)*k2/n2) * sin(pi*(j3+1/2)*k3/n3), 0 ip[0] = 0; // first time only ddst3d(n1, n2, n3, 1, a, t, ip, w); ip[0] = 0; // first time only ddst3d(n1, n2, n3, -1, a, t, ip, w); [parameters] n1 :data length (int) n1 >= 2, n1 = power of 2 n2 :data length (int) n2 >= 2, n2 = power of 2 n3 :data length (int) n3 >= 2, n3 = power of 2 a[0...n1-1][0...n2-1][0...n3-1] :input/output data (double ***) input data a[j1%n1][j2%n2][j3%n3] = A[j1][j2][j3], 0 output data a[k1%n1][k2%n2][k3%n3] = S[k1][k2][k3], 0= max(4*n1, 4*n2), if single thread, length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS, if multi threads, t is dynamically allocated, if t == NULL. ip[0...*] :work area for bit reversal (int *) length of ip >= 2+sqrt(n) (n = max(n1/2, n2/2, n3/2)) ip[0],ip[1] are pointers of the cos/sin table. w[0...*] :cos/sin table (double *) length of w >= max(n1*3/2, n2*3/2, n3*3/2) w[],ip[] are initialized if ip[0] == 0. [remark] Inverse of ddst3d(n1, n2, n3, -1, a, t, ip, w); is for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2][0] *= 0.5; } for (j3 = 0; j3 <= n3 - 1; j3++) { a[j1][0][j3] *= 0.5; } } for (j2 = 0; j2 <= n2 - 1; j2++) { for (j3 = 0; j3 <= n3 - 1; j3++) { a[0][j2][j3] *= 0.5; } } ddst3d(n1, n2, n3, 1, a, t, ip, w); for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { for (j3 = 0; j3 <= n3 - 1; j3++) { a[j1][j2][j3] *= 8.0 / n1 / n2 / n3; } } } . */ #include #include #define fft3d_alloc_error_check(p) { \ if ((p) == NULL) { \ fprintf(stderr, "fft3d memory allocation error\n"); \ exit(1); \ } \ } #ifdef USE_FFT3D_PTHREADS #define USE_FFT3D_THREADS #ifndef FFT3D_MAX_THREADS #define FFT3D_MAX_THREADS 4 #endif #ifndef FFT3D_THREADS_BEGIN_N #define FFT3D_THREADS_BEGIN_N 65536 #endif #include #define fft3d_thread_t pthread_t #define fft3d_thread_create(thp,func,argp) { \ if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \ fprintf(stderr, "fft3d thread error\n"); \ exit(1); \ } \ } #define fft3d_thread_wait(th) { \ if (pthread_join(th, NULL) != 0) { \ fprintf(stderr, "fft3d thread error\n"); \ exit(1); \ } \ } #endif /* USE_FFT3D_PTHREADS */ #ifdef USE_FFT3D_WINTHREADS #define USE_FFT3D_THREADS #ifndef FFT3D_MAX_THREADS #define FFT3D_MAX_THREADS 4 #endif #ifndef FFT3D_THREADS_BEGIN_N #define FFT3D_THREADS_BEGIN_N 131072 #endif #include #define fft3d_thread_t HANDLE #define fft3d_thread_create(thp,func,argp) { \ DWORD thid; \ *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \ if (*(thp) == 0) { \ fprintf(stderr, "fft3d thread error\n"); \ exit(1); \ } \ } #define fft3d_thread_wait(th) { \ WaitForSingleObject(th, INFINITE); \ CloseHandle(th); \ } #endif /* USE_FFT3D_WINTHREADS */ void cdft3d(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, double ***a, double *t, int *ip, double *w); void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w); #ifdef USE_FFT3D_THREADS void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, double ***a, double *t, int *ip, double *w); void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w); #endif /* USE_FFT3D_THREADS */ int n, itnull, nt; n = n1; if (n < n2) { n = n2; } n <<= 1; if (n < n3) { n = n3; } if (n > (ip[0] << 2)) { makewt(n >> 2, ip, w); } itnull = 0; if (t == NULL) { itnull = 1; nt = n1; if (nt < n2) { nt = n2; } nt *= 8; #ifdef USE_FFT3D_THREADS nt *= FFT3D_MAX_THREADS; #endif /* USE_FFT3D_THREADS */ if (n3 == 4) { nt >>= 1; } else if (n3 < 4) { nt >>= 2; } t = (double *) malloc(sizeof(double) * nt); fft3d_alloc_error_check(t); } #ifdef USE_FFT3D_THREADS if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { xdft3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w); cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); } else #endif /* USE_FFT3D_THREADS */ { xdft3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w); cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); } if (itnull != 0) { free(t); } } void rdft3d(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, double ***a, double *t, int *ip, double *w); void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w); void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a); #ifdef USE_FFT3D_THREADS void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, double ***a, double *t, int *ip, double *w); void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w); #endif /* USE_FFT3D_THREADS */ int n, nw, nc, itnull, nt; n = n1; if (n < n2) { n = n2; } n <<= 1; if (n < n3) { n = n3; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n3 > (nc << 2)) { nc = n3 >> 2; makect(nc, ip, w + nw); } itnull = 0; if (t == NULL) { itnull = 1; nt = n1; if (nt < n2) { nt = n2; } nt *= 8; #ifdef USE_FFT3D_THREADS nt *= FFT3D_MAX_THREADS; #endif /* USE_FFT3D_THREADS */ if (n3 == 4) { nt >>= 1; } else if (n3 < 4) { nt >>= 2; } t = (double *) malloc(sizeof(double) * nt); fft3d_alloc_error_check(t); } #ifdef USE_FFT3D_THREADS if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { if (isgn < 0) { rdft3d_sub(n1, n2, n3, isgn, a); cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); } xdft3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w); if (isgn >= 0) { cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w); rdft3d_sub(n1, n2, n3, isgn, a); } } else #endif /* USE_FFT3D_THREADS */ { if (isgn < 0) { rdft3d_sub(n1, n2, n3, isgn, a); cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); } xdft3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w); if (isgn >= 0) { cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w); rdft3d_sub(n1, n2, n3, isgn, a); } } if (itnull != 0) { free(t); } } void rdft3dsort(int n1, int n2, int n3, int isgn, double ***a) { int n1h, n2h, i, j; double x, y; n1h = n1 >> 1; n2h = n2 >> 1; if (isgn < 0) { for (i = 0; i < n1; i++) { for (j = n2h + 1; j < n2; j++) { a[i][j][0] = a[i][j][n3 + 1]; a[i][j][1] = a[i][j][n3]; } } for (i = n1h + 1; i < n1; i++) { a[i][0][0] = a[i][0][n3 + 1]; a[i][0][1] = a[i][0][n3]; a[i][n2h][0] = a[i][n2h][n3 + 1]; a[i][n2h][1] = a[i][n2h][n3]; } a[0][0][1] = a[0][0][n3]; a[0][n2h][1] = a[0][n2h][n3]; a[n1h][0][1] = a[n1h][0][n3]; a[n1h][n2h][1] = a[n1h][n2h][n3]; } else { for (j = n2h + 1; j < n2; j++) { y = a[0][j][0]; x = a[0][j][1]; a[0][j][n3] = x; a[0][j][n3 + 1] = y; a[0][n2 - j][n3] = x; a[0][n2 - j][n3 + 1] = -y; a[0][j][0] = a[0][n2 - j][0]; a[0][j][1] = -a[0][n2 - j][1]; } for (i = 1; i < n1; i++) { for (j = n2h + 1; j < n2; j++) { y = a[i][j][0]; x = a[i][j][1]; a[i][j][n3] = x; a[i][j][n3 + 1] = y; a[n1 - i][n2 - j][n3] = x; a[n1 - i][n2 - j][n3 + 1] = -y; a[i][j][0] = a[n1 - i][n2 - j][0]; a[i][j][1] = -a[n1 - i][n2 - j][1]; } } for (i = n1h + 1; i < n1; i++) { y = a[i][0][0]; x = a[i][0][1]; a[i][0][n3] = x; a[i][0][n3 + 1] = y; a[n1 - i][0][n3] = x; a[n1 - i][0][n3 + 1] = -y; a[i][0][0] = a[n1 - i][0][0]; a[i][0][1] = -a[n1 - i][0][1]; y = a[i][n2h][0]; x = a[i][n2h][1]; a[i][n2h][n3] = x; a[i][n2h][n3 + 1] = y; a[n1 - i][n2h][n3] = x; a[n1 - i][n2h][n3 + 1] = -y; a[i][n2h][0] = a[n1 - i][n2h][0]; a[i][n2h][1] = -a[n1 - i][n2h][1]; } a[0][0][n3] = a[0][0][1]; a[0][0][n3 + 1] = 0; a[0][0][1] = 0; a[0][n2h][n3] = a[0][n2h][1]; a[0][n2h][n3 + 1] = 0; a[0][n2h][1] = 0; a[n1h][0][n3] = a[n1h][0][1]; a[n1h][0][n3 + 1] = 0; a[n1h][0][1] = 0; a[n1h][n2h][n3] = a[n1h][n2h][1]; a[n1h][n2h][n3 + 1] = 0; a[n1h][n2h][1] = 0; } } void ddct3d(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w); void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w); #ifdef USE_FFT3D_THREADS void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w); void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w); #endif /* USE_FFT3D_THREADS */ int n, nw, nc, itnull, nt; n = n1; if (n < n2) { n = n2; } if (n < n3) { n = n3; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } itnull = 0; if (t == NULL) { itnull = 1; nt = n1; if (nt < n2) { nt = n2; } nt *= 4; #ifdef USE_FFT3D_THREADS nt *= FFT3D_MAX_THREADS; #endif /* USE_FFT3D_THREADS */ if (n3 == 2) { nt >>= 1; } t = (double *) malloc(sizeof(double) * nt); fft3d_alloc_error_check(t); } #ifdef USE_FFT3D_THREADS if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { ddxt3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w); ddxt3db_subth(n1, n2, n3, 0, isgn, a, t, ip, w); } else #endif /* USE_FFT3D_THREADS */ { ddxt3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w); ddxt3db_sub(n1, n2, n3, 0, isgn, a, t, ip, w); } if (itnull != 0) { free(t); } } void ddst3d(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w) { void makewt(int nw, int *ip, double *w); void makect(int nc, int *ip, double *c); void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w); void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w); #ifdef USE_FFT3D_THREADS void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w); void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w); #endif /* USE_FFT3D_THREADS */ int n, nw, nc, itnull, nt; n = n1; if (n < n2) { n = n2; } if (n < n3) { n = n3; } nw = ip[0]; if (n > (nw << 2)) { nw = n >> 2; makewt(nw, ip, w); } nc = ip[1]; if (n > nc) { nc = n; makect(nc, ip, w + nw); } itnull = 0; if (t == NULL) { itnull = 1; nt = n1; if (nt < n2) { nt = n2; } nt *= 4; #ifdef USE_FFT3D_THREADS nt *= FFT3D_MAX_THREADS; #endif /* USE_FFT3D_THREADS */ if (n3 == 2) { nt >>= 1; } t = (double *) malloc(sizeof(double) * nt); fft3d_alloc_error_check(t); } #ifdef USE_FFT3D_THREADS if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) { ddxt3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w); ddxt3db_subth(n1, n2, n3, 1, isgn, a, t, ip, w); } else #endif /* USE_FFT3D_THREADS */ { ddxt3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w); ddxt3db_sub(n1, n2, n3, 1, isgn, a, t, ip, w); } if (itnull != 0) { free(t); } } /* -------- child routines -------- */ void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn, double ***a, double *t, int *ip, double *w) { void cdft(int n, int isgn, double *a, int *ip, double *w); void rdft(int n, int isgn, double *a, int *ip, double *w); int i, j, k; for (i = 0; i < n1; i++) { if (icr == 0) { for (j = 0; j < n2; j++) { cdft(n3, isgn, a[i][j], ip, w); } } else if (isgn >= 0) { for (j = 0; j < n2; j++) { rdft(n3, isgn, a[i][j], ip, w); } } if (n3 > 4) { for (k = 0; k < n3; k += 8) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][k]; t[2 * j + 1] = a[i][j][k + 1]; t[2 * n2 + 2 * j] = a[i][j][k + 2]; t[2 * n2 + 2 * j + 1] = a[i][j][k + 3]; t[4 * n2 + 2 * j] = a[i][j][k + 4]; t[4 * n2 + 2 * j + 1] = a[i][j][k + 5]; t[6 * n2 + 2 * j] = a[i][j][k + 6]; t[6 * n2 + 2 * j + 1] = a[i][j][k + 7]; } cdft(2 * n2, isgn, t, ip, w); cdft(2 * n2, isgn, &t[2 * n2], ip, w); cdft(2 * n2, isgn, &t[4 * n2], ip, w); cdft(2 * n2, isgn, &t[6 * n2], ip, w); for (j = 0; j < n2; j++) { a[i][j][k] = t[2 * j]; a[i][j][k + 1] = t[2 * j + 1]; a[i][j][k + 2] = t[2 * n2 + 2 * j]; a[i][j][k + 3] = t[2 * n2 + 2 * j + 1]; a[i][j][k + 4] = t[4 * n2 + 2 * j]; a[i][j][k + 5] = t[4 * n2 + 2 * j + 1]; a[i][j][k + 6] = t[6 * n2 + 2 * j]; a[i][j][k + 7] = t[6 * n2 + 2 * j + 1]; } } } else if (n3 == 4) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][0]; t[2 * j + 1] = a[i][j][1]; t[2 * n2 + 2 * j] = a[i][j][2]; t[2 * n2 + 2 * j + 1] = a[i][j][3]; } cdft(2 * n2, isgn, t, ip, w); cdft(2 * n2, isgn, &t[2 * n2], ip, w); for (j = 0; j < n2; j++) { a[i][j][0] = t[2 * j]; a[i][j][1] = t[2 * j + 1]; a[i][j][2] = t[2 * n2 + 2 * j]; a[i][j][3] = t[2 * n2 + 2 * j + 1]; } } else if (n3 == 2) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][0]; t[2 * j + 1] = a[i][j][1]; } cdft(2 * n2, isgn, t, ip, w); for (j = 0; j < n2; j++) { a[i][j][0] = t[2 * j]; a[i][j][1] = t[2 * j + 1]; } } if (icr != 0 && isgn < 0) { for (j = 0; j < n2; j++) { rdft(n3, isgn, a[i][j], ip, w); } } } } void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w) { void cdft(int n, int isgn, double *a, int *ip, double *w); int i, j, k; if (n3 > 4) { for (j = 0; j < n2; j++) { for (k = 0; k < n3; k += 8) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][k]; t[2 * i + 1] = a[i][j][k + 1]; t[2 * n1 + 2 * i] = a[i][j][k + 2]; t[2 * n1 + 2 * i + 1] = a[i][j][k + 3]; t[4 * n1 + 2 * i] = a[i][j][k + 4]; t[4 * n1 + 2 * i + 1] = a[i][j][k + 5]; t[6 * n1 + 2 * i] = a[i][j][k + 6]; t[6 * n1 + 2 * i + 1] = a[i][j][k + 7]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); cdft(2 * n1, isgn, &t[4 * n1], ip, w); cdft(2 * n1, isgn, &t[6 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][j][k] = t[2 * i]; a[i][j][k + 1] = t[2 * i + 1]; a[i][j][k + 2] = t[2 * n1 + 2 * i]; a[i][j][k + 3] = t[2 * n1 + 2 * i + 1]; a[i][j][k + 4] = t[4 * n1 + 2 * i]; a[i][j][k + 5] = t[4 * n1 + 2 * i + 1]; a[i][j][k + 6] = t[6 * n1 + 2 * i]; a[i][j][k + 7] = t[6 * n1 + 2 * i + 1]; } } } } else if (n3 == 4) { for (j = 0; j < n2; j++) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][0]; t[2 * i + 1] = a[i][j][1]; t[2 * n1 + 2 * i] = a[i][j][2]; t[2 * n1 + 2 * i + 1] = a[i][j][3]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][j][0] = t[2 * i]; a[i][j][1] = t[2 * i + 1]; a[i][j][2] = t[2 * n1 + 2 * i]; a[i][j][3] = t[2 * n1 + 2 * i + 1]; } } } else if (n3 == 2) { for (j = 0; j < n2; j++) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][0]; t[2 * i + 1] = a[i][j][1]; } cdft(2 * n1, isgn, t, ip, w); for (i = 0; i < n1; i++) { a[i][j][0] = t[2 * i]; a[i][j][1] = t[2 * i + 1]; } } } } void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a) { int n1h, n2h, i, j, k, l; double xi; n1h = n1 >> 1; n2h = n2 >> 1; if (isgn < 0) { for (i = 1; i < n1h; i++) { j = n1 - i; xi = a[i][0][0] - a[j][0][0]; a[i][0][0] += a[j][0][0]; a[j][0][0] = xi; xi = a[j][0][1] - a[i][0][1]; a[i][0][1] += a[j][0][1]; a[j][0][1] = xi; xi = a[i][n2h][0] - a[j][n2h][0]; a[i][n2h][0] += a[j][n2h][0]; a[j][n2h][0] = xi; xi = a[j][n2h][1] - a[i][n2h][1]; a[i][n2h][1] += a[j][n2h][1]; a[j][n2h][1] = xi; for (k = 1; k < n2h; k++) { l = n2 - k; xi = a[i][k][0] - a[j][l][0]; a[i][k][0] += a[j][l][0]; a[j][l][0] = xi; xi = a[j][l][1] - a[i][k][1]; a[i][k][1] += a[j][l][1]; a[j][l][1] = xi; xi = a[j][k][0] - a[i][l][0]; a[j][k][0] += a[i][l][0]; a[i][l][0] = xi; xi = a[i][l][1] - a[j][k][1]; a[j][k][1] += a[i][l][1]; a[i][l][1] = xi; } } for (k = 1; k < n2h; k++) { l = n2 - k; xi = a[0][k][0] - a[0][l][0]; a[0][k][0] += a[0][l][0]; a[0][l][0] = xi; xi = a[0][l][1] - a[0][k][1]; a[0][k][1] += a[0][l][1]; a[0][l][1] = xi; xi = a[n1h][k][0] - a[n1h][l][0]; a[n1h][k][0] += a[n1h][l][0]; a[n1h][l][0] = xi; xi = a[n1h][l][1] - a[n1h][k][1]; a[n1h][k][1] += a[n1h][l][1]; a[n1h][l][1] = xi; } } else { for (i = 1; i < n1h; i++) { j = n1 - i; a[j][0][0] = 0.5 * (a[i][0][0] - a[j][0][0]); a[i][0][0] -= a[j][0][0]; a[j][0][1] = 0.5 * (a[i][0][1] + a[j][0][1]); a[i][0][1] -= a[j][0][1]; a[j][n2h][0] = 0.5 * (a[i][n2h][0] - a[j][n2h][0]); a[i][n2h][0] -= a[j][n2h][0]; a[j][n2h][1] = 0.5 * (a[i][n2h][1] + a[j][n2h][1]); a[i][n2h][1] -= a[j][n2h][1]; for (k = 1; k < n2h; k++) { l = n2 - k; a[j][l][0] = 0.5 * (a[i][k][0] - a[j][l][0]); a[i][k][0] -= a[j][l][0]; a[j][l][1] = 0.5 * (a[i][k][1] + a[j][l][1]); a[i][k][1] -= a[j][l][1]; a[i][l][0] = 0.5 * (a[j][k][0] - a[i][l][0]); a[j][k][0] -= a[i][l][0]; a[i][l][1] = 0.5 * (a[j][k][1] + a[i][l][1]); a[j][k][1] -= a[i][l][1]; } } for (k = 1; k < n2h; k++) { l = n2 - k; a[0][l][0] = 0.5 * (a[0][k][0] - a[0][l][0]); a[0][k][0] -= a[0][l][0]; a[0][l][1] = 0.5 * (a[0][k][1] + a[0][l][1]); a[0][k][1] -= a[0][l][1]; a[n1h][l][0] = 0.5 * (a[n1h][k][0] - a[n1h][l][0]); a[n1h][k][0] -= a[n1h][l][0]; a[n1h][l][1] = 0.5 * (a[n1h][k][1] + a[n1h][l][1]); a[n1h][k][1] -= a[n1h][l][1]; } } } void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w) { void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int i, j, k; for (i = 0; i < n1; i++) { if (ics == 0) { for (j = 0; j < n2; j++) { ddct(n3, isgn, a[i][j], ip, w); } } else { for (j = 0; j < n2; j++) { ddst(n3, isgn, a[i][j], ip, w); } } if (n3 > 2) { for (k = 0; k < n3; k += 4) { for (j = 0; j < n2; j++) { t[j] = a[i][j][k]; t[n2 + j] = a[i][j][k + 1]; t[2 * n2 + j] = a[i][j][k + 2]; t[3 * n2 + j] = a[i][j][k + 3]; } if (ics == 0) { ddct(n2, isgn, t, ip, w); ddct(n2, isgn, &t[n2], ip, w); ddct(n2, isgn, &t[2 * n2], ip, w); ddct(n2, isgn, &t[3 * n2], ip, w); } else { ddst(n2, isgn, t, ip, w); ddst(n2, isgn, &t[n2], ip, w); ddst(n2, isgn, &t[2 * n2], ip, w); ddst(n2, isgn, &t[3 * n2], ip, w); } for (j = 0; j < n2; j++) { a[i][j][k] = t[j]; a[i][j][k + 1] = t[n2 + j]; a[i][j][k + 2] = t[2 * n2 + j]; a[i][j][k + 3] = t[3 * n2 + j]; } } } else if (n3 == 2) { for (j = 0; j < n2; j++) { t[j] = a[i][j][0]; t[n2 + j] = a[i][j][1]; } if (ics == 0) { ddct(n2, isgn, t, ip, w); ddct(n2, isgn, &t[n2], ip, w); } else { ddst(n2, isgn, t, ip, w); ddst(n2, isgn, &t[n2], ip, w); } for (j = 0; j < n2; j++) { a[i][j][0] = t[j]; a[i][j][1] = t[n2 + j]; } } } } void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w) { void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int i, j, k; if (n3 > 2) { for (j = 0; j < n2; j++) { for (k = 0; k < n3; k += 4) { for (i = 0; i < n1; i++) { t[i] = a[i][j][k]; t[n1 + i] = a[i][j][k + 1]; t[2 * n1 + i] = a[i][j][k + 2]; t[3 * n1 + i] = a[i][j][k + 3]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); ddct(n1, isgn, &t[2 * n1], ip, w); ddct(n1, isgn, &t[3 * n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); ddst(n1, isgn, &t[2 * n1], ip, w); ddst(n1, isgn, &t[3 * n1], ip, w); } for (i = 0; i < n1; i++) { a[i][j][k] = t[i]; a[i][j][k + 1] = t[n1 + i]; a[i][j][k + 2] = t[2 * n1 + i]; a[i][j][k + 3] = t[3 * n1 + i]; } } } } else if (n3 == 2) { for (j = 0; j < n2; j++) { for (i = 0; i < n1; i++) { t[i] = a[i][j][0]; t[n1 + i] = a[i][j][1]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); } for (i = 0; i < n1; i++) { a[i][j][0] = t[i]; a[i][j][1] = t[n1 + i]; } } } } #ifdef USE_FFT3D_THREADS struct fft3d_arg_st { int nthread; int n0; int n1; int n2; int n3; int ic; int isgn; double ***a; double *t; int *ip; double *w; }; typedef struct fft3d_arg_st fft3d_arg_t; void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn, double ***a, double *t, int *ip, double *w) { void *xdft3da_th(void *p); fft3d_thread_t th[FFT3D_MAX_THREADS]; fft3d_arg_t ag[FFT3D_MAX_THREADS]; int nthread, nt, i; nthread = FFT3D_MAX_THREADS; if (nthread > n1) { nthread = n1; } nt = 8 * n2; if (n3 == 4) { nt >>= 1; } else if (n3 < 4) { nt >>= 2; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].n3 = n3; ag[i].ic = icr; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft3d_thread_create(&th[i], xdft3da_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft3d_thread_wait(th[i]); } } void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a, double *t, int *ip, double *w) { void *cdft3db_th(void *p); fft3d_thread_t th[FFT3D_MAX_THREADS]; fft3d_arg_t ag[FFT3D_MAX_THREADS]; int nthread, nt, i; nthread = FFT3D_MAX_THREADS; if (nthread > n2) { nthread = n2; } nt = 8 * n1; if (n3 == 4) { nt >>= 1; } else if (n3 < 4) { nt >>= 2; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].n3 = n3; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft3d_thread_create(&th[i], cdft3db_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft3d_thread_wait(th[i]); } } void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w) { void *ddxt3da_th(void *p); fft3d_thread_t th[FFT3D_MAX_THREADS]; fft3d_arg_t ag[FFT3D_MAX_THREADS]; int nthread, nt, i; nthread = FFT3D_MAX_THREADS; if (nthread > n1) { nthread = n1; } nt = 4 * n2; if (n3 == 2) { nt >>= 1; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].n3 = n3; ag[i].ic = ics; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft3d_thread_create(&th[i], ddxt3da_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft3d_thread_wait(th[i]); } } void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn, double ***a, double *t, int *ip, double *w) { void *ddxt3db_th(void *p); fft3d_thread_t th[FFT3D_MAX_THREADS]; fft3d_arg_t ag[FFT3D_MAX_THREADS]; int nthread, nt, i; nthread = FFT3D_MAX_THREADS; if (nthread > n2) { nthread = n2; } nt = 4 * n1; if (n3 == 2) { nt >>= 1; } for (i = 0; i < nthread; i++) { ag[i].nthread = nthread; ag[i].n0 = i; ag[i].n1 = n1; ag[i].n2 = n2; ag[i].n3 = n3; ag[i].ic = ics; ag[i].isgn = isgn; ag[i].a = a; ag[i].t = &t[nt * i]; ag[i].ip = ip; ag[i].w = w; fft3d_thread_create(&th[i], ddxt3db_th, &ag[i]); } for (i = 0; i < nthread; i++) { fft3d_thread_wait(th[i]); } } void *xdft3da_th(void *p) { void cdft(int n, int isgn, double *a, int *ip, double *w); void rdft(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, n3, icr, isgn, *ip, i, j, k; double ***a, *t, *w; nthread = ((fft3d_arg_t *) p)->nthread; n0 = ((fft3d_arg_t *) p)->n0; n1 = ((fft3d_arg_t *) p)->n1; n2 = ((fft3d_arg_t *) p)->n2; n3 = ((fft3d_arg_t *) p)->n3; icr = ((fft3d_arg_t *) p)->ic; isgn = ((fft3d_arg_t *) p)->isgn; a = ((fft3d_arg_t *) p)->a; t = ((fft3d_arg_t *) p)->t; ip = ((fft3d_arg_t *) p)->ip; w = ((fft3d_arg_t *) p)->w; for (i = n0; i < n1; i += nthread) { if (icr == 0) { for (j = 0; j < n2; j++) { cdft(n3, isgn, a[i][j], ip, w); } } else if (isgn >= 0) { for (j = 0; j < n2; j++) { rdft(n3, isgn, a[i][j], ip, w); } } if (n3 > 4) { for (k = 0; k < n3; k += 8) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][k]; t[2 * j + 1] = a[i][j][k + 1]; t[2 * n2 + 2 * j] = a[i][j][k + 2]; t[2 * n2 + 2 * j + 1] = a[i][j][k + 3]; t[4 * n2 + 2 * j] = a[i][j][k + 4]; t[4 * n2 + 2 * j + 1] = a[i][j][k + 5]; t[6 * n2 + 2 * j] = a[i][j][k + 6]; t[6 * n2 + 2 * j + 1] = a[i][j][k + 7]; } cdft(2 * n2, isgn, t, ip, w); cdft(2 * n2, isgn, &t[2 * n2], ip, w); cdft(2 * n2, isgn, &t[4 * n2], ip, w); cdft(2 * n2, isgn, &t[6 * n2], ip, w); for (j = 0; j < n2; j++) { a[i][j][k] = t[2 * j]; a[i][j][k + 1] = t[2 * j + 1]; a[i][j][k + 2] = t[2 * n2 + 2 * j]; a[i][j][k + 3] = t[2 * n2 + 2 * j + 1]; a[i][j][k + 4] = t[4 * n2 + 2 * j]; a[i][j][k + 5] = t[4 * n2 + 2 * j + 1]; a[i][j][k + 6] = t[6 * n2 + 2 * j]; a[i][j][k + 7] = t[6 * n2 + 2 * j + 1]; } } } else if (n3 == 4) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][0]; t[2 * j + 1] = a[i][j][1]; t[2 * n2 + 2 * j] = a[i][j][2]; t[2 * n2 + 2 * j + 1] = a[i][j][3]; } cdft(2 * n2, isgn, t, ip, w); cdft(2 * n2, isgn, &t[2 * n2], ip, w); for (j = 0; j < n2; j++) { a[i][j][0] = t[2 * j]; a[i][j][1] = t[2 * j + 1]; a[i][j][2] = t[2 * n2 + 2 * j]; a[i][j][3] = t[2 * n2 + 2 * j + 1]; } } else if (n3 == 2) { for (j = 0; j < n2; j++) { t[2 * j] = a[i][j][0]; t[2 * j + 1] = a[i][j][1]; } cdft(2 * n2, isgn, t, ip, w); for (j = 0; j < n2; j++) { a[i][j][0] = t[2 * j]; a[i][j][1] = t[2 * j + 1]; } } if (icr != 0 && isgn < 0) { for (j = 0; j < n2; j++) { rdft(n3, isgn, a[i][j], ip, w); } } } return (void *) 0; } void *cdft3db_th(void *p) { void cdft(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, n3, isgn, *ip, i, j, k; double ***a, *t, *w; nthread = ((fft3d_arg_t *) p)->nthread; n0 = ((fft3d_arg_t *) p)->n0; n1 = ((fft3d_arg_t *) p)->n1; n2 = ((fft3d_arg_t *) p)->n2; n3 = ((fft3d_arg_t *) p)->n3; isgn = ((fft3d_arg_t *) p)->isgn; a = ((fft3d_arg_t *) p)->a; t = ((fft3d_arg_t *) p)->t; ip = ((fft3d_arg_t *) p)->ip; w = ((fft3d_arg_t *) p)->w; if (n3 > 4) { for (j = n0; j < n2; j += nthread) { for (k = 0; k < n3; k += 8) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][k]; t[2 * i + 1] = a[i][j][k + 1]; t[2 * n1 + 2 * i] = a[i][j][k + 2]; t[2 * n1 + 2 * i + 1] = a[i][j][k + 3]; t[4 * n1 + 2 * i] = a[i][j][k + 4]; t[4 * n1 + 2 * i + 1] = a[i][j][k + 5]; t[6 * n1 + 2 * i] = a[i][j][k + 6]; t[6 * n1 + 2 * i + 1] = a[i][j][k + 7]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); cdft(2 * n1, isgn, &t[4 * n1], ip, w); cdft(2 * n1, isgn, &t[6 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][j][k] = t[2 * i]; a[i][j][k + 1] = t[2 * i + 1]; a[i][j][k + 2] = t[2 * n1 + 2 * i]; a[i][j][k + 3] = t[2 * n1 + 2 * i + 1]; a[i][j][k + 4] = t[4 * n1 + 2 * i]; a[i][j][k + 5] = t[4 * n1 + 2 * i + 1]; a[i][j][k + 6] = t[6 * n1 + 2 * i]; a[i][j][k + 7] = t[6 * n1 + 2 * i + 1]; } } } } else if (n3 == 4) { for (j = n0; j < n2; j += nthread) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][0]; t[2 * i + 1] = a[i][j][1]; t[2 * n1 + 2 * i] = a[i][j][2]; t[2 * n1 + 2 * i + 1] = a[i][j][3]; } cdft(2 * n1, isgn, t, ip, w); cdft(2 * n1, isgn, &t[2 * n1], ip, w); for (i = 0; i < n1; i++) { a[i][j][0] = t[2 * i]; a[i][j][1] = t[2 * i + 1]; a[i][j][2] = t[2 * n1 + 2 * i]; a[i][j][3] = t[2 * n1 + 2 * i + 1]; } } } else if (n3 == 2) { for (j = n0; j < n2; j += nthread) { for (i = 0; i < n1; i++) { t[2 * i] = a[i][j][0]; t[2 * i + 1] = a[i][j][1]; } cdft(2 * n1, isgn, t, ip, w); for (i = 0; i < n1; i++) { a[i][j][0] = t[2 * i]; a[i][j][1] = t[2 * i + 1]; } } } return (void *) 0; } void *ddxt3da_th(void *p) { void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k; double ***a, *t, *w; nthread = ((fft3d_arg_t *) p)->nthread; n0 = ((fft3d_arg_t *) p)->n0; n1 = ((fft3d_arg_t *) p)->n1; n2 = ((fft3d_arg_t *) p)->n2; n3 = ((fft3d_arg_t *) p)->n3; ics = ((fft3d_arg_t *) p)->ic; isgn = ((fft3d_arg_t *) p)->isgn; a = ((fft3d_arg_t *) p)->a; t = ((fft3d_arg_t *) p)->t; ip = ((fft3d_arg_t *) p)->ip; w = ((fft3d_arg_t *) p)->w; for (i = n0; i < n1; i += nthread) { if (ics == 0) { for (j = 0; j < n2; j++) { ddct(n3, isgn, a[i][j], ip, w); } } else { for (j = 0; j < n2; j++) { ddst(n3, isgn, a[i][j], ip, w); } } if (n3 > 2) { for (k = 0; k < n3; k += 4) { for (j = 0; j < n2; j++) { t[j] = a[i][j][k]; t[n2 + j] = a[i][j][k + 1]; t[2 * n2 + j] = a[i][j][k + 2]; t[3 * n2 + j] = a[i][j][k + 3]; } if (ics == 0) { ddct(n2, isgn, t, ip, w); ddct(n2, isgn, &t[n2], ip, w); ddct(n2, isgn, &t[2 * n2], ip, w); ddct(n2, isgn, &t[3 * n2], ip, w); } else { ddst(n2, isgn, t, ip, w); ddst(n2, isgn, &t[n2], ip, w); ddst(n2, isgn, &t[2 * n2], ip, w); ddst(n2, isgn, &t[3 * n2], ip, w); } for (j = 0; j < n2; j++) { a[i][j][k] = t[j]; a[i][j][k + 1] = t[n2 + j]; a[i][j][k + 2] = t[2 * n2 + j]; a[i][j][k + 3] = t[3 * n2 + j]; } } } else if (n3 == 2) { for (j = 0; j < n2; j++) { t[j] = a[i][j][0]; t[n2 + j] = a[i][j][1]; } if (ics == 0) { ddct(n2, isgn, t, ip, w); ddct(n2, isgn, &t[n2], ip, w); } else { ddst(n2, isgn, t, ip, w); ddst(n2, isgn, &t[n2], ip, w); } for (j = 0; j < n2; j++) { a[i][j][0] = t[j]; a[i][j][1] = t[n2 + j]; } } } return (void *) 0; } void *ddxt3db_th(void *p) { void ddct(int n, int isgn, double *a, int *ip, double *w); void ddst(int n, int isgn, double *a, int *ip, double *w); int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k; double ***a, *t, *w; nthread = ((fft3d_arg_t *) p)->nthread; n0 = ((fft3d_arg_t *) p)->n0; n1 = ((fft3d_arg_t *) p)->n1; n2 = ((fft3d_arg_t *) p)->n2; n3 = ((fft3d_arg_t *) p)->n3; ics = ((fft3d_arg_t *) p)->ic; isgn = ((fft3d_arg_t *) p)->isgn; a = ((fft3d_arg_t *) p)->a; t = ((fft3d_arg_t *) p)->t; ip = ((fft3d_arg_t *) p)->ip; w = ((fft3d_arg_t *) p)->w; if (n3 > 2) { for (j = n0; j < n2; j += nthread) { for (k = 0; k < n3; k += 4) { for (i = 0; i < n1; i++) { t[i] = a[i][j][k]; t[n1 + i] = a[i][j][k + 1]; t[2 * n1 + i] = a[i][j][k + 2]; t[3 * n1 + i] = a[i][j][k + 3]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); ddct(n1, isgn, &t[2 * n1], ip, w); ddct(n1, isgn, &t[3 * n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); ddst(n1, isgn, &t[2 * n1], ip, w); ddst(n1, isgn, &t[3 * n1], ip, w); } for (i = 0; i < n1; i++) { a[i][j][k] = t[i]; a[i][j][k + 1] = t[n1 + i]; a[i][j][k + 2] = t[2 * n1 + i]; a[i][j][k + 3] = t[3 * n1 + i]; } } } } else if (n3 == 2) { for (j = n0; j < n2; j += nthread) { for (i = 0; i < n1; i++) { t[i] = a[i][j][0]; t[n1 + i] = a[i][j][1]; } if (ics == 0) { ddct(n1, isgn, t, ip, w); ddct(n1, isgn, &t[n1], ip, w); } else { ddst(n1, isgn, t, ip, w); ddst(n1, isgn, &t[n1], ip, w); } for (i = 0; i < n1; i++) { a[i][j][0] = t[i]; a[i][j][1] = t[n1 + i]; } } } return (void *) 0; } #endif /* USE_FFT3D_THREADS */ OouraFFT-1.0/fftsg3d.f000066400000000000000000001120601246725775500145320ustar00rootroot00000000000000! Fast Fourier/Cosine/Sine Transform ! dimension :three ! data length :power of 2 ! decimation :frequency ! radix :split-radix, row-column ! data :inplace ! table :use ! subroutines ! cdft3d: Complex Discrete Fourier Transform ! rdft3d: Real Discrete Fourier Transform ! ddct3d: Discrete Cosine Transform ! ddst3d: Discrete Sine Transform ! necessary package ! fftsg.f : 1D-FFT package ! ! ! -------- Complex DFT (Discrete Fourier Transform) -------- ! [definition] ! ! X(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 ! x(j1,j2,j3) * ! exp(2*pi*i*j1*k1/n1) * ! exp(2*pi*i*j2*k2/n2) * ! exp(2*pi*i*j3*k3/n3), ! 0<=k1 ! X(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 ! x(j1,j2,j3) * ! exp(-2*pi*i*j1*k1/n1) * ! exp(-2*pi*i*j2*k2/n2) * ! exp(-2*pi*i*j3*k3/n3), ! 0<=k1 ! ip(0) = 0 ! first time only ! call cdft3d(n1max, n2max, 2*n1, n2, n3, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call cdft3d(n1max, n2max, 2*n1, n2, n3, -1, a, t, ip, w) ! [parameters] ! n1max :row1 size of the 3D array (integer) ! n2max :row2 size of the 3D array (integer) ! 2*n1 :data length (integer) ! n1 >= 1, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 1, n2 = power of 2 ! n3 :data length (integer) ! n3 >= 1, n3 = power of 2 ! a(0:2*n1-1,0:n2-1,0:n3-1) ! :input/output data (real*8) ! input data ! a(2*j1,j2,j3) = Re(x(j1,j2,j3)), ! a(2*j1+1,j2,j3) = Im(x(j1,j2,j3)), ! 0<=j1= max(8*n2, 8*n3) ! ip(0:*):work area for bit reversal (integer) ! length of ip >= 2+sqrt(n) ! (n = max(n1, n2, n3)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1/2, n2/2, n3/2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call cdft3d(n1max, n2max, 2*n1, n2, n3, -1, a, t, ip, w) ! is ! call cdft3d(n1max, n2max, 2*n1, n2, n3, 1, a, t, ip, w) ! do j3 = 0, n3 - 1 ! do j2 = 0, n2 - 1 ! do j1 = 0, 2 * n1 - 1 ! a(j1,j2,j3) = a(j1,j2,j3) * (1.0d0/n1/n2/n3) ! end do ! end do ! end do ! . ! ! ! -------- Real DFT / Inverse of Real DFT -------- ! [definition] ! RDFT ! R(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 ! a(j1,j2,j3) * ! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + ! 2*pi*j3*k3/n3), ! 0<=k1 IRDFT (excluding scale) ! a(k1,k2,k3) = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 ! (R(j1,j2,j3) * ! cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + ! 2*pi*j3*k3/n3) + ! I(j1,j2,j3) * ! sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 + ! 2*pi*j3*k3/n3)), ! 0<=k1 ! ip(0) = 0 ! first time only ! call rdft3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call rdft3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) ! [parameters] ! n1max :row1 size of the 3D array (integer) ! n2max :row2 size of the 3D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! n3 :data length (integer) ! n3 >= 2, n3 = power of 2 ! a(0:n1-1,0:n2-1,0:n3-1) ! :input/output data (real*8) ! ! output data ! a(2*k1,k2,k3) = R(k1,k2,k3) ! = R(n1-k1,mod(n2-k2,n2),mod(n3-k3,n3)), ! a(2*k1+1,k2,k3) = I(k1,k2,k3) ! = -I(n1-k1,mod(n2-k2,n2),mod(n3-k3,n3)), ! 0 ! input data ! a(2*j1,j2,j3) = R(j1,j2,j3) ! = R(n1-j1,mod(n2-j2,n2),mod(n3-j3,n3)), ! a(2*j1+1,j2,j3) = I(j1,j2,j3) ! = -I(n1-j1,mod(n2-j2,n2),mod(n3-j3,n3)), ! 0= max(8*n2, 8*n3) ! ip(0:*):work area for bit reversal (integer) ! length of ip >= 2+sqrt(n) ! (n = max(n1/2, n2, n3)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1/4, n2/2, n3/2) + n1/4 ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call rdft3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) ! is ! call rdft3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) ! do j3 = 0, n3 - 1 ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1,j2,j3) = a(j1,j2,j3) * (2.0d0/n1/n2/n3) ! end do ! end do ! end do ! . ! ! ! -------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- ! [definition] ! IDCT (excluding scale) ! C(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 ! a(j1,j2,j3) * ! cos(pi*j1*(k1+1/2)/n1) * ! cos(pi*j2*(k2+1/2)/n2) * ! cos(pi*j3*(k3+1/2)/n3), ! 0<=k1 DCT ! C(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 ! a(j1,j2,j3) * ! cos(pi*(j1+1/2)*k1/n1) * ! cos(pi*(j2+1/2)*k2/n2) * ! cos(pi*(j3+1/2)*k3/n3), ! 0<=k1 ! ip(0) = 0 ! first time only ! call ddct3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call ddct3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) ! [parameters] ! n1max :row1 size of the 3D array (integer) ! n2max :row2 size of the 3D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! n3 :data length (integer) ! n3 >= 2, n3 = power of 2 ! a(0:n1-1,0:n2-1,0:n3-1) ! :input/output data (real*8) ! output data ! a(k1,k2,k3) = C(k1,k2,k3), ! 0<=k1= max(4*n2, 4*n3) ! ip(0:*):work area for bit reversal (integer) ! length of ip >= 2+sqrt(n) ! (n = max(n1/2, n2/2, n3/2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1*3/2, n2*3/2, n3*3/2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddct3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) ! is ! do j3 = 0, n3 - 1 ! do j2 = 0, n2 - 1 ! a(0, j2, j3) = a(0, j2, j3) * 0.5d0 ! end do ! do j1 = 0, n1 - 1 ! a(j1, 0, j3) = a(j1, 0, j3) * 0.5d0 ! end do ! end do ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1, j2, 0) = a(j1, j2, 0) * 0.5d0 ! end do ! end do ! call ddct3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) ! do j3 = 0, n3 - 1 ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1,j2,j3) = a(j1,j2,j3) * (8.0d0/n1/n2/n3) ! end do ! end do ! end do ! . ! ! ! -------- DST (Discrete Sine Transform) / Inverse of DST -------- ! [definition] ! IDST (excluding scale) ! S(k1,k2,k3) = sum_j1=1^n1 sum_j2=1^n2 sum_j3=1^n3 ! A(j1,j2,j3) * ! sin(pi*j1*(k1+1/2)/n1) * ! sin(pi*j2*(k2+1/2)/n2) * ! sin(pi*j3*(k3+1/2)/n3), ! 0<=k1 DST ! S(k1,k2,k3) = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1 ! a(j1,j2,j3) * ! sin(pi*(j1+1/2)*k1/n1) * ! sin(pi*(j2+1/2)*k2/n2) * ! sin(pi*(j3+1/2)*k3/n3), ! 0 ! ip(0) = 0 ! first time only ! call ddst3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) ! ! ip(0) = 0 ! first time only ! call ddst3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) ! [parameters] ! n1max :row1 size of the 3D array (integer) ! n2max :row2 size of the 3D array (integer) ! n1 :data length (integer) ! n1 >= 2, n1 = power of 2 ! n2 :data length (integer) ! n2 >= 2, n2 = power of 2 ! n3 :data length (integer) ! n3 >= 2, n3 = power of 2 ! a(0:n1-1,0:n2-1,0:n3-1) ! :input/output data (real*8) ! ! input data ! a(mod(j1,n1),mod(j2,n2),mod(j3,n3)) = A(j1,j2,j3), ! 0 ! output data ! a(mod(k1,n1),mod(k2,n2),mod(k3,n3)) = S(k1,k2,k3), ! 0= max(4*n2, 4*n3) ! ip(0:*):work area for bit reversal (integer) ! length of ip >= 2+sqrt(n) ! (n = max(n1/2, n2/2, n3/2)) ! ip(0),ip(1) are pointers of the cos/sin table. ! w(0:*) :cos/sin table (real*8) ! length of w >= max(n1*3/2, n2*3/2, n3*3/2) ! w(),ip() are initialized if ip(0) = 0. ! [remark] ! Inverse of ! call ddst3d(n1max, n2max, n1, n2, n3, -1, a, t, ip, w) ! is ! do j3 = 0, n3 - 1 ! do j2 = 0, n2 - 1 ! a(0, j2, j3) = a(0, j2, j3) * 0.5d0 ! end do ! do j1 = 0, n1 - 1 ! a(j1, 0, j3) = a(j1, 0, j3) * 0.5d0 ! end do ! end do ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1, j2, 0) = a(j1, j2, 0) * 0.5d0 ! end do ! end do ! call ddst3d(n1max, n2max, n1, n2, n3, 1, a, t, ip, w) ! do j3 = 0, n3 - 1 ! do j2 = 0, n2 - 1 ! do j1 = 0, n1 - 1 ! a(j1,j2,j3) = a(j1,j2,j3) * (8.0d0/n1/n2/n3) ! end do ! end do ! end do ! . ! ! subroutine cdft3d(n1max, n2max, n1, n2, n3, isgn, a, & t, ip, w) integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), n real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), & t(0 : *), w(0 : *) n = 2 * max(n2, n3) n = max(n, n1) if (n .gt. 4 * ip(0)) then call makewt(n / 4, ip, w) end if call xdft3da_sub(n1max, n2max, n1, n2, n3, 0, & isgn, a, t, ip, w) call cdft3db_sub(n1max, n2max, n1, n2, n3, & isgn, a, t, ip, w) end ! subroutine rdft3d(n1max, n2max, n1, n2, n3, isgn, a, & t, ip, w) integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), & n, nw, nc real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), & t(0 : *), w(0 : *) n = 2 * max(n2, n3) n = max(n, n1) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n1 .gt. 4 * nc) then nc = n1 / 4 call makect(nc, ip, w(nw)) end if if (isgn .lt. 0) then call rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) call cdft3db_sub(n1max, n2max, n1, n2, n3, & isgn, a, t, ip, w) call xdft3da_sub(n1max, n2max, n1, n2, n3, 1, & isgn, a, t, ip, w) else call xdft3da_sub(n1max, n2max, n1, n2, n3, 1, & isgn, a, t, ip, w) call cdft3db_sub(n1max, n2max, n1, n2, n3, & isgn, a, t, ip, w) call rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) end if end ! subroutine rdft3dsort(n1max, n2max, n1, n2, n3, isgn, a) integer n1max, n2max, n1, n2, n3, isgn, n2h, n3h, j, k real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), x, y n2h = n2 / 2 n3h = n3 / 2 if (isgn .lt. 0) then do k = 0, n3 - 1 do j = n2h + 1, n2 - 1 a(0, j, k) = a(n1 + 1, j, k) a(1, j, k) = a(n1, j, k) end do end do do k = n3h + 1, n3 - 1 a(0, 0, k) = a(n1 + 1, 0, k) a(1, 0, k) = a(n1, 0, k) a(0, n2h, k) = a(n1 + 1, n2h, k) a(1, n2h, k) = a(n1, n2h, k) end do a(1, 0, 0) = a(n1, 0, 0) a(1, n2h, 0) = a(n1, n2h, 0) a(1, 0, n3h) = a(n1, 0, n3h) a(1, n2h, n3h) = a(n1, n2h, n3h) else do j = n2h + 1, n2 - 1 y = a(0, j, 0) x = a(1, j, 0) a(n1, j, 0) = x a(n1 + 1, j, 0) = y a(n1, n2 - j, 0) = x a(n1 + 1, n2 - j, 0) = -y a(0, j, 0) = a(0, n2 - j, 0) a(1, j, 0) = -a(1, n2 - j, 0) end do do k = 1, n3 - 1 do j = n2h + 1, n2 - 1 y = a(0, j, k) x = a(1, j, k) a(n1, j, k) = x a(n1 + 1, j, k) = y a(n1, n2 - j, n3 - k) = x a(n1 + 1, n2 - j, n3 - k) = -y a(0, j, k) = a(0, n2 - j, n3 - k) a(1, j, k) = -a(1, n2 - j, n3 - k) end do end do do k = n3h + 1, n3 - 1 y = a(0, 0, k) x = a(1, 0, k) a(n1, 0, k) = x a(n1 + 1, 0, k) = y a(n1, 0, n3 - k) = x a(n1 + 1, 0, n3 - k) = -y a(0, 0, k) = a(0, 0, n3 - k) a(1, 0, k) = -a(1, 0, n3 - k) y = a(0, n2h, k) x = a(1, n2h, k) a(n1, n2h, k) = x a(n1 + 1, n2h, k) = y a(n1, n2h, n3 - k) = x a(n1 + 1, n2h, n3 - k) = -y a(0, n2h, k) = a(0, n2h, n3 - k) a(1, n2h, k) = -a(1, n2h, n3 - k) end do a(n1, 0, 0) = a(1, 0, 0) a(n1 + 1, 0, 0) = 0 a(1, 0, 0) = 0 a(n1, n2h, 0) = a(1, n2h, 0) a(n1 + 1, n2h, 0) = 0 a(1, n2h, 0) = 0 a(n1, 0, n3h) = a(1, 0, n3h) a(n1 + 1, 0, n3h) = 0 a(1, 0, n3h) = 0 a(n1, n2h, n3h) = a(1, n2h, n3h) a(n1 + 1, n2h, n3h) = 0 a(1, n2h, n3h) = 0 end if end ! subroutine ddct3d(n1max, n2max, n1, n2, n3, isgn, a, & t, ip, w) integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), & n, nw, nc real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), & t(0 : *), w(0 : *) n = max(n2, n3) n = max(n, n1) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if call ddxt3da_sub(n1max, n2max, n1, n2, n3, 0, & isgn, a, t, ip, w) call ddxt3db_sub(n1max, n2max, n1, n2, n3, 0, & isgn, a, t, ip, w) end ! subroutine ddst3d(n1max, n2max, n1, n2, n3, isgn, a, & t, ip, w) integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), & n, nw, nc real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), & t(0 : *), w(0 : *) n = max(n2, n3) n = max(n, n1) nw = ip(0) if (n .gt. 4 * nw) then nw = n / 4 call makewt(nw, ip, w) end if nc = ip(1) if (n .gt. nc) then nc = n call makect(nc, ip, w(nw)) end if call ddxt3da_sub(n1max, n2max, n1, n2, n3, 1, & isgn, a, t, ip, w) call ddxt3db_sub(n1max, n2max, n1, n2, n3, 1, & isgn, a, t, ip, w) end ! ! -------- child routines -------- ! subroutine xdft3da_sub(n1max, n2max, n1, n2, n3, icr, & isgn, a, t, ip, w) integer n1max, n2max, n1, n2, n3, icr, isgn, & ip(0 : *), i, j, k real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), & t(0 : *), w(0 : *) do k = 0, n3 - 1 if (icr .eq. 0) then do j = 0, n2 - 1 call cdft(n1, isgn, a(0, j, k), ip, w) end do else if (isgn .ge. 0) then do j = 0, n2 - 1 call rdft(n1, isgn, a(0, j, k), ip, w) end do end if if (n1 .gt. 4) then do i = 0, n1 - 8, 8 do j = 0, n2 - 1 t(2 * j) = a(i, j, k) t(2 * j + 1) = a(i + 1, j, k) t(2 * n2 + 2 * j) = a(i + 2, j, k) t(2 * n2 + 2 * j + 1) = a(i + 3, j, k) t(4 * n2 + 2 * j) = a(i + 4, j, k) t(4 * n2 + 2 * j + 1) = a(i + 5, j, k) t(6 * n2 + 2 * j) = a(i + 6, j, k) t(6 * n2 + 2 * j + 1) = a(i + 7, j, k) end do call cdft(2 * n2, isgn, t, ip, w) call cdft(2 * n2, isgn, t(2 * n2), ip, w) call cdft(2 * n2, isgn, t(4 * n2), ip, w) call cdft(2 * n2, isgn, t(6 * n2), ip, w) do j = 0, n2 - 1 a(i, j, k) = t(2 * j) a(i + 1, j, k) = t(2 * j + 1) a(i + 2, j, k) = t(2 * n2 + 2 * j) a(i + 3, j, k) = t(2 * n2 + 2 * j + 1) a(i + 4, j, k) = t(4 * n2 + 2 * j) a(i + 5, j, k) = t(4 * n2 + 2 * j + 1) a(i + 6, j, k) = t(6 * n2 + 2 * j) a(i + 7, j, k) = t(6 * n2 + 2 * j + 1) end do end do else if (n1 .eq. 4) then do j = 0, n2 - 1 t(2 * j) = a(0, j, k) t(2 * j + 1) = a(1, j, k) t(2 * n2 + 2 * j) = a(2, j, k) t(2 * n2 + 2 * j + 1) = a(3, j, k) end do call cdft(2 * n2, isgn, t, ip, w) call cdft(2 * n2, isgn, t(2 * n2), ip, w) do j = 0, n2 - 1 a(0, j, k) = t(2 * j) a(1, j, k) = t(2 * j + 1) a(2, j, k) = t(2 * n2 + 2 * j) a(3, j, k) = t(2 * n2 + 2 * j + 1) end do else if (n1 .eq. 2) then do j = 0, n2 - 1 t(2 * j) = a(0, j, k) t(2 * j + 1) = a(1, j, k) end do call cdft(2 * n2, isgn, t, ip, w) do j = 0, n2 - 1 a(0, j, k) = t(2 * j) a(1, j, k) = t(2 * j + 1) end do end if if (icr .ne. 0 .and. isgn .lt. 0) then do j = 0, n2 - 1 call rdft(n1, isgn, a(0, j, k), ip, w) end do end if end do end ! subroutine cdft3db_sub(n1max, n2max, n1, n2, n3, & isgn, a, t, ip, w) integer n1max, n2max, n1, n2, n3, isgn, ip(0 : *), & i, j, k real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), & t(0 : *), w(0 : *) if (n1 .gt. 4) then do j = 0, n2 - 1 do i = 0, n1 - 8, 8 do k = 0, n3 - 1 t(2 * k) = a(i, j, k) t(2 * k + 1) = a(i + 1, j, k) t(2 * n3 + 2 * k) = a(i + 2, j, k) t(2 * n3 + 2 * k + 1) = a(i + 3, j, k) t(4 * n3 + 2 * k) = a(i + 4, j, k) t(4 * n3 + 2 * k + 1) = a(i + 5, j, k) t(6 * n3 + 2 * k) = a(i + 6, j, k) t(6 * n3 + 2 * k + 1) = a(i + 7, j, k) end do call cdft(2 * n3, isgn, t, ip, w) call cdft(2 * n3, isgn, t(2 * n3), ip, w) call cdft(2 * n3, isgn, t(4 * n3), ip, w) call cdft(2 * n3, isgn, t(6 * n3), ip, w) do k = 0, n3 - 1 a(i, j, k) = t(2 * k) a(i + 1, j, k) = t(2 * k + 1) a(i + 2, j, k) = t(2 * n3 + 2 * k) a(i + 3, j, k) = t(2 * n3 + 2 * k + 1) a(i + 4, j, k) = t(4 * n3 + 2 * k) a(i + 5, j, k) = t(4 * n3 + 2 * k + 1) a(i + 6, j, k) = t(6 * n3 + 2 * k) a(i + 7, j, k) = t(6 * n3 + 2 * k + 1) end do end do end do else if (n1 .eq. 4) then do j = 0, n2 - 1 do k = 0, n3 - 1 t(2 * k) = a(0, j, k) t(2 * k + 1) = a(1, j, k) t(2 * n3 + 2 * k) = a(2, j, k) t(2 * n3 + 2 * k + 1) = a(3, j, k) end do call cdft(2 * n3, isgn, t, ip, w) call cdft(2 * n3, isgn, t(2 * n3), ip, w) do k = 0, n3 - 1 a(0, j, k) = t(2 * k) a(1, j, k) = t(2 * k + 1) a(2, j, k) = t(2 * n3 + 2 * k) a(3, j, k) = t(2 * n3 + 2 * k + 1) end do end do else if (n1 .eq. 2) then do j = 0, n2 - 1 do k = 0, n3 - 1 t(2 * k) = a(0, j, k) t(2 * k + 1) = a(1, j, k) end do call cdft(2 * n3, isgn, t, ip, w) do k = 0, n3 - 1 a(0, j, k) = t(2 * k) a(1, j, k) = t(2 * k + 1) end do end do end if end ! subroutine rdft3d_sub(n1max, n2max, n1, n2, n3, isgn, a) integer n1max, n2max, n1, n2, n3, isgn, & n2h, n3h, i, j, k, l real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), xi n2h = n2 / 2 n3h = n3 / 2 if (isgn .lt. 0) then do k = 1, n3h - 1 l = n3 - k xi = a(0, 0, k) - a(0, 0, l) a(0, 0, k) = a(0, 0, k) + a(0, 0, l) a(0, 0, l) = xi xi = a(1, 0, l) - a(1, 0, k) a(1, 0, k) = a(1, 0, k) + a(1, 0, l) a(1, 0, l) = xi xi = a(0, n2h, k) - a(0, n2h, l) a(0, n2h, k) = a(0, n2h, k) + a(0, n2h, l) a(0, n2h, l) = xi xi = a(1, n2h, l) - a(1, n2h, k) a(1, n2h, k) = a(1, n2h, k) + a(1, n2h, l) a(1, n2h, l) = xi do i = 1, n2h - 1 j = n2 - i xi = a(0, i, k) - a(0, j, l) a(0, i, k) = a(0, i, k) + a(0, j, l) a(0, j, l) = xi xi = a(1, j, l) - a(1, i, k) a(1, i, k) = a(1, i, k) + a(1, j, l) a(1, j, l) = xi xi = a(0, i, l) - a(0, j, k) a(0, i, l) = a(0, i, l) + a(0, j, k) a(0, j, k) = xi xi = a(1, j, k) - a(1, i, l) a(1, i, l) = a(1, i, l) + a(1, j, k) a(1, j, k) = xi end do end do do i = 1, n2h - 1 j = n2 - i xi = a(0, i, 0) - a(0, j, 0) a(0, i, 0) = a(0, i, 0) + a(0, j, 0) a(0, j, 0) = xi xi = a(1, j, 0) - a(1, i, 0) a(1, i, 0) = a(1, i, 0) + a(1, j, 0) a(1, j, 0) = xi xi = a(0, i, n3h) - a(0, j, n3h) a(0, i, n3h) = a(0, i, n3h) + a(0, j, n3h) a(0, j, n3h) = xi xi = a(1, j, n3h) - a(1, i, n3h) a(1, i, n3h) = a(1, i, n3h) + a(1, j, n3h) a(1, j, n3h) = xi end do else do k = 1, n3h - 1 l = n3 - k a(0, 0, l) = 0.5d0 * (a(0, 0, k) - a(0, 0, l)) a(0, 0, k) = a(0, 0, k) - a(0, 0, l) a(1, 0, l) = 0.5d0 * (a(1, 0, k) + a(1, 0, l)) a(1, 0, k) = a(1, 0, k) - a(1, 0, l) a(0, n2h, l) = 0.5d0 * (a(0, n2h, k) - a(0, n2h, l)) a(0, n2h, k) = a(0, n2h, k) - a(0, n2h, l) a(1, n2h, l) = 0.5d0 * (a(1, n2h, k) + a(1, n2h, l)) a(1, n2h, k) = a(1, n2h, k) - a(1, n2h, l) do i = 1, n2h - 1 j = n2 - i a(0, j, l) = 0.5d0 * (a(0, i, k) - a(0, j, l)) a(0, i, k) = a(0, i, k) - a(0, j, l) a(1, j, l) = 0.5d0 * (a(1, i, k) + a(1, j, l)) a(1, i, k) = a(1, i, k) - a(1, j, l) a(0, j, k) = 0.5d0 * (a(0, i, l) - a(0, j, k)) a(0, i, l) = a(0, i, l) - a(0, j, k) a(1, j, k) = 0.5d0 * (a(1, i, l) + a(1, j, k)) a(1, i, l) = a(1, i, l) - a(1, j, k) end do end do do i = 1, n2h - 1 j = n2 - i a(0, j, 0) = 0.5d0 * (a(0, i, 0) - a(0, j, 0)) a(0, i, 0) = a(0, i, 0) - a(0, j, 0) a(1, j, 0) = 0.5d0 * (a(1, i, 0) + a(1, j, 0)) a(1, i, 0) = a(1, i, 0) - a(1, j, 0) a(0, j, n3h) = 0.5d0 * (a(0, i, n3h) - a(0, j, n3h)) a(0, i, n3h) = a(0, i, n3h) - a(0, j, n3h) a(1, j, n3h) = 0.5d0 * (a(1, i, n3h) + a(1, j, n3h)) a(1, i, n3h) = a(1, i, n3h) - a(1, j, n3h) end do end if end ! subroutine ddxt3da_sub(n1max, n2max, n1, n2, n3, ics, & isgn, a, t, ip, w) integer n1max, n2max, n1, n2, n3, ics, isgn, & ip(0 : *), i, j, k real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), & t(0 : *), w(0 : *) do k = 0, n3 - 1 if (ics .eq. 0) then do j = 0, n2 - 1 call ddct(n1, isgn, a(0, j, k), ip, w) end do else do j = 0, n2 - 1 call ddst(n1, isgn, a(0, j, k), ip, w) end do end if if (n1 .gt. 2) then do i = 0, n1 - 4, 4 do j = 0, n2 - 1 t(j) = a(i, j, k) t(n2 + j) = a(i + 1, j, k) t(2 * n2 + j) = a(i + 2, j, k) t(3 * n2 + j) = a(i + 3, j, k) end do if (ics .eq. 0) then call ddct(n2, isgn, t, ip, w) call ddct(n2, isgn, t(n2), ip, w) call ddct(n2, isgn, t(2 * n2), ip, w) call ddct(n2, isgn, t(3 * n2), ip, w) else call ddst(n2, isgn, t, ip, w) call ddst(n2, isgn, t(n2), ip, w) call ddst(n2, isgn, t(2 * n2), ip, w) call ddst(n2, isgn, t(3 * n2), ip, w) end if do j = 0, n2 - 1 a(i, j, k) = t(j) a(i + 1, j, k) = t(n2 + j) a(i + 2, j, k) = t(2 * n2 + j) a(i + 3, j, k) = t(3 * n2 + j) end do end do else if (n1 .eq. 2) then do j = 0, n2 - 1 t(j) = a(0, j, k) t(n2 + j) = a(1, j, k) end do if (ics .eq. 0) then call ddct(n2, isgn, t, ip, w) call ddct(n2, isgn, t(n2), ip, w) else call ddst(n2, isgn, t, ip, w) call ddst(n2, isgn, t(n2), ip, w) end if do j = 0, n2 - 1 a(0, j, k) = t(j) a(1, j, k) = t(n2 + j) end do end if end do end ! subroutine ddxt3db_sub(n1max, n2max, n1, n2, n3, ics, & isgn, a, t, ip, w) integer n1max, n2max, n1, n2, n3, ics, isgn, & ip(0 : *), i, j, k real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : n3 - 1), & t(0 : *), w(0 : *) if (n1 .gt. 2) then do j = 0, n2 - 1 do i = 0, n1 - 4, 4 do k = 0, n3 - 1 t(k) = a(i, j, k) t(n3 + k) = a(i + 1, j, k) t(2 * n3 + k) = a(i + 2, j, k) t(3 * n3 + k) = a(i + 3, j, k) end do if (ics .eq. 0) then call ddct(n3, isgn, t, ip, w) call ddct(n3, isgn, t(n3), ip, w) call ddct(n3, isgn, t(2 * n3), ip, w) call ddct(n3, isgn, t(3 * n3), ip, w) else call ddst(n3, isgn, t, ip, w) call ddst(n3, isgn, t(n3), ip, w) call ddst(n3, isgn, t(2 * n3), ip, w) call ddst(n3, isgn, t(3 * n3), ip, w) end if do k = 0, n3 - 1 a(i, j, k) = t(k) a(i + 1, j, k) = t(n3 + k) a(i + 2, j, k) = t(2 * n3 + k) a(i + 3, j, k) = t(3 * n3 + k) end do end do end do else if (n1 .eq. 2) then do j = 0, n2 - 1 do k = 0, n3 - 1 t(k) = a(0, j, k) t(n3 + k) = a(1, j, k) end do if (ics .eq. 0) then call ddct(n3, isgn, t, ip, w) call ddct(n3, isgn, t(n3), ip, w) else call ddst(n3, isgn, t, ip, w) call ddst(n3, isgn, t(n3), ip, w) end if do k = 0, n3 - 1 a(0, j, k) = t(k) a(1, j, k) = t(n3 + k) end do end do end if end ! OouraFFT-1.0/fftsg_h.c000066400000000000000000002571741246725775500146270ustar00rootroot00000000000000/* Fast Fourier/Cosine/Sine Transform dimension :one data length :power of 2 decimation :frequency radix :split-radix data :inplace table :not use functions cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, double *); void rdft(int, int, double *); void ddct(int, int, double *); void ddst(int, int, double *); void dfct(int, double *); void dfst(int, double *); macro definitions USE_CDFT_PTHREADS : default=not defined CDFT_THREADS_BEGIN_N : must be >= 512, default=8192 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536 USE_CDFT_WINTHREADS : default=not defined CDFT_THREADS_BEGIN_N : must be >= 512, default=32768 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288 -------- Complex DFT (Discrete Fourier Transform) -------- [definition] X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k cdft(2*n, 1, a); cdft(2*n, -1, a); [parameters] 2*n :data length (int) n >= 1, n = power of 2 a[0...2*n-1] :input/output data (double *) input data a[2*j] = Re(x[j]), a[2*j+1] = Im(x[j]), 0<=j RDFT R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0 IRDFT (excluding scale) a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k rdft(n, 1, a); rdft(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[2*k] = R[k], 0<=k input data a[2*j] = R[j], 0<=j IDCT (excluding scale) C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k DCT C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k ddct(n, 1, a); ddct(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = C[k], 0<=k IDST (excluding scale) S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k DST S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0 ddst(n, 1, a); ddst(n, -1, a); [parameters] n :data length (int) n >= 2, n = power of 2 a[0...n-1] :input/output data (double *) input data a[j] = A[j], 0 output data a[k] = S[k], 0= 2, n = power of 2 a[0...n] :input/output data (double *) output data a[k] = C[k], 0<=k<=n [remark] Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); is a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); for (j = 0; j <= n; j++) { a[j] *= 2.0 / n; } . -------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- [definition] S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0= 2, n = power of 2 a[0...n-1] :input/output data (double *) output data a[k] = S[k], 0= 0) { cftfsub(n, a); } else { cftbsub(n, a); } } void rdft(int n, int isgn, double *a) { void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); double xi; if (isgn >= 0) { if (n > 4) { cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xi = a[0] - a[1]; a[0] += a[1]; a[1] = xi; } else { a[1] = 0.5 * (a[0] - a[1]); a[0] -= a[1]; if (n > 4) { rftbsub(n, a); cftbsub(n, a); } else if (n == 4) { cftbsub(n, a); } } } void ddct(int n, int isgn, double *a) { void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); void dctsub(int n, double *a); void dctsub4(int n, double *a); int j; double xr; if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = a[j] - a[j - 1]; a[j] += a[j - 1]; } a[1] = a[0] - xr; a[0] += xr; if (n > 4) { rftbsub(n, a); cftbsub(n, a); } else if (n == 4) { cftbsub(n, a); } } if (n > 4) { dctsub(n, a); } else { dctsub4(n, a); } if (isgn >= 0) { if (n > 4) { cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = a[j] - a[j + 1]; a[j] += a[j + 1]; } a[n - 1] = xr; } } void ddst(int n, int isgn, double *a) { void cftfsub(int n, double *a); void cftbsub(int n, double *a); void rftfsub(int n, double *a); void rftbsub(int n, double *a); void dstsub(int n, double *a); void dstsub4(int n, double *a); int j; double xr; if (isgn < 0) { xr = a[n - 1]; for (j = n - 2; j >= 2; j -= 2) { a[j + 1] = -a[j] - a[j - 1]; a[j] -= a[j - 1]; } a[1] = a[0] + xr; a[0] -= xr; if (n > 4) { rftbsub(n, a); cftbsub(n, a); } else if (n == 4) { cftbsub(n, a); } } if (n > 4) { dstsub(n, a); } else { dstsub4(n, a); } if (isgn >= 0) { if (n > 4) { cftfsub(n, a); rftfsub(n, a); } else if (n == 4) { cftfsub(n, a); } xr = a[0] - a[1]; a[0] += a[1]; for (j = 2; j < n; j += 2) { a[j - 1] = -a[j] - a[j + 1]; a[j] -= a[j + 1]; } a[n - 1] = -xr; } } void dfct(int n, double *a) { void ddct(int n, int isgn, double *a); void bitrv1(int n, double *a); int j, k, m, mh; double xr, xi, yr, yi, an; m = n >> 1; for (j = 0; j < m; j++) { k = n - j; xr = a[j] + a[k]; a[j] -= a[k]; a[k] = xr; } an = a[n]; while (m >= 2) { ddct(m, 1, a); if (m > 2) { bitrv1(m, a); } mh = m >> 1; xi = a[m]; a[m] = a[0]; a[0] = an - xi; an += xi; for (j = 1; j < mh; j++) { k = m - j; xr = a[m + k]; xi = a[m + j]; yr = a[j]; yi = a[k]; a[m + j] = yr; a[m + k] = yi; a[j] = xr - xi; a[k] = xr + xi; } xr = a[mh]; a[mh] = a[m + mh]; a[m + mh] = xr; m = mh; } xi = a[1]; a[1] = a[0]; a[0] = an + xi; a[n] = an - xi; if (n > 2) { bitrv1(n, a); } } void dfst(int n, double *a) { void ddst(int n, int isgn, double *a); void bitrv1(int n, double *a); int j, k, m, mh; double xr, xi, yr, yi; m = n >> 1; for (j = 1; j < m; j++) { k = n - j; xr = a[j] - a[k]; a[j] += a[k]; a[k] = xr; } a[0] = a[m]; while (m >= 2) { ddst(m, 1, a); if (m > 2) { bitrv1(m, a); } mh = m >> 1; for (j = 1; j < mh; j++) { k = m - j; xr = a[m + k]; xi = a[m + j]; yr = a[j]; yi = a[k]; a[m + j] = yr; a[m + k] = yi; a[j] = xr + xi; a[k] = xr - xi; } a[m] = a[0]; a[0] = a[m + mh]; a[m + mh] = a[mh]; m = mh; } a[1] = a[0]; a[0] = 0; if (n > 2) { bitrv1(n, a); } } /* -------- child routines -------- */ #include #ifndef M_PI_2 #define M_PI_2 1.570796326794896619231321691639751442098584699687 #endif #ifndef WR5000 /* cos(M_PI_2*0.5000) */ #define WR5000 0.707106781186547524400844362104849039284835937688 #endif #ifndef WR2500 /* cos(M_PI_2*0.2500) */ #define WR2500 0.923879532511286756128183189396788286822416625863 #endif #ifndef WI2500 /* sin(M_PI_2*0.2500) */ #define WI2500 0.382683432365089771728459984030398866761344562485 #endif #ifndef WR1250 /* cos(M_PI_2*0.1250) */ #define WR1250 0.980785280403230449126182236134239036973933730893 #endif #ifndef WI1250 /* sin(M_PI_2*0.1250) */ #define WI1250 0.195090322016128267848284868477022240927691617751 #endif #ifndef WR3750 /* cos(M_PI_2*0.3750) */ #define WR3750 0.831469612302545237078788377617905756738560811987 #endif #ifndef WI3750 /* sin(M_PI_2*0.3750) */ #define WI3750 0.555570233019602224742830813948532874374937190754 #endif #ifdef USE_CDFT_PTHREADS #define USE_CDFT_THREADS #ifndef CDFT_THREADS_BEGIN_N #define CDFT_THREADS_BEGIN_N 8192 #endif #ifndef CDFT_4THREADS_BEGIN_N #define CDFT_4THREADS_BEGIN_N 65536 #endif #include #include #include #define cdft_thread_t pthread_t #define cdft_thread_create(thp,func,argp) { \ if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #define cdft_thread_wait(th) { \ if (pthread_join(th, NULL) != 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #endif /* USE_CDFT_PTHREADS */ #ifdef USE_CDFT_WINTHREADS #define USE_CDFT_THREADS #ifndef CDFT_THREADS_BEGIN_N #define CDFT_THREADS_BEGIN_N 32768 #endif #ifndef CDFT_4THREADS_BEGIN_N #define CDFT_4THREADS_BEGIN_N 524288 #endif #include #include #include #define cdft_thread_t HANDLE #define cdft_thread_create(thp,func,argp) { \ DWORD thid; \ *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \ if (*(thp) == 0) { \ fprintf(stderr, "cdft thread error\n"); \ exit(1); \ } \ } #define cdft_thread_wait(th) { \ WaitForSingleObject(th, INFINITE); \ CloseHandle(th); \ } #endif /* USE_CDFT_WINTHREADS */ #ifndef CDFT_LOOP_DIV /* control of the CDFT's speed & tolerance */ #define CDFT_LOOP_DIV 32 #endif #ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */ #define RDFT_LOOP_DIV 64 #endif #ifndef DCST_LOOP_DIV /* control of the DCT,DST's speed & tolerance */ #define DCST_LOOP_DIV 64 #endif void cftfsub(int n, double *a) { void bitrv2(int n, double *a); void bitrv216(double *a); void bitrv208(double *a); void cftmdl1(int n, double *a); void cftrec4(int n, double *a); void cftleaf(int n, int isplt, double *a); void cftfx41(int n, double *a); void cftf161(double *a); void cftf081(double *a); void cftf040(double *a); void cftx020(double *a); #ifdef USE_CDFT_THREADS void cftrec4_th(int n, double *a); #endif /* USE_CDFT_THREADS */ if (n > 8) { if (n > 32) { cftmdl1(n, a); #ifdef USE_CDFT_THREADS if (n > CDFT_THREADS_BEGIN_N) { cftrec4_th(n, a); } else #endif /* USE_CDFT_THREADS */ if (n > 512) { cftrec4(n, a); } else if (n > 128) { cftleaf(n, 1, a); } else { cftfx41(n, a); } bitrv2(n, a); } else if (n == 32) { cftf161(a); bitrv216(a); } else { cftf081(a); bitrv208(a); } } else if (n == 8) { cftf040(a); } else if (n == 4) { cftx020(a); } } void cftbsub(int n, double *a) { void bitrv2conj(int n, double *a); void bitrv216neg(double *a); void bitrv208neg(double *a); void cftb1st(int n, double *a); void cftrec4(int n, double *a); void cftleaf(int n, int isplt, double *a); void cftfx41(int n, double *a); void cftf161(double *a); void cftf081(double *a); void cftb040(double *a); void cftx020(double *a); #ifdef USE_CDFT_THREADS void cftrec4_th(int n, double *a); #endif /* USE_CDFT_THREADS */ if (n > 8) { if (n > 32) { cftb1st(n, a); #ifdef USE_CDFT_THREADS if (n > CDFT_THREADS_BEGIN_N) { cftrec4_th(n, a); } else #endif /* USE_CDFT_THREADS */ if (n > 512) { cftrec4(n, a); } else if (n > 128) { cftleaf(n, 1, a); } else { cftfx41(n, a); } bitrv2conj(n, a); } else if (n == 32) { cftf161(a); bitrv216neg(a); } else { cftf081(a); bitrv208neg(a); } } else if (n == 8) { cftb040(a); } else if (n == 4) { cftx020(a); } } void bitrv2(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k, nh; double xr, xi, yr, yi; m = 4; for (l = n >> 2; l > 8; l >>= 2) { m <<= 1; } nh = n >> 1; if (l == 8) { j0 = 0; for (k0 = 0; k0 < m; k0 += 4) { k = k0; for (j = j0; j < j0 + k0; j += 4) { xr = a[j]; xi = a[j + 1]; yr = a[k]; yi = a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 += m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 += m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = nh >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; j1 = k1 + 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= 2; k1 -= nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh + 2; k1 += nh + 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh - m; k1 += 2 * m - 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = nh >> 1; i > (j0 ^= i); i >>= 1); } } else { j0 = 0; for (k0 = 0; k0 < m; k0 += 4) { k = k0; for (j = j0; j < j0 + k0; j += 4) { xr = a[j]; xi = a[j + 1]; yr = a[k]; yi = a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = nh >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; j1 = k1 + 2; k1 += nh; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += m; xr = a[j1]; xi = a[j1 + 1]; yr = a[k1]; yi = a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = nh >> 1; i > (j0 ^= i); i >>= 1); } } } void bitrv2conj(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k, nh; double xr, xi, yr, yi; m = 4; for (l = n >> 2; l > 8; l >>= 2) { m <<= 1; } nh = n >> 1; if (l == 8) { j0 = 0; for (k0 = 0; k0 < m; k0 += 4) { k = k0; for (j = j0; j < j0 + k0; j += 4) { xr = a[j]; xi = -a[j + 1]; yr = a[k]; yi = -a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 += m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 += m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = nh >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; j1 = k1 + 2; k1 += nh; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; j1 += m; k1 += 2 * m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 -= m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= 2; k1 -= nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh + 2; k1 += nh + 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh - m; k1 += 2 * m - 2; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; for (i = nh >> 1; i > (j0 ^= i); i >>= 1); } } else { j0 = 0; for (k0 = 0; k0 < m; k0 += 4) { k = k0; for (j = j0; j < j0 + k0; j += 4) { xr = a[j]; xi = -a[j + 1]; yr = a[k]; yi = -a[k + 1]; a[j] = yr; a[j + 1] = yi; a[k] = xr; a[k + 1] = xi; j1 = j + m; k1 = k + m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += nh; k1 += 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += 2; k1 += nh; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 += m; k1 += m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= nh; k1 -= 2; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; j1 -= m; k1 -= m; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; for (i = nh >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; j1 = k1 + 2; k1 += nh; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; j1 += m; k1 += m; a[j1 - 1] = -a[j1 - 1]; xr = a[j1]; xi = -a[j1 + 1]; yr = a[k1]; yi = -a[k1 + 1]; a[j1] = yr; a[j1 + 1] = yi; a[k1] = xr; a[k1 + 1] = xi; a[k1 + 3] = -a[k1 + 3]; for (i = nh >> 1; i > (j0 ^= i); i >>= 1); } } } void bitrv216(double *a) { double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x7r = a[14]; x7i = a[15]; x8r = a[16]; x8i = a[17]; x10r = a[20]; x10i = a[21]; x11r = a[22]; x11i = a[23]; x12r = a[24]; x12i = a[25]; x13r = a[26]; x13i = a[27]; x14r = a[28]; x14i = a[29]; a[2] = x8r; a[3] = x8i; a[4] = x4r; a[5] = x4i; a[6] = x12r; a[7] = x12i; a[8] = x2r; a[9] = x2i; a[10] = x10r; a[11] = x10i; a[14] = x14r; a[15] = x14i; a[16] = x1r; a[17] = x1i; a[20] = x5r; a[21] = x5i; a[22] = x13r; a[23] = x13i; a[24] = x3r; a[25] = x3i; a[26] = x11r; a[27] = x11i; a[28] = x7r; a[29] = x7i; } void bitrv216neg(double *a) { double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i, x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i, x15r, x15i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x6r = a[12]; x6i = a[13]; x7r = a[14]; x7i = a[15]; x8r = a[16]; x8i = a[17]; x9r = a[18]; x9i = a[19]; x10r = a[20]; x10i = a[21]; x11r = a[22]; x11i = a[23]; x12r = a[24]; x12i = a[25]; x13r = a[26]; x13i = a[27]; x14r = a[28]; x14i = a[29]; x15r = a[30]; x15i = a[31]; a[2] = x15r; a[3] = x15i; a[4] = x7r; a[5] = x7i; a[6] = x11r; a[7] = x11i; a[8] = x3r; a[9] = x3i; a[10] = x13r; a[11] = x13i; a[12] = x5r; a[13] = x5i; a[14] = x9r; a[15] = x9i; a[16] = x1r; a[17] = x1i; a[18] = x14r; a[19] = x14i; a[20] = x6r; a[21] = x6i; a[22] = x10r; a[23] = x10i; a[24] = x2r; a[25] = x2i; a[26] = x12r; a[27] = x12i; a[28] = x4r; a[29] = x4i; a[30] = x8r; a[31] = x8i; } void bitrv208(double *a) { double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i; x1r = a[2]; x1i = a[3]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x6r = a[12]; x6i = a[13]; a[2] = x4r; a[3] = x4i; a[6] = x6r; a[7] = x6i; a[8] = x1r; a[9] = x1i; a[12] = x3r; a[13] = x3i; } void bitrv208neg(double *a) { double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i; x1r = a[2]; x1i = a[3]; x2r = a[4]; x2i = a[5]; x3r = a[6]; x3i = a[7]; x4r = a[8]; x4i = a[9]; x5r = a[10]; x5i = a[11]; x6r = a[12]; x6i = a[13]; x7r = a[14]; x7i = a[15]; a[2] = x7r; a[3] = x7i; a[4] = x3r; a[5] = x3i; a[6] = x5r; a[7] = x5i; a[8] = x1r; a[9] = x1i; a[10] = x6r; a[11] = x6i; a[12] = x2r; a[13] = x2i; a[14] = x4r; a[15] = x4i; } void bitrv1(int n, double *a) { int j0, k0, j1, k1, l, m, i, j, k, nh; double x; nh = n >> 1; x = a[1]; a[1] = a[nh]; a[nh] = x; m = 2; for (l = n >> 2; l > 2; l >>= 2) { m <<= 1; } if (l == 2) { j1 = m + 1; k1 = m + nh; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j0 = 0; for (k0 = 2; k0 < m; k0 += 2) { for (i = nh >> 1; i > (j0 ^= i); i >>= 1); k = k0; for (j = j0; j < j0 + k0; j += 2) { x = a[j]; a[j] = a[k]; a[k] = x; j1 = j + m; k1 = k + m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 += nh; k1++; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 -= m; k1 -= m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1++; k1 += nh; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 += m; k1 += m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 -= nh; k1--; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 -= m; k1 -= m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; for (i = nh >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; j1 = k1 + 1; k1 += nh; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 += m; k1 += m; x = a[j1]; a[j1] = a[k1]; a[k1] = x; } } else { j0 = 0; for (k0 = 2; k0 < m; k0 += 2) { for (i = nh >> 1; i > (j0 ^= i); i >>= 1); k = k0; for (j = j0; j < j0 + k0; j += 2) { x = a[j]; a[j] = a[k]; a[k] = x; j1 = j + nh; k1 = k + 1; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1++; k1 += nh; x = a[j1]; a[j1] = a[k1]; a[k1] = x; j1 -= nh; k1--; x = a[j1]; a[j1] = a[k1]; a[k1] = x; for (i = nh >> 1; i > (k ^= i); i >>= 1); } k1 = j0 + k0; j1 = k1 + 1; k1 += nh; x = a[j1]; a[j1] = a[k1]; a[k1] = x; } } } void cftb1st(int n, double *a) { int i, i0, j, j0, j1, j2, j3, m, mh; double ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i, ss1, ss3; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = -a[1] - a[j2 + 1]; x1r = a[0] - a[j2]; x1i = -a[1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; a[j2] = x1r + x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r - x3i; a[j3 + 1] = x1i - x3r; wd1r = 1; wd1i = 0; wd3r = 1; wd3i = 0; ew = M_PI_2 / m; w1r = cos(2 * ew); w1i = sin(2 * ew); wk1r = w1r; wk1i = w1i; ss1 = 2 * w1i; wk3i = 2 * ss1 * wk1r; wk3r = wk1r - wk3i * wk1i; wk3i = wk1i - wk3i * wk1r; ss3 = 2 * wk3i; i = 0; for (;;) { i0 = i + 4 * CDFT_LOOP_DIV; if (i0 > mh - 4) { i0 = mh - 4; } for (j = i + 2; j < i0; j += 4) { wd1r -= ss1 * wk1i; wd1i += ss1 * wk1r; wd3r -= ss3 * wk3i; wd3i += ss3 * wk3r; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = -a[j + 1] - a[j2 + 1]; x1r = a[j] - a[j2]; x1i = -a[j + 1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; x0r = a[j + 2] + a[j2 + 2]; x0i = -a[j + 3] - a[j2 + 3]; x1r = a[j + 2] - a[j2 + 2]; x1i = -a[j + 3] + a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j + 2] = x0r + x2r; a[j + 3] = x0i - x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 + 2] = wd1r * x0r - wd1i * x0i; a[j2 + 3] = wd1r * x0i + wd1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 + 2] = wd3r * x0r + wd3i * x0i; a[j3 + 3] = wd3r * x0i - wd3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = -a[j0 + 1] - a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = -a[j0 + 1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; x0r = a[j0 - 2] + a[j2 - 2]; x0i = -a[j0 - 1] - a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = -a[j0 - 1] + a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i - x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 - 2] = wd1i * x0r - wd1r * x0i; a[j2 - 1] = wd1i * x0i + wd1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 - 2] = wd3i * x0r + wd3r * x0i; a[j3 - 1] = wd3i * x0i - wd3r * x0r; wk1r -= ss1 * wd1i; wk1i += ss1 * wd1r; wk3r -= ss3 * wd3i; wk3i += ss3 * wd3r; } if (i0 == mh - 4) { break; } wd1r = cos(ew * i0); wd1i = sin(ew * i0); wd3i = 4 * wd1i * wd1r; wd3r = wd1r - wd3i * wd1i; wd3i = wd1i - wd3i * wd1r; wk1r = w1r * wd1r - w1i * wd1i; wk1i = w1r * wd1i + w1i * wd1r; wk3i = 4 * wk1i * wk1r; wk3r = wk1r - wk3i * wk1i; wk3i = wk1i - wk3i * wk1r; i = i0; } wd1r = WR5000; j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] + a[j2 - 2]; x0i = -a[j0 - 1] - a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = -a[j0 - 1] + a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i - x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 - 2] = wk1r * x0r - wk1i * x0i; a[j2 - 1] = wk1r * x0i + wk1i * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 - 2] = wk3r * x0r + wk3i * x0i; a[j3 - 1] = wk3r * x0i - wk3i * x0r; x0r = a[j0] + a[j2]; x0i = -a[j0 + 1] - a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = -a[j0 + 1] + a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i - x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2] = wd1r * (x0r - x0i); a[j2 + 1] = wd1r * (x0i + x0r); x0r = x1r - x3i; x0i = x1i - x3r; a[j3] = -wd1r * (x0r + x0i); a[j3 + 1] = -wd1r * (x0i - x0r); x0r = a[j0 + 2] + a[j2 + 2]; x0i = -a[j0 + 3] - a[j2 + 3]; x1r = a[j0 + 2] - a[j2 + 2]; x1i = -a[j0 + 3] + a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j0 + 2] = x0r + x2r; a[j0 + 3] = x0i - x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i + x2i; x0r = x1r + x3i; x0i = x1i + x3r; a[j2 + 2] = wk1i * x0r - wk1r * x0i; a[j2 + 3] = wk1i * x0i + wk1r * x0r; x0r = x1r - x3i; x0i = x1i - x3r; a[j3 + 2] = wk3i * x0r + wk3r * x0i; a[j3 + 3] = wk3i * x0i - wk3r * x0r; } #ifdef USE_CDFT_THREADS struct cdft_arg_st { int n0; int n; double *a; }; typedef struct cdft_arg_st cdft_arg_t; void cftrec4_th(int n, double *a) { void *cftrec1_th(void *p); void *cftrec2_th(void *p); int i, idiv4, m, nthread; cdft_thread_t th[4]; cdft_arg_t ag[4]; nthread = 2; idiv4 = 0; m = n >> 1; if (n > CDFT_4THREADS_BEGIN_N) { nthread = 4; idiv4 = 1; m >>= 1; } for (i = 0; i < nthread; i++) { ag[i].n0 = n; ag[i].n = m; ag[i].a = &a[i * m]; if (i != idiv4) { cdft_thread_create(&th[i], cftrec1_th, &ag[i]); } else { cdft_thread_create(&th[i], cftrec2_th, &ag[i]); } } for (i = 0; i < nthread; i++) { cdft_thread_wait(th[i]); } } void *cftrec1_th(void *p) { int cfttree(int n, int j, int k, double *a); void cftleaf(int n, int isplt, double *a); void cftmdl1(int n, double *a); int isplt, j, k, m, n, n0; double *a; n0 = ((cdft_arg_t *) p)->n0; n = ((cdft_arg_t *) p)->n; a = ((cdft_arg_t *) p)->a; m = n0; while (m > 512) { m >>= 2; cftmdl1(m, &a[n - m]); } cftleaf(m, 1, &a[n - m]); k = 0; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a); cftleaf(m, isplt, &a[j - m]); } return (void *) 0; } void *cftrec2_th(void *p) { int cfttree(int n, int j, int k, double *a); void cftleaf(int n, int isplt, double *a); void cftmdl2(int n, double *a); int isplt, j, k, m, n, n0; double *a; n0 = ((cdft_arg_t *) p)->n0; n = ((cdft_arg_t *) p)->n; a = ((cdft_arg_t *) p)->a; k = 1; m = n0; while (m > 512) { m >>= 2; k <<= 2; cftmdl2(m, &a[n - m]); } cftleaf(m, 0, &a[n - m]); k >>= 1; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a); cftleaf(m, isplt, &a[j - m]); } return (void *) 0; } #endif /* USE_CDFT_THREADS */ void cftrec4(int n, double *a) { int cfttree(int n, int j, int k, double *a); void cftleaf(int n, int isplt, double *a); void cftmdl1(int n, double *a); int isplt, j, k, m; m = n; while (m > 512) { m >>= 2; cftmdl1(m, &a[n - m]); } cftleaf(m, 1, &a[n - m]); k = 0; for (j = n - m; j > 0; j -= m) { k++; isplt = cfttree(m, j, k, a); cftleaf(m, isplt, &a[j - m]); } } int cfttree(int n, int j, int k, double *a) { void cftmdl1(int n, double *a); void cftmdl2(int n, double *a); int i, isplt, m; if ((k & 3) != 0) { isplt = k & 1; if (isplt != 0) { cftmdl1(n, &a[j - n]); } else { cftmdl2(n, &a[j - n]); } } else { m = n; for (i = k; (i & 3) == 0; i >>= 2) { m <<= 2; } isplt = i & 1; if (isplt != 0) { while (m > 128) { cftmdl1(m, &a[j - m]); m >>= 2; } } else { while (m > 128) { cftmdl2(m, &a[j - m]); m >>= 2; } } } return isplt; } void cftleaf(int n, int isplt, double *a) { void cftmdl1(int n, double *a); void cftmdl2(int n, double *a); void cftf161(double *a); void cftf162(double *a); void cftf081(double *a); void cftf082(double *a); if (n == 512) { cftmdl1(128, a); cftf161(a); cftf162(&a[32]); cftf161(&a[64]); cftf161(&a[96]); cftmdl2(128, &a[128]); cftf161(&a[128]); cftf162(&a[160]); cftf161(&a[192]); cftf162(&a[224]); cftmdl1(128, &a[256]); cftf161(&a[256]); cftf162(&a[288]); cftf161(&a[320]); cftf161(&a[352]); if (isplt != 0) { cftmdl1(128, &a[384]); cftf161(&a[480]); } else { cftmdl2(128, &a[384]); cftf162(&a[480]); } cftf161(&a[384]); cftf162(&a[416]); cftf161(&a[448]); } else { cftmdl1(64, a); cftf081(a); cftf082(&a[16]); cftf081(&a[32]); cftf081(&a[48]); cftmdl2(64, &a[64]); cftf081(&a[64]); cftf082(&a[80]); cftf081(&a[96]); cftf082(&a[112]); cftmdl1(64, &a[128]); cftf081(&a[128]); cftf082(&a[144]); cftf081(&a[160]); cftf081(&a[176]); if (isplt != 0) { cftmdl1(64, &a[192]); cftf081(&a[240]); } else { cftmdl2(64, &a[192]); cftf082(&a[240]); } cftf081(&a[192]); cftf082(&a[208]); cftf081(&a[224]); } } void cftmdl1(int n, double *a) { int i, i0, j, j0, j1, j2, j3, m, mh; double ew, w1r, w1i, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i, ss1, ss3; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; mh = n >> 3; m = 2 * mh; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] + a[j2]; x0i = a[1] + a[j2 + 1]; x1r = a[0] - a[j2]; x1i = a[1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; a[j2] = x1r - x3i; a[j2 + 1] = x1i + x3r; a[j3] = x1r + x3i; a[j3 + 1] = x1i - x3r; wd1r = 1; wd1i = 0; wd3r = 1; wd3i = 0; ew = M_PI_2 / m; w1r = cos(2 * ew); w1i = sin(2 * ew); wk1r = w1r; wk1i = w1i; ss1 = 2 * w1i; wk3i = 2 * ss1 * wk1r; wk3r = wk1r - wk3i * wk1i; wk3i = wk1i - wk3i * wk1r; ss3 = 2 * wk3i; i = 0; for (;;) { i0 = i + 4 * CDFT_LOOP_DIV; if (i0 > mh - 4) { i0 = mh - 4; } for (j = i + 2; j < i0; j += 4) { wd1r -= ss1 * wk1i; wd1i += ss1 * wk1r; wd3r -= ss3 * wk3i; wd3i += ss3 * wk3r; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] + a[j2]; x0i = a[j + 1] + a[j2 + 1]; x1r = a[j] - a[j2]; x1i = a[j + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j] = x0r + x2r; a[j + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1r * x0r - wk1i * x0i; a[j2 + 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3r * x0r + wk3i * x0i; a[j3 + 1] = wk3r * x0i - wk3i * x0r; x0r = a[j + 2] + a[j2 + 2]; x0i = a[j + 3] + a[j2 + 3]; x1r = a[j + 2] - a[j2 + 2]; x1i = a[j + 3] - a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j + 2] = x0r + x2r; a[j + 3] = x0i + x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 + 2] = wd1r * x0r - wd1i * x0i; a[j2 + 3] = wd1r * x0i + wd1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 + 2] = wd3r * x0r + wd3i * x0i; a[j3 + 3] = wd3r * x0i - wd3i * x0r; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wk1i * x0r - wk1r * x0i; a[j2 + 1] = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = wk3i * x0r + wk3r * x0i; a[j3 + 1] = wk3i * x0i - wk3r * x0r; x0r = a[j0 - 2] + a[j2 - 2]; x0i = a[j0 - 1] + a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = a[j0 - 1] - a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i + x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 - 2] = wd1i * x0r - wd1r * x0i; a[j2 - 1] = wd1i * x0i + wd1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 - 2] = wd3i * x0r + wd3r * x0i; a[j3 - 1] = wd3i * x0i - wd3r * x0r; wk1r -= ss1 * wd1i; wk1i += ss1 * wd1r; wk3r -= ss3 * wd3i; wk3i += ss3 * wd3r; } if (i0 == mh - 4) { break; } wd1r = cos(ew * i0); wd1i = sin(ew * i0); wd3i = 4 * wd1i * wd1r; wd3r = wd1r - wd3i * wd1i; wd3i = wd1i - wd3i * wd1r; wk1r = w1r * wd1r - w1i * wd1i; wk1i = w1r * wd1i + w1i * wd1r; wk3i = 4 * wk1i * wk1r; wk3r = wk1r - wk3i * wk1i; wk3i = wk1i - wk3i * wk1r; i = i0; } wd1r = WR5000; j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] + a[j2 - 2]; x0i = a[j0 - 1] + a[j2 - 1]; x1r = a[j0 - 2] - a[j2 - 2]; x1i = a[j0 - 1] - a[j2 - 1]; x2r = a[j1 - 2] + a[j3 - 2]; x2i = a[j1 - 1] + a[j3 - 1]; x3r = a[j1 - 2] - a[j3 - 2]; x3i = a[j1 - 1] - a[j3 - 1]; a[j0 - 2] = x0r + x2r; a[j0 - 1] = x0i + x2i; a[j1 - 2] = x0r - x2r; a[j1 - 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 - 2] = wk1r * x0r - wk1i * x0i; a[j2 - 1] = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 - 2] = wk3r * x0r + wk3i * x0i; a[j3 - 1] = wk3r * x0i - wk3i * x0r; x0r = a[j0] + a[j2]; x0i = a[j0 + 1] + a[j2 + 1]; x1r = a[j0] - a[j2]; x1i = a[j0 + 1] - a[j2 + 1]; x2r = a[j1] + a[j3]; x2i = a[j1 + 1] + a[j3 + 1]; x3r = a[j1] - a[j3]; x3i = a[j1 + 1] - a[j3 + 1]; a[j0] = x0r + x2r; a[j0 + 1] = x0i + x2i; a[j1] = x0r - x2r; a[j1 + 1] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2] = wd1r * (x0r - x0i); a[j2 + 1] = wd1r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; a[j3] = -wd1r * (x0r + x0i); a[j3 + 1] = -wd1r * (x0i - x0r); x0r = a[j0 + 2] + a[j2 + 2]; x0i = a[j0 + 3] + a[j2 + 3]; x1r = a[j0 + 2] - a[j2 + 2]; x1i = a[j0 + 3] - a[j2 + 3]; x2r = a[j1 + 2] + a[j3 + 2]; x2i = a[j1 + 3] + a[j3 + 3]; x3r = a[j1 + 2] - a[j3 + 2]; x3i = a[j1 + 3] - a[j3 + 3]; a[j0 + 2] = x0r + x2r; a[j0 + 3] = x0i + x2i; a[j1 + 2] = x0r - x2r; a[j1 + 3] = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; a[j2 + 2] = wk1i * x0r - wk1r * x0i; a[j2 + 3] = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; a[j3 + 2] = wk3i * x0r + wk3r * x0i; a[j3 + 3] = wk3i * x0i - wk3r * x0r; } void cftmdl2(int n, double *a) { int i, i0, j, j0, j1, j2, j3, m, mh; double ew, w1r, w1i, wn4r, wk1r, wk1i, wk3r, wk3i, wl1r, wl1i, wl3r, wl3i, wd1r, wd1i, wd3r, wd3i, we1r, we1i, we3r, we3i, ss1, ss3; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i; mh = n >> 3; m = 2 * mh; wn4r = WR5000; j1 = m; j2 = j1 + m; j3 = j2 + m; x0r = a[0] - a[j2 + 1]; x0i = a[1] + a[j2]; x1r = a[0] + a[j2 + 1]; x1i = a[1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wn4r * (x2r - x2i); y0i = wn4r * (x2i + x2r); a[0] = x0r + y0r; a[1] = x0i + y0i; a[j1] = x0r - y0r; a[j1 + 1] = x0i - y0i; y0r = wn4r * (x3r - x3i); y0i = wn4r * (x3i + x3r); a[j2] = x1r - y0i; a[j2 + 1] = x1i + y0r; a[j3] = x1r + y0i; a[j3 + 1] = x1i - y0r; wl1r = 1; wl1i = 0; wl3r = 1; wl3i = 0; we1r = wn4r; we1i = wn4r; we3r = -wn4r; we3i = -wn4r; ew = M_PI_2 / (2 * m); w1r = cos(2 * ew); w1i = sin(2 * ew); wk1r = w1r; wk1i = w1i; wd1r = wn4r * (w1r - w1i); wd1i = wn4r * (w1i + w1r); ss1 = 2 * w1i; wk3i = 2 * ss1 * wk1r; wk3r = wk1r - wk3i * wk1i; wk3i = wk1i - wk3i * wk1r; ss3 = 2 * wk3i; wd3r = -wn4r * (wk3r - wk3i); wd3i = -wn4r * (wk3i + wk3r); i = 0; for (;;) { i0 = i + 4 * CDFT_LOOP_DIV; if (i0 > mh - 4) { i0 = mh - 4; } for (j = i + 2; j < i0; j += 4) { wl1r -= ss1 * wk1i; wl1i += ss1 * wk1r; wl3r -= ss3 * wk3i; wl3i += ss3 * wk3r; we1r -= ss1 * wd1i; we1i += ss1 * wd1r; we3r -= ss3 * wd3i; we3i += ss3 * wd3r; j1 = j + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j] - a[j2 + 1]; x0i = a[j + 1] + a[j2]; x1r = a[j] + a[j2 + 1]; x1i = a[j + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wk1r * x0r - wk1i * x0i; y0i = wk1r * x0i + wk1i * x0r; y2r = wd1r * x2r - wd1i * x2i; y2i = wd1r * x2i + wd1i * x2r; a[j] = y0r + y2r; a[j + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wk3r * x1r + wk3i * x1i; y0i = wk3r * x1i - wk3i * x1r; y2r = wd3r * x3r + wd3i * x3i; y2i = wd3r * x3i - wd3i * x3r; a[j2] = y0r + y2r; a[j2 + 1] = y0i + y2i; a[j3] = y0r - y2r; a[j3 + 1] = y0i - y2i; x0r = a[j + 2] - a[j2 + 3]; x0i = a[j + 3] + a[j2 + 2]; x1r = a[j + 2] + a[j2 + 3]; x1i = a[j + 3] - a[j2 + 2]; x2r = a[j1 + 2] - a[j3 + 3]; x2i = a[j1 + 3] + a[j3 + 2]; x3r = a[j1 + 2] + a[j3 + 3]; x3i = a[j1 + 3] - a[j3 + 2]; y0r = wl1r * x0r - wl1i * x0i; y0i = wl1r * x0i + wl1i * x0r; y2r = we1r * x2r - we1i * x2i; y2i = we1r * x2i + we1i * x2r; a[j + 2] = y0r + y2r; a[j + 3] = y0i + y2i; a[j1 + 2] = y0r - y2r; a[j1 + 3] = y0i - y2i; y0r = wl3r * x1r + wl3i * x1i; y0i = wl3r * x1i - wl3i * x1r; y2r = we3r * x3r + we3i * x3i; y2i = we3r * x3i - we3i * x3r; a[j2 + 2] = y0r + y2r; a[j2 + 3] = y0i + y2i; a[j3 + 2] = y0r - y2r; a[j3 + 3] = y0i - y2i; j0 = m - j; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0] - a[j2 + 1]; x0i = a[j0 + 1] + a[j2]; x1r = a[j0] + a[j2 + 1]; x1i = a[j0 + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wd1i * x0r - wd1r * x0i; y0i = wd1i * x0i + wd1r * x0r; y2r = wk1i * x2r - wk1r * x2i; y2i = wk1i * x2i + wk1r * x2r; a[j0] = y0r + y2r; a[j0 + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wd3i * x1r + wd3r * x1i; y0i = wd3i * x1i - wd3r * x1r; y2r = wk3i * x3r + wk3r * x3i; y2i = wk3i * x3i - wk3r * x3r; a[j2] = y0r + y2r; a[j2 + 1] = y0i + y2i; a[j3] = y0r - y2r; a[j3 + 1] = y0i - y2i; x0r = a[j0 - 2] - a[j2 - 1]; x0i = a[j0 - 1] + a[j2 - 2]; x1r = a[j0 - 2] + a[j2 - 1]; x1i = a[j0 - 1] - a[j2 - 2]; x2r = a[j1 - 2] - a[j3 - 1]; x2i = a[j1 - 1] + a[j3 - 2]; x3r = a[j1 - 2] + a[j3 - 1]; x3i = a[j1 - 1] - a[j3 - 2]; y0r = we1i * x0r - we1r * x0i; y0i = we1i * x0i + we1r * x0r; y2r = wl1i * x2r - wl1r * x2i; y2i = wl1i * x2i + wl1r * x2r; a[j0 - 2] = y0r + y2r; a[j0 - 1] = y0i + y2i; a[j1 - 2] = y0r - y2r; a[j1 - 1] = y0i - y2i; y0r = we3i * x1r + we3r * x1i; y0i = we3i * x1i - we3r * x1r; y2r = wl3i * x3r + wl3r * x3i; y2i = wl3i * x3i - wl3r * x3r; a[j2 - 2] = y0r + y2r; a[j2 - 1] = y0i + y2i; a[j3 - 2] = y0r - y2r; a[j3 - 1] = y0i - y2i; wk1r -= ss1 * wl1i; wk1i += ss1 * wl1r; wk3r -= ss3 * wl3i; wk3i += ss3 * wl3r; wd1r -= ss1 * we1i; wd1i += ss1 * we1r; wd3r -= ss3 * we3i; wd3i += ss3 * we3r; } if (i0 == mh - 4) { break; } wl1r = cos(ew * i0); wl1i = sin(ew * i0); wl3i = 4 * wl1i * wl1r; wl3r = wl1r - wl3i * wl1i; wl3i = wl1i - wl3i * wl1r; we1r = wn4r * (wl1r - wl1i); we1i = wn4r * (wl1i + wl1r); we3r = -wn4r * (wl3r - wl3i); we3i = -wn4r * (wl3i + wl3r); wk1r = w1r * wl1r - w1i * wl1i; wk1i = w1r * wl1i + w1i * wl1r; wk3i = 4 * wk1i * wk1r; wk3r = wk1r - wk3i * wk1i; wk3i = wk1i - wk3i * wk1r; wd1r = wn4r * (wk1r - wk1i); wd1i = wn4r * (wk1i + wk1r); wd3r = -wn4r * (wk3r - wk3i); wd3i = -wn4r * (wk3i + wk3r); i = i0; } wl1r = WR2500; wl1i = WI2500; j0 = mh; j1 = j0 + m; j2 = j1 + m; j3 = j2 + m; x0r = a[j0 - 2] - a[j2 - 1]; x0i = a[j0 - 1] + a[j2 - 2]; x1r = a[j0 - 2] + a[j2 - 1]; x1i = a[j0 - 1] - a[j2 - 2]; x2r = a[j1 - 2] - a[j3 - 1]; x2i = a[j1 - 1] + a[j3 - 2]; x3r = a[j1 - 2] + a[j3 - 1]; x3i = a[j1 - 1] - a[j3 - 2]; y0r = wk1r * x0r - wk1i * x0i; y0i = wk1r * x0i + wk1i * x0r; y2r = wd1r * x2r - wd1i * x2i; y2i = wd1r * x2i + wd1i * x2r; a[j0 - 2] = y0r + y2r; a[j0 - 1] = y0i + y2i; a[j1 - 2] = y0r - y2r; a[j1 - 1] = y0i - y2i; y0r = wk3r * x1r + wk3i * x1i; y0i = wk3r * x1i - wk3i * x1r; y2r = wd3r * x3r + wd3i * x3i; y2i = wd3r * x3i - wd3i * x3r; a[j2 - 2] = y0r + y2r; a[j2 - 1] = y0i + y2i; a[j3 - 2] = y0r - y2r; a[j3 - 1] = y0i - y2i; x0r = a[j0] - a[j2 + 1]; x0i = a[j0 + 1] + a[j2]; x1r = a[j0] + a[j2 + 1]; x1i = a[j0 + 1] - a[j2]; x2r = a[j1] - a[j3 + 1]; x2i = a[j1 + 1] + a[j3]; x3r = a[j1] + a[j3 + 1]; x3i = a[j1 + 1] - a[j3]; y0r = wl1r * x0r - wl1i * x0i; y0i = wl1r * x0i + wl1i * x0r; y2r = wl1i * x2r - wl1r * x2i; y2i = wl1i * x2i + wl1r * x2r; a[j0] = y0r + y2r; a[j0 + 1] = y0i + y2i; a[j1] = y0r - y2r; a[j1 + 1] = y0i - y2i; y0r = wl1i * x1r - wl1r * x1i; y0i = wl1i * x1i + wl1r * x1r; y2r = wl1r * x3r - wl1i * x3i; y2i = wl1r * x3i + wl1i * x3r; a[j2] = y0r - y2r; a[j2 + 1] = y0i - y2i; a[j3] = y0r + y2r; a[j3 + 1] = y0i + y2i; x0r = a[j0 + 2] - a[j2 + 3]; x0i = a[j0 + 3] + a[j2 + 2]; x1r = a[j0 + 2] + a[j2 + 3]; x1i = a[j0 + 3] - a[j2 + 2]; x2r = a[j1 + 2] - a[j3 + 3]; x2i = a[j1 + 3] + a[j3 + 2]; x3r = a[j1 + 2] + a[j3 + 3]; x3i = a[j1 + 3] - a[j3 + 2]; y0r = wd1i * x0r - wd1r * x0i; y0i = wd1i * x0i + wd1r * x0r; y2r = wk1i * x2r - wk1r * x2i; y2i = wk1i * x2i + wk1r * x2r; a[j0 + 2] = y0r + y2r; a[j0 + 3] = y0i + y2i; a[j1 + 2] = y0r - y2r; a[j1 + 3] = y0i - y2i; y0r = wd3i * x1r + wd3r * x1i; y0i = wd3i * x1i - wd3r * x1r; y2r = wk3i * x3r + wk3r * x3i; y2i = wk3i * x3i - wk3r * x3r; a[j2 + 2] = y0r + y2r; a[j2 + 3] = y0i + y2i; a[j3 + 2] = y0r - y2r; a[j3 + 3] = y0i - y2i; } void cftfx41(int n, double *a) { void cftf161(double *a); void cftf162(double *a); void cftf081(double *a); void cftf082(double *a); if (n == 128) { cftf161(a); cftf162(&a[32]); cftf161(&a[64]); cftf161(&a[96]); } else { cftf081(a); cftf082(&a[16]); cftf081(&a[32]); cftf081(&a[48]); } } void cftf161(double *a) { double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; wn4r = WR5000; wk1r = WR2500; wk1i = WI2500; x0r = a[0] + a[16]; x0i = a[1] + a[17]; x1r = a[0] - a[16]; x1i = a[1] - a[17]; x2r = a[8] + a[24]; x2i = a[9] + a[25]; x3r = a[8] - a[24]; x3i = a[9] - a[25]; y0r = x0r + x2r; y0i = x0i + x2i; y4r = x0r - x2r; y4i = x0i - x2i; y8r = x1r - x3i; y8i = x1i + x3r; y12r = x1r + x3i; y12i = x1i - x3r; x0r = a[2] + a[18]; x0i = a[3] + a[19]; x1r = a[2] - a[18]; x1i = a[3] - a[19]; x2r = a[10] + a[26]; x2i = a[11] + a[27]; x3r = a[10] - a[26]; x3i = a[11] - a[27]; y1r = x0r + x2r; y1i = x0i + x2i; y5r = x0r - x2r; y5i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y9r = wk1r * x0r - wk1i * x0i; y9i = wk1r * x0i + wk1i * x0r; x0r = x1r + x3i; x0i = x1i - x3r; y13r = wk1i * x0r - wk1r * x0i; y13i = wk1i * x0i + wk1r * x0r; x0r = a[4] + a[20]; x0i = a[5] + a[21]; x1r = a[4] - a[20]; x1i = a[5] - a[21]; x2r = a[12] + a[28]; x2i = a[13] + a[29]; x3r = a[12] - a[28]; x3i = a[13] - a[29]; y2r = x0r + x2r; y2i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y10r = wn4r * (x0r - x0i); y10i = wn4r * (x0i + x0r); x0r = x1r + x3i; x0i = x1i - x3r; y14r = wn4r * (x0r + x0i); y14i = wn4r * (x0i - x0r); x0r = a[6] + a[22]; x0i = a[7] + a[23]; x1r = a[6] - a[22]; x1i = a[7] - a[23]; x2r = a[14] + a[30]; x2i = a[15] + a[31]; x3r = a[14] - a[30]; x3i = a[15] - a[31]; y3r = x0r + x2r; y3i = x0i + x2i; y7r = x0r - x2r; y7i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; y11r = wk1i * x0r - wk1r * x0i; y11i = wk1i * x0i + wk1r * x0r; x0r = x1r + x3i; x0i = x1i - x3r; y15r = wk1r * x0r - wk1i * x0i; y15i = wk1r * x0i + wk1i * x0r; x0r = y12r - y14r; x0i = y12i - y14i; x1r = y12r + y14r; x1i = y12i + y14i; x2r = y13r - y15r; x2i = y13i - y15i; x3r = y13r + y15r; x3i = y13i + y15i; a[24] = x0r + x2r; a[25] = x0i + x2i; a[26] = x0r - x2r; a[27] = x0i - x2i; a[28] = x1r - x3i; a[29] = x1i + x3r; a[30] = x1r + x3i; a[31] = x1i - x3r; x0r = y8r + y10r; x0i = y8i + y10i; x1r = y8r - y10r; x1i = y8i - y10i; x2r = y9r + y11r; x2i = y9i + y11i; x3r = y9r - y11r; x3i = y9i - y11i; a[16] = x0r + x2r; a[17] = x0i + x2i; a[18] = x0r - x2r; a[19] = x0i - x2i; a[20] = x1r - x3i; a[21] = x1i + x3r; a[22] = x1r + x3i; a[23] = x1i - x3r; x0r = y5r - y7i; x0i = y5i + y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); x0r = y5r + y7i; x0i = y5i - y7r; x3r = wn4r * (x0r - x0i); x3i = wn4r * (x0i + x0r); x0r = y4r - y6i; x0i = y4i + y6r; x1r = y4r + y6i; x1i = y4i - y6r; a[8] = x0r + x2r; a[9] = x0i + x2i; a[10] = x0r - x2r; a[11] = x0i - x2i; a[12] = x1r - x3i; a[13] = x1i + x3r; a[14] = x1r + x3i; a[15] = x1i - x3r; x0r = y0r + y2r; x0i = y0i + y2i; x1r = y0r - y2r; x1i = y0i - y2i; x2r = y1r + y3r; x2i = y1i + y3i; x3r = y1r - y3r; x3i = y1i - y3i; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x0r - x2r; a[3] = x0i - x2i; a[4] = x1r - x3i; a[5] = x1i + x3r; a[6] = x1r + x3i; a[7] = x1i - x3r; } void cftf162(double *a) { double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r, x2i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i; wn4r = WR5000; wk1r = WR1250; wk1i = WI1250; wk2r = WR2500; wk2i = WI2500; wk3r = WR3750; wk3i = WI3750; x1r = a[0] - a[17]; x1i = a[1] + a[16]; x0r = a[8] - a[25]; x0i = a[9] + a[24]; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); y0r = x1r + x2r; y0i = x1i + x2i; y4r = x1r - x2r; y4i = x1i - x2i; x1r = a[0] + a[17]; x1i = a[1] - a[16]; x0r = a[8] + a[25]; x0i = a[9] - a[24]; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); y8r = x1r - x2i; y8i = x1i + x2r; y12r = x1r + x2i; y12i = x1i - x2r; x0r = a[2] - a[19]; x0i = a[3] + a[18]; x1r = wk1r * x0r - wk1i * x0i; x1i = wk1r * x0i + wk1i * x0r; x0r = a[10] - a[27]; x0i = a[11] + a[26]; x2r = wk3i * x0r - wk3r * x0i; x2i = wk3i * x0i + wk3r * x0r; y1r = x1r + x2r; y1i = x1i + x2i; y5r = x1r - x2r; y5i = x1i - x2i; x0r = a[2] + a[19]; x0i = a[3] - a[18]; x1r = wk3r * x0r - wk3i * x0i; x1i = wk3r * x0i + wk3i * x0r; x0r = a[10] + a[27]; x0i = a[11] - a[26]; x2r = wk1r * x0r + wk1i * x0i; x2i = wk1r * x0i - wk1i * x0r; y9r = x1r - x2r; y9i = x1i - x2i; y13r = x1r + x2r; y13i = x1i + x2i; x0r = a[4] - a[21]; x0i = a[5] + a[20]; x1r = wk2r * x0r - wk2i * x0i; x1i = wk2r * x0i + wk2i * x0r; x0r = a[12] - a[29]; x0i = a[13] + a[28]; x2r = wk2i * x0r - wk2r * x0i; x2i = wk2i * x0i + wk2r * x0r; y2r = x1r + x2r; y2i = x1i + x2i; y6r = x1r - x2r; y6i = x1i - x2i; x0r = a[4] + a[21]; x0i = a[5] - a[20]; x1r = wk2i * x0r - wk2r * x0i; x1i = wk2i * x0i + wk2r * x0r; x0r = a[12] + a[29]; x0i = a[13] - a[28]; x2r = wk2r * x0r - wk2i * x0i; x2i = wk2r * x0i + wk2i * x0r; y10r = x1r - x2r; y10i = x1i - x2i; y14r = x1r + x2r; y14i = x1i + x2i; x0r = a[6] - a[23]; x0i = a[7] + a[22]; x1r = wk3r * x0r - wk3i * x0i; x1i = wk3r * x0i + wk3i * x0r; x0r = a[14] - a[31]; x0i = a[15] + a[30]; x2r = wk1i * x0r - wk1r * x0i; x2i = wk1i * x0i + wk1r * x0r; y3r = x1r + x2r; y3i = x1i + x2i; y7r = x1r - x2r; y7i = x1i - x2i; x0r = a[6] + a[23]; x0i = a[7] - a[22]; x1r = wk1i * x0r + wk1r * x0i; x1i = wk1i * x0i - wk1r * x0r; x0r = a[14] + a[31]; x0i = a[15] - a[30]; x2r = wk3i * x0r - wk3r * x0i; x2i = wk3i * x0i + wk3r * x0r; y11r = x1r + x2r; y11i = x1i + x2i; y15r = x1r - x2r; y15i = x1i - x2i; x1r = y0r + y2r; x1i = y0i + y2i; x2r = y1r + y3r; x2i = y1i + y3i; a[0] = x1r + x2r; a[1] = x1i + x2i; a[2] = x1r - x2r; a[3] = x1i - x2i; x1r = y0r - y2r; x1i = y0i - y2i; x2r = y1r - y3r; x2i = y1i - y3i; a[4] = x1r - x2i; a[5] = x1i + x2r; a[6] = x1r + x2i; a[7] = x1i - x2r; x1r = y4r - y6i; x1i = y4i + y6r; x0r = y5r - y7i; x0i = y5i + y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[8] = x1r + x2r; a[9] = x1i + x2i; a[10] = x1r - x2r; a[11] = x1i - x2i; x1r = y4r + y6i; x1i = y4i - y6r; x0r = y5r + y7i; x0i = y5i - y7r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[12] = x1r - x2i; a[13] = x1i + x2r; a[14] = x1r + x2i; a[15] = x1i - x2r; x1r = y8r + y10r; x1i = y8i + y10i; x2r = y9r - y11r; x2i = y9i - y11i; a[16] = x1r + x2r; a[17] = x1i + x2i; a[18] = x1r - x2r; a[19] = x1i - x2i; x1r = y8r - y10r; x1i = y8i - y10i; x2r = y9r + y11r; x2i = y9i + y11i; a[20] = x1r - x2i; a[21] = x1i + x2r; a[22] = x1r + x2i; a[23] = x1i - x2r; x1r = y12r - y14i; x1i = y12i + y14r; x0r = y13r + y15i; x0i = y13i - y15r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[24] = x1r + x2r; a[25] = x1i + x2i; a[26] = x1r - x2r; a[27] = x1i - x2i; x1r = y12r + y14i; x1i = y12i - y14r; x0r = y13r - y15i; x0i = y13i + y15r; x2r = wn4r * (x0r - x0i); x2i = wn4r * (x0i + x0r); a[28] = x1r - x2i; a[29] = x1i + x2r; a[30] = x1r + x2i; a[31] = x1i - x2r; } void cftf081(double *a) { double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = WR5000; x0r = a[0] + a[8]; x0i = a[1] + a[9]; x1r = a[0] - a[8]; x1i = a[1] - a[9]; x2r = a[4] + a[12]; x2i = a[5] + a[13]; x3r = a[4] - a[12]; x3i = a[5] - a[13]; y0r = x0r + x2r; y0i = x0i + x2i; y2r = x0r - x2r; y2i = x0i - x2i; y1r = x1r - x3i; y1i = x1i + x3r; y3r = x1r + x3i; y3i = x1i - x3r; x0r = a[2] + a[10]; x0i = a[3] + a[11]; x1r = a[2] - a[10]; x1i = a[3] - a[11]; x2r = a[6] + a[14]; x2i = a[7] + a[15]; x3r = a[6] - a[14]; x3i = a[7] - a[15]; y4r = x0r + x2r; y4i = x0i + x2i; y6r = x0r - x2r; y6i = x0i - x2i; x0r = x1r - x3i; x0i = x1i + x3r; x2r = x1r + x3i; x2i = x1i - x3r; y5r = wn4r * (x0r - x0i); y5i = wn4r * (x0r + x0i); y7r = wn4r * (x2r - x2i); y7i = wn4r * (x2r + x2i); a[8] = y1r + y5r; a[9] = y1i + y5i; a[10] = y1r - y5r; a[11] = y1i - y5i; a[12] = y3r - y7i; a[13] = y3i + y7r; a[14] = y3r + y7i; a[15] = y3i - y7r; a[0] = y0r + y4r; a[1] = y0i + y4i; a[2] = y0r - y4r; a[3] = y0i - y4i; a[4] = y2r - y6i; a[5] = y2i + y6r; a[6] = y2r + y6i; a[7] = y2i - y6r; } void cftf082(double *a) { double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i; wn4r = WR5000; wk1r = WR2500; wk1i = WI2500; y0r = a[0] - a[9]; y0i = a[1] + a[8]; y1r = a[0] + a[9]; y1i = a[1] - a[8]; x0r = a[4] - a[13]; x0i = a[5] + a[12]; y2r = wn4r * (x0r - x0i); y2i = wn4r * (x0i + x0r); x0r = a[4] + a[13]; x0i = a[5] - a[12]; y3r = wn4r * (x0r - x0i); y3i = wn4r * (x0i + x0r); x0r = a[2] - a[11]; x0i = a[3] + a[10]; y4r = wk1r * x0r - wk1i * x0i; y4i = wk1r * x0i + wk1i * x0r; x0r = a[2] + a[11]; x0i = a[3] - a[10]; y5r = wk1i * x0r - wk1r * x0i; y5i = wk1i * x0i + wk1r * x0r; x0r = a[6] - a[15]; x0i = a[7] + a[14]; y6r = wk1i * x0r - wk1r * x0i; y6i = wk1i * x0i + wk1r * x0r; x0r = a[6] + a[15]; x0i = a[7] - a[14]; y7r = wk1r * x0r - wk1i * x0i; y7i = wk1r * x0i + wk1i * x0r; x0r = y0r + y2r; x0i = y0i + y2i; x1r = y4r + y6r; x1i = y4i + y6i; a[0] = x0r + x1r; a[1] = x0i + x1i; a[2] = x0r - x1r; a[3] = x0i - x1i; x0r = y0r - y2r; x0i = y0i - y2i; x1r = y4r - y6r; x1i = y4i - y6i; a[4] = x0r - x1i; a[5] = x0i + x1r; a[6] = x0r + x1i; a[7] = x0i - x1r; x0r = y1r - y3i; x0i = y1i + y3r; x1r = y5r - y7r; x1i = y5i - y7i; a[8] = x0r + x1r; a[9] = x0i + x1i; a[10] = x0r - x1r; a[11] = x0i - x1i; x0r = y1r + y3i; x0i = y1i - y3r; x1r = y5r + y7r; x1i = y5i + y7i; a[12] = x0r - x1i; a[13] = x0i + x1r; a[14] = x0r + x1i; a[15] = x0i - x1r; } void cftf040(double *a) { double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[4]; x0i = a[1] + a[5]; x1r = a[0] - a[4]; x1i = a[1] - a[5]; x2r = a[2] + a[6]; x2i = a[3] + a[7]; x3r = a[2] - a[6]; x3i = a[3] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x1r - x3i; a[3] = x1i + x3r; a[4] = x0r - x2r; a[5] = x0i - x2i; a[6] = x1r + x3i; a[7] = x1i - x3r; } void cftb040(double *a) { double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; x0r = a[0] + a[4]; x0i = a[1] + a[5]; x1r = a[0] - a[4]; x1i = a[1] - a[5]; x2r = a[2] + a[6]; x2i = a[3] + a[7]; x3r = a[2] - a[6]; x3i = a[3] - a[7]; a[0] = x0r + x2r; a[1] = x0i + x2i; a[2] = x1r + x3i; a[3] = x1i - x3r; a[4] = x0r - x2r; a[5] = x0i - x2i; a[6] = x1r - x3i; a[7] = x1i + x3r; } void cftx020(double *a) { double x0r, x0i; x0r = a[0] - a[2]; x0i = a[1] - a[3]; a[0] += a[2]; a[1] += a[3]; a[2] = x0r; a[3] = x0i; } void rftfsub(int n, double *a) { int i, i0, j, k; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = 2 * M_PI_2 / n; wkr = 0; wki = 0; wdi = cos(ec); wdr = sin(ec); wdi *= wdr; wdr *= wdr; w1r = 1 - 2 * wdr; w1i = 2 * wdi; ss = 2 * w1i; i = n >> 1; for (;;) { i0 = i - 4 * RDFT_LOOP_DIV; if (i0 < 4) { i0 = 4; } for (j = i - 4; j >= i0; j -= 4) { k = n - j; xr = a[j + 2] - a[k - 2]; xi = a[j + 3] + a[k - 1]; yr = wdr * xr - wdi * xi; yi = wdr * xi + wdi * xr; a[j + 2] -= yr; a[j + 3] -= yi; a[k - 2] += yr; a[k - 1] -= yi; wkr += ss * wdi; wki += ss * (0.5 - wdr); xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr - wki * xi; yi = wkr * xi + wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; wdr += ss * wki; wdi += ss * (0.5 - wkr); } if (i0 == 4) { break; } wkr = 0.5 * sin(ec * i0); wki = 0.5 * cos(ec * i0); wdr = 0.5 - (wkr * w1r - wki * w1i); wdi = wkr * w1i + wki * w1r; wkr = 0.5 - wkr; i = i0; } xr = a[2] - a[n - 2]; xi = a[3] + a[n - 1]; yr = wdr * xr - wdi * xi; yi = wdr * xi + wdi * xr; a[2] -= yr; a[3] -= yi; a[n - 2] += yr; a[n - 1] -= yi; } void rftbsub(int n, double *a) { int i, i0, j, k; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = 2 * M_PI_2 / n; wkr = 0; wki = 0; wdi = cos(ec); wdr = sin(ec); wdi *= wdr; wdr *= wdr; w1r = 1 - 2 * wdr; w1i = 2 * wdi; ss = 2 * w1i; i = n >> 1; for (;;) { i0 = i - 4 * RDFT_LOOP_DIV; if (i0 < 4) { i0 = 4; } for (j = i - 4; j >= i0; j -= 4) { k = n - j; xr = a[j + 2] - a[k - 2]; xi = a[j + 3] + a[k - 1]; yr = wdr * xr + wdi * xi; yi = wdr * xi - wdi * xr; a[j + 2] -= yr; a[j + 3] -= yi; a[k - 2] += yr; a[k - 1] -= yi; wkr += ss * wdi; wki += ss * (0.5 - wdr); xr = a[j] - a[k]; xi = a[j + 1] + a[k + 1]; yr = wkr * xr + wki * xi; yi = wkr * xi - wki * xr; a[j] -= yr; a[j + 1] -= yi; a[k] += yr; a[k + 1] -= yi; wdr += ss * wki; wdi += ss * (0.5 - wkr); } if (i0 == 4) { break; } wkr = 0.5 * sin(ec * i0); wki = 0.5 * cos(ec * i0); wdr = 0.5 - (wkr * w1r - wki * w1i); wdi = wkr * w1i + wki * w1r; wkr = 0.5 - wkr; i = i0; } xr = a[2] - a[n - 2]; xi = a[3] + a[n - 1]; yr = wdr * xr + wdi * xi; yi = wdr * xi - wdi * xr; a[2] -= yr; a[3] -= yi; a[n - 2] += yr; a[n - 1] -= yi; } void dctsub(int n, double *a) { int i, i0, j, k, m; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = M_PI_2 / n; wkr = 0.5; wki = 0.5; w1r = cos(ec); w1i = sin(ec); wdr = 0.5 * (w1r - w1i); wdi = 0.5 * (w1r + w1i); ss = 2 * w1i; m = n >> 1; i = 0; for (;;) { i0 = i + 2 * DCST_LOOP_DIV; if (i0 > m - 2) { i0 = m - 2; } for (j = i + 2; j <= i0; j += 2) { k = n - j; xr = wdi * a[j - 1] - wdr * a[k + 1]; xi = wdr * a[j - 1] + wdi * a[k + 1]; wkr -= ss * wdi; wki += ss * wdr; yr = wki * a[j] - wkr * a[k]; yi = wkr * a[j] + wki * a[k]; wdr -= ss * wki; wdi += ss * wkr; a[k + 1] = xr; a[k] = yr; a[j - 1] = xi; a[j] = yi; } if (i0 == m - 2) { break; } wdr = cos(ec * i0); wdi = sin(ec * i0); wkr = 0.5 * (wdr - wdi); wki = 0.5 * (wdr + wdi); wdr = wkr * w1r - wki * w1i; wdi = wkr * w1i + wki * w1r; i = i0; } xr = wdi * a[m - 1] - wdr * a[m + 1]; a[m - 1] = wdr * a[m - 1] + wdi * a[m + 1]; a[m + 1] = xr; a[m] *= WR5000; } void dstsub(int n, double *a) { int i, i0, j, k, m; double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; ec = M_PI_2 / n; wkr = 0.5; wki = 0.5; w1r = cos(ec); w1i = sin(ec); wdr = 0.5 * (w1r - w1i); wdi = 0.5 * (w1r + w1i); ss = 2 * w1i; m = n >> 1; i = 0; for (;;) { i0 = i + 2 * DCST_LOOP_DIV; if (i0 > m - 2) { i0 = m - 2; } for (j = i + 2; j <= i0; j += 2) { k = n - j; xr = wdi * a[k + 1] - wdr * a[j - 1]; xi = wdr * a[k + 1] + wdi * a[j - 1]; wkr -= ss * wdi; wki += ss * wdr; yr = wki * a[k] - wkr * a[j]; yi = wkr * a[k] + wki * a[j]; wdr -= ss * wki; wdi += ss * wkr; a[j - 1] = xr; a[j] = yr; a[k + 1] = xi; a[k] = yi; } if (i0 == m - 2) { break; } wdr = cos(ec * i0); wdi = sin(ec * i0); wkr = 0.5 * (wdr - wdi); wki = 0.5 * (wdr + wdi); wdr = wkr * w1r - wki * w1i; wdi = wkr * w1i + wki * w1r; i = i0; } xr = wdi * a[m + 1] - wdr * a[m - 1]; a[m + 1] = wdr * a[m + 1] + wdi * a[m - 1]; a[m - 1] = xr; a[m] *= WR5000; } void dctsub4(int n, double *a) { int m; double wki, wdr, wdi, xr; wki = WR5000; m = n >> 1; if (m == 2) { wdr = wki * WI2500; wdi = wki * WR2500; xr = wdi * a[1] - wdr * a[3]; a[1] = wdr * a[1] + wdi * a[3]; a[3] = xr; } a[m] *= wki; } void dstsub4(int n, double *a) { int m; double wki, wdr, wdi, xr; wki = WR5000; m = n >> 1; if (m == 2) { wdr = wki * WI2500; wdi = wki * WR2500; xr = wdi * a[3] - wdr * a[1]; a[3] = wdr * a[3] + wdi * a[1]; a[1] = xr; } a[m] *= wki; } OouraFFT-1.0/readme.txt000066400000000000000000000160171246725775500150260ustar00rootroot00000000000000General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package Description: A package to calculate Discrete Fourier/Cosine/Sine Transforms of 1-dimensional sequences of length 2^N. Files: fft4g.c : FFT Package in C - Fast Version I (radix 4,2) fft4g.f : FFT Package in Fortran - Fast Version I (radix 4,2) fft4g_h.c : FFT Package in C - Simple Version I (radix 4,2) fft8g.c : FFT Package in C - Fast Version II (radix 8,4,2) fft8g.f : FFT Package in Fortran - Fast Version II (radix 8,4,2) fft8g_h.c : FFT Package in C - Simple Version II (radix 8,4,2) fftsg.c : FFT Package in C - Fast Version III (Split-Radix) fftsg.f : FFT Package in Fortran - Fast Version III (Split-Radix) fftsg_h.c : FFT Package in C - Simple Version III (Split-Radix) readme.txt : Readme File sample1/ : Test Directory Makefile : for gcc, cc Makefile.f77: for Fortran testxg.c : Test Program for "fft*g.c" testxg.f : Test Program for "fft*g.f" testxg_h.c : Test Program for "fft*g_h.c" sample2/ : Benchmark Directory Makefile : for gcc, cc Makefile.pth: POSIX Thread version pi_fft.c : PI(= 3.1415926535897932384626...) Calculation Program for a Benchmark Test for "fft*g.c" Difference of the Files: C and Fortran versions are equal and the same routines are in each version. "fft4g*.*" are optimized for most machines. "fft8g*.*" are fast on the UltraSPARC. "fftsg*.*" are optimized for the machines that have the multi-level (L1,L2,etc) cache. The simple versions "fft*g_h.c" use no work area, but the fast versions "fft*g.*" use work areas. The fast versions "fft*g.*" have the same specification. Routines in the Package: cdft: Complex Discrete Fourier Transform rdft: Real Discrete Fourier Transform ddct: Discrete Cosine Transform ddst: Discrete Sine Transform dfct: Cosine Transform of RDFT (Real Symmetric DFT) dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) Usage: Please refer to the comments in the "fft**.*" file which you want to use. Brief explanations are in the block comments of each package. The examples are also given in the test programs. Method: -------- cdft -------- fft4g*.*, fft8g*.*: A method of in-place, radix 2^M, Sande-Tukey (decimation in frequency). Index of the butterfly loop is in bit reverse order to keep continuous memory access. fftsg*.*: A method of in-place, Split-Radix, recursive fast algorithm. -------- rdft -------- A method with a following butterfly operation appended to "cdft". In forward transform : A[k] = sum_j=0^n-1 a[j]*W(n)^(j*k), 0<=k<=n/2, W(n) = exp(2*pi*i/n), this routine makes an array x[] : x[j] = a[2*j] + i*a[2*j+1], 0<=j #include #define MAX(x,y) ((x) > (y) ? (x) : (y)) /* random number generator, 0 <= RND < 1 */ #define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200.0)) #ifndef NMAX #define NMAX 8192 #define NMAXSQRT 64 #endif void cdft(int, int, double *, int *, double *); void rdft(int, int, double *, int *, double *); void ddct(int, int, double *, int *, double *); void ddst(int, int, double *, int *, double *); void dfct(int, double *, double *, int *, double *); void dfst(int, double *, double *, int *, double *); void putdata(int nini, int nend, double *a); double errorcheck(int nini, int nend, double scale, double *a); int main() { int n, ip[NMAXSQRT + 2]; double a[NMAX + 1], w[NMAX * 5 / 4], t[NMAX / 2 + 1], err; printf("data length n=? (must be 2^m)\n"); scanf("%d", &n); ip[0] = 0; /* check of CDFT */ putdata(0, n - 1, a); cdft(n, 1, a, ip, w); cdft(n, -1, a, ip, w); err = errorcheck(0, n - 1, 2.0 / n, a); printf("cdft err= %g \n", err); /* check of RDFT */ putdata(0, n - 1, a); rdft(n, 1, a, ip, w); rdft(n, -1, a, ip, w); err = errorcheck(0, n - 1, 2.0 / n, a); printf("rdft err= %g \n", err); /* check of DDCT */ putdata(0, n - 1, a); ddct(n, 1, a, ip, w); ddct(n, -1, a, ip, w); a[0] *= 0.5; err = errorcheck(0, n - 1, 2.0 / n, a); printf("ddct err= %g \n", err); /* check of DDST */ putdata(0, n - 1, a); ddst(n, 1, a, ip, w); ddst(n, -1, a, ip, w); a[0] *= 0.5; err = errorcheck(0, n - 1, 2.0 / n, a); printf("ddst err= %g \n", err); /* check of DFCT */ putdata(0, n, a); a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); a[0] *= 0.5; a[n] *= 0.5; dfct(n, a, t, ip, w); err = errorcheck(0, n, 2.0 / n, a); printf("dfct err= %g \n", err); /* check of DFST */ putdata(1, n - 1, a); dfst(n, a, t, ip, w); dfst(n, a, t, ip, w); err = errorcheck(1, n - 1, 2.0 / n, a); printf("dfst err= %g \n", err); return 0; } void putdata(int nini, int nend, double *a) { int j, seed = 0; for (j = nini; j <= nend; j++) { a[j] = RND(&seed); } } double errorcheck(int nini, int nend, double scale, double *a) { int j, seed = 0; double err = 0, e; for (j = nini; j <= nend; j++) { e = RND(&seed) - a[j] * scale; err = MAX(err, fabs(e)); } return err; } OouraFFT-1.0/sample1/testxg.f000066400000000000000000000045721246725775500160620ustar00rootroot00000000000000! test of fft*g.f ! ! program main integer nmax, nmaxsqrt parameter (nmax = 32768) parameter (nmaxsqrt = 128) integer n, ip(0 : nmaxsqrt + 1) real*8 a(0 : nmax), w(0 : nmax * 5 / 4 - 1), t(0 : nmax / 2), & err, errorcheck ! write (*, *) 'data length n=? (must be 2**m)' read (*, *) n ip(0) = 0 ! ! check of CDFT call putdata(0, n - 1, a) call cdft(n, 1, a, ip, w) call cdft(n, -1, a, ip, w) err = errorcheck(0, n - 1, 2.0d0 / n, a) write (*, *) 'cdft err= ', err ! ! check of RDFT call putdata(0, n - 1, a) call rdft(n, 1, a, ip, w) call rdft(n, -1, a, ip, w) err = errorcheck(0, n - 1, 2.0d0 / n, a) write (*, *) 'rdft err= ', err ! ! check of DDCT call putdata(0, n - 1, a) call ddct(n, 1, a, ip, w) call ddct(n, -1, a, ip, w) a(0) = a(0) * 0.5d0 err = errorcheck(0, n - 1, 2.0d0 / n, a) write (*, *) 'ddct err= ', err ! ! check of DDST call putdata(0, n - 1, a) call ddst(n, 1, a, ip, w) call ddst(n, -1, a, ip, w) a(0) = a(0) * 0.5d0 err = errorcheck(0, n - 1, 2.0d0 / n, a) write (*, *) 'ddst err= ', err ! ! check of DFCT call putdata(0, n, a) a(0) = a(0) * 0.5d0 a(n) = a(n) * 0.5d0 call dfct(n, a, t, ip, w) a(0) = a(0) * 0.5d0 a(n) = a(n) * 0.5d0 call dfct(n, a, t, ip, w) err = errorcheck(0, n, 2.0d0 / n, a) write (*, *) 'dfct err= ', err ! ! check of DFST call putdata(1, n - 1, a) call dfst(n, a, t, ip, w) call dfst(n, a, t, ip, w) err = errorcheck(1, n - 1, 2.0d0 / n, a) write (*, *) 'dfst err= ', err ! end ! ! subroutine putdata(nini, nend, a) integer nini, nend, j, seed real*8 a(0 : *), drnd seed = 0 do j = nini, nend a(j) = drnd(seed) end do end ! ! function errorcheck(nini, nend, scale, a) integer nini, nend, j, seed real*8 scale, a(0 : *), drnd, err, e, errorcheck err = 0 seed = 0 do j = nini, nend e = drnd(seed) - a(j) * scale err = max(err, abs(e)) end do errorcheck = err end ! ! ! random number generator, 0 <= drnd < 1 real*8 function drnd(seed) integer seed seed = mod(seed * 7141 + 54773, 259200) drnd = seed * (1.0d0 / 259200.0d0) end ! OouraFFT-1.0/sample1/testxg_h.c000066400000000000000000000042021246725775500163540ustar00rootroot00000000000000/* test of fft*g_h.c */ #include #include #define MAX(x,y) ((x) > (y) ? (x) : (y)) /* random number generator, 0 <= RND < 1 */ #define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200.0)) #ifndef NMAX #define NMAX 8192 #endif void cdft(int, int, double *); void rdft(int, int, double *); void ddct(int, int, double *); void ddst(int, int, double *); void dfct(int, double *); void dfst(int, double *); void putdata(int nini, int nend, double *a); double errorcheck(int nini, int nend, double scale, double *a); int main() { int n; double a[NMAX + 1], err; printf("data length n=? (must be 2^m)\n"); scanf("%d", &n); /* check of CDFT */ putdata(0, n - 1, a); cdft(n, 1, a); cdft(n, -1, a); err = errorcheck(0, n - 1, 2.0 / n, a); printf("cdft err= %g \n", err); /* check of RDFT */ putdata(0, n - 1, a); rdft(n, 1, a); rdft(n, -1, a); err = errorcheck(0, n - 1, 2.0 / n, a); printf("rdft err= %g \n", err); /* check of DDCT */ putdata(0, n - 1, a); ddct(n, 1, a); ddct(n, -1, a); a[0] *= 0.5; err = errorcheck(0, n - 1, 2.0 / n, a); printf("ddct err= %g \n", err); /* check of DDST */ putdata(0, n - 1, a); ddst(n, 1, a); ddst(n, -1, a); a[0] *= 0.5; err = errorcheck(0, n - 1, 2.0 / n, a); printf("ddst err= %g \n", err); /* check of DFCT */ putdata(0, n, a); a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); err = errorcheck(0, n, 2.0 / n, a); printf("dfct err= %g \n", err); /* check of DFST */ putdata(1, n - 1, a); dfst(n, a); dfst(n, a); err = errorcheck(1, n - 1, 2.0 / n, a); printf("dfst err= %g \n", err); return 0; } void putdata(int nini, int nend, double *a) { int j, seed = 0; for (j = nini; j <= nend; j++) { a[j] = RND(&seed); } } double errorcheck(int nini, int nend, double scale, double *a) { int j, seed = 0; double err = 0, e; for (j = nini; j <= nend; j++) { e = RND(&seed) - a[j] * scale; err = MAX(err, fabs(e)); } return err; } OouraFFT-1.0/sample2/000077500000000000000000000000001246725775500143665ustar00rootroot00000000000000OouraFFT-1.0/sample2/Makefile.pth000066400000000000000000000010331246725775500166150ustar00rootroot00000000000000# ---- for GNU gcc ---- CC = gcc CFLAGS = -Wall -DUSE_CDFT_PTHREADS OFLAGS_FFT = -O6 -ffast-math OFLAGS_PI = -O6 -ffast-math # ---- for SUN WS cc ---- # #CC = cc # #CFLAGS = -DUSE_CDFT_PTHREADS # #OFLAGS_FFT = -fast -xO5 #OFLAGS_PI = -fast -xO5 all: pi_fftsgpt pi_fftsgpt : pi_fft.o fftsgpt.o $(CC) pi_fft.o fftsgpt.o -lm -lpthread -o pi_fftsgpt pi_fft.o : pi_fft.c $(CC) $(CFLAGS) $(OFLAGS_PI) -c pi_fft.c -o pi_fft.o fftsgpt.o : ../fftsg.c $(CC) $(CFLAGS) $(OFLAGS_FFT) -c ../fftsg.c -o fftsgpt.o clean: rm -f *.o OouraFFT-1.0/sample2/pi_fft.c000066400000000000000000001312671246725775500160130ustar00rootroot00000000000000/* ---- calculation of PI(= 3.14159...) using FFT ---- by T.Ooura, ver. LG1.1.2-MP1.5a Sep. 2001. This is a test program to estimate the performance of the FFT routines: fft*g.c. Example compilation: GNU : gcc -O6 -ffast-math pi_fft.c fftsg.c -lm -o pi_fftsg SUN : cc -fast -xO5 pi_fft.c fft8g.c -lm -o pi_fft8g Microsoft: cl /O2 /G6 pi_fft.c fft4g.c /Fepi_fft4g.exe ... etc. */ /* Please check the following macros before compiling */ #ifndef DBL_ERROR_MARGIN #define DBL_ERROR_MARGIN 0.3 /* must be < 0.5 */ #endif #include #include #include #include #include #include void mp_load_0(int n, int radix, int out[]); void mp_load_1(int n, int radix, int out[]); void mp_copy(int n, int radix, int in[], int out[]); void mp_round(int n, int radix, int m, int inout[]); int mp_cmp(int n, int radix, int in1[], int in2[]); void mp_add(int n, int radix, int in1[], int in2[], int out[]); void mp_sub(int n, int radix, int in1[], int in2[], int out[]); void mp_imul(int n, int radix, int in1[], int in2, int out[]); int mp_idiv(int n, int radix, int in1[], int in2, int out[]); void mp_idiv_2(int n, int radix, int in[], int out[]); double mp_mul_radix_test(int n, int radix, int nfft, double tmpfft[], int ip[], double w[]); void mp_mul(int n, int radix, int in1[], int in2[], int out[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], double tmp3fft[], int ip[], double w[]); void mp_squ(int n, int radix, int in[], int out[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]); void mp_mulh(int n, int radix, int in1[], int in2[], int out[], int nfft, double in1fft[], double outfft[], int ip[], double w[]); void mp_squh(int n, int radix, int in[], int out[], int nfft, double inoutfft[], int ip[], double w[]); int mp_inv(int n, int radix, int in[], int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]); int mp_sqrt(int n, int radix, int in[], int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]); void mp_sprintf(int n, int log10_radix, int in[], char out[]); void mp_sscanf(int n, int log10_radix, char in[], int out[]); void mp_fprintf(int n, int log10_radix, int in[], FILE *fout); int main() { int nfft, log2_nfft, radix, log10_radix, n, npow, nprc; double err, d_time, n_op; int *a, *b, *c, *e, *i1, *i2, *ip; double *d1, *d2, *d3, *w; time_t t_1, t_2; FILE *f_log, *f_out; f_log = fopen("pi.log", "w"); printf("PI calculation to estimate the FFT benchmarks\n"); fprintf(f_log, "PI calculation to estimate the FFT benchmarks\n"); printf("length of FFT =?\n"); scanf("%d", &nfft); printf("initializing...\n"); for (log2_nfft = 1; (1 << log2_nfft) < nfft; log2_nfft++); nfft = 1 << log2_nfft; n = nfft + 2; ip = (int *) malloc((3 + (int) sqrt(0.5 * nfft)) * sizeof(int)); w = (double *) malloc(nfft / 2 * sizeof(double)); a = (int *) malloc((n + 2) * sizeof(int)); b = (int *) malloc((n + 2) * sizeof(int)); c = (int *) malloc((n + 2) * sizeof(int)); e = (int *) malloc((n + 2) * sizeof(int)); i1 = (int *) malloc((n + 2) * sizeof(int)); i2 = (int *) malloc((n + 2) * sizeof(int)); d1 = (double *) malloc((nfft + 2) * sizeof(double)); d2 = (double *) malloc((nfft + 2) * sizeof(double)); d3 = (double *) malloc((nfft + 2) * sizeof(double)); if (d3 == NULL) { printf("Allocation Failure!\n"); exit(1); } ip[0] = 0; /* ---- radix test ---- */ log10_radix = 1; radix = 10; err = mp_mul_radix_test(n, radix, nfft, d1, ip, w); err += DBL_EPSILON * (n * radix * radix / 4); while (100 * err < DBL_ERROR_MARGIN && radix <= INT_MAX / 20) { err *= 100; log10_radix++; radix *= 10; } printf("nfft= %d\nradix= %d\nerror_margin= %g\n", nfft, radix, err); fprintf(f_log, "nfft= %d\nradix= %d\nerror_margin= %g\n", nfft, radix, err); printf("calculating %d digits of PI...\n", log10_radix * (n - 2)); fprintf(f_log, "calculating %d digits of PI...\n", log10_radix * (n - 2)); /* ---- time check ---- */ time(&t_1); /* * ---- a formula based on the AGM (Arithmetic-Geometric Mean) ---- * c = sqrt(0.125); * a = 1 + 3 * c; * b = sqrt(a); * e = b - 0.625; * b = 2 * b; * c = e - c; * a = a + e; * npow = 4; * do { * npow = 2 * npow; * e = (a + b) / 2; * b = sqrt(a * b); * e = e - b; * b = 2 * b; * c = c - e; * a = e + b; * } while (e > SQRT_SQRT_EPSILON); * e = e * e / 4; * a = a + b; * pi = (a * a - e - e / 2) / (a * c - e) / npow; * ---- modification ---- * This is a modified version of Gauss-Legendre formula * (by T.Ooura). It is faster than original version. * ---- reference ---- * 1. E.Salamin, * Computation of PI Using Arithmetic-Geometric Mean, * Mathematics of Computation, Vol.30 1976. * 2. R.P.Brent, * Fast Multiple-Precision Evaluation of Elementary Functions, * J. ACM 23 1976. * 3. D.Takahasi, Y.Kanada, * Calculation of PI to 51.5 Billion Decimal Digits on * Distributed Memoriy Parallel Processors, * Transactions of Information Processing Society of Japan, * Vol.39 No.7 1998. * 4. T.Ooura, * Improvement of the PI Calculation Algorithm and * Implementation of Fast Multiple-Precision Computation, * Information Processing Society of Japan SIG Notes, * 98-HPC-74, 1998. */ /* ---- c = sqrt(0.125) ---- */ mp_sscanf(n, log10_radix, "0.125", a); mp_sqrt(n, radix, a, c, i1, i2, nfft, d1, d2, ip, w); /* ---- a = 1 + 3 * c ---- */ mp_imul(n, radix, c, 3, e); mp_sscanf(n, log10_radix, "1", a); mp_add(n, radix, a, e, a); /* ---- b = sqrt(a) ---- */ mp_sqrt(n, radix, a, b, i1, i2, nfft, d1, d2, ip, w); /* ---- e = b - 0.625 ---- */ mp_sscanf(n, log10_radix, "0.625", e); mp_sub(n, radix, b, e, e); /* ---- b = 2 * b ---- */ mp_add(n, radix, b, b, b); /* ---- c = e - c ---- */ mp_sub(n, radix, e, c, c); /* ---- a = a + e ---- */ mp_add(n, radix, a, e, a); printf("AGM iteration\n"); fprintf(f_log, "AGM iteration\n"); npow = 4; do { npow *= 2; /* ---- e = (a + b) / 2 ---- */ mp_add(n, radix, a, b, e); mp_idiv_2(n, radix, e, e); /* ---- b = sqrt(a * b) ---- */ mp_mul(n, radix, a, b, a, i1, nfft, d1, d2, d3, ip, w); mp_sqrt(n, radix, a, b, i1, i2, nfft, d1, d2, ip, w); /* ---- e = e - b ---- */ mp_sub(n, radix, e, b, e); /* ---- b = 2 * b ---- */ mp_add(n, radix, b, b, b); /* ---- c = c - e ---- */ mp_sub(n, radix, c, e, c); /* ---- a = e + b ---- */ mp_add(n, radix, e, b, a); /* ---- convergence check ---- */ nprc = -e[1]; if (e[0] == 0) { nprc = n; } printf("precision= %d\n", 4 * nprc * log10_radix); fprintf(f_log, "precision= %d\n", 4 * nprc * log10_radix); } while (4 * nprc <= n); /* ---- e = e * e / 4 (half precision) ---- */ mp_idiv_2(n, radix, e, e); mp_squh(n, radix, e, e, nfft, d1, ip, w); /* ---- a = a + b ---- */ mp_add(n, radix, a, b, a); /* ---- a = (a * a - e - e / 2) / (a * c - e) / npow ---- */ mp_mul(n, radix, a, c, c, i1, nfft, d1, d2, d3, ip, w); mp_sub(n, radix, c, e, c); mp_inv(n, radix, c, b, i1, i2, nfft, d1, d2, ip, w); mp_squ(n, radix, a, a, i1, nfft, d1, d2, ip, w); mp_sub(n, radix, a, e, a); mp_idiv_2(n, radix, e, e); mp_sub(n, radix, a, e, a); mp_mul(n, radix, a, b, a, i1, nfft, d1, d2, d3, ip, w); mp_idiv(n, radix, a, npow, a); /* ---- time check ---- */ time(&t_2); /* ---- output ---- */ f_out = fopen("pi.dat", "w"); printf("writing pi.dat...\n"); mp_fprintf(n - 1, log10_radix, a, f_out); fclose(f_out); free(d3); free(d2); free(d1); free(i2); free(i1); free(e); free(c); free(b); free(a); free(w); free(ip); /* ---- benchmark ---- */ n_op = 50.0 * nfft * log2_nfft * log2_nfft; printf("floating point operation: %g op.\n", n_op); fprintf(f_log, "floating point operation: %g op.\n", n_op); /* ---- difftime ---- */ d_time = difftime(t_2, t_1); printf("execution time: %g sec. (real time)\n", d_time); fprintf(f_log, "execution time: %g sec. (real time)\n", d_time); fclose(f_log); return 0; } /* -------- multiple precision routines -------- */ #include #include #include /* ---- floating point format ---- data := data[0] * pow(radix, data[1]) * (data[2] + data[3]/radix + data[4]/radix/radix + ...), data[0] : sign (1;data>0, -1;data<0, 0;data==0) data[1] : exponent (0;data==0) data[2...n+1] : digits ---- function prototypes ---- void mp_load_0(int n, int radix, int out[]); void mp_load_1(int n, int radix, int out[]); void mp_copy(int n, int radix, int in[], int out[]); void mp_round(int n, int radix, int m, int inout[]); int mp_cmp(int n, int radix, int in1[], int in2[]); void mp_add(int n, int radix, int in1[], int in2[], int out[]); void mp_sub(int n, int radix, int in1[], int in2[], int out[]); void mp_imul(int n, int radix, int in1[], int in2, int out[]); int mp_idiv(int n, int radix, int in1[], int in2, int out[]); void mp_idiv_2(int n, int radix, int in[], int out[]); double mp_mul_radix_test(int n, int radix, int nfft, double tmpfft[], int ip[], double w[]); void mp_mul(int n, int radix, int in1[], int in2[], int out[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], double tmp3fft[], int ip[], double w[]); void mp_squ(int n, int radix, int in[], int out[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]); void mp_mulh(int n, int radix, int in1[], int in2[], int out[], int nfft, double in1fft[], double outfft[], int ip[], double w[]); void mp_squh(int n, int radix, int in[], int out[], int nfft, double inoutfft[], int ip[], double w[]); int mp_inv(int n, int radix, int in[], int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]); int mp_sqrt(int n, int radix, int in[], int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]); void mp_sprintf(int n, int log10_radix, int in[], char out[]); void mp_sscanf(int n, int log10_radix, char in[], int out[]); void mp_fprintf(int n, int log10_radix, int in[], FILE *fout); ---- */ /* -------- mp_load routines -------- */ void mp_load_0(int n, int radix, int out[]) { int j; for (j = 0; j <= n + 1; j++) { out[j] = 0; } } void mp_load_1(int n, int radix, int out[]) { int j; out[0] = 1; out[1] = 0; out[2] = 1; for (j = 3; j <= n + 1; j++) { out[j] = 0; } } void mp_copy(int n, int radix, int in[], int out[]) { int j; for (j = 0; j <= n + 1; j++) { out[j] = in[j]; } } void mp_round(int n, int radix, int m, int inout[]) { int j, x; if (m < n) { for (j = n + 1; j > m + 2; j--) { inout[j] = 0; } x = 2 * inout[m + 2]; inout[m + 2] = 0; if (x >= radix) { for (j = m + 1; j >= 2; j--) { x = inout[j] + 1; if (x < radix) { inout[j] = x; break; } inout[j] = 0; } if (x >= radix) { inout[2] = 1; inout[1]++; } } } } /* -------- mp_add routines -------- */ int mp_cmp(int n, int radix, int in1[], int in2[]) { int mp_unsgn_cmp(int n, int in1[], int in2[]); if (in1[0] > in2[0]) { return 1; } else if (in1[0] < in2[0]) { return -1; } return in1[0] * mp_unsgn_cmp(n, &in1[1], &in2[1]); } void mp_add(int n, int radix, int in1[], int in2[], int out[]) { int mp_unsgn_cmp(int n, int in1[], int in2[]); int mp_unexp_add(int n, int radix, int expdif, int in1[], int in2[], int out[]); int mp_unexp_sub(int n, int radix, int expdif, int in1[], int in2[], int out[]); int outsgn, outexp, expdif; expdif = in1[1] - in2[1]; outexp = in1[1]; if (expdif < 0) { outexp = in2[1]; } outsgn = in1[0] * in2[0]; if (outsgn >= 0) { if (outsgn > 0) { outsgn = in1[0]; } else { outsgn = in1[0] + in2[0]; outexp = in1[1] + in2[1]; expdif = 0; } if (expdif >= 0) { outexp += mp_unexp_add(n, radix, expdif, &in1[2], &in2[2], &out[2]); } else { outexp += mp_unexp_add(n, radix, -expdif, &in2[2], &in1[2], &out[2]); } } else { outsgn = mp_unsgn_cmp(n, &in1[1], &in2[1]); if (outsgn >= 0) { expdif = mp_unexp_sub(n, radix, expdif, &in1[2], &in2[2], &out[2]); } else { expdif = mp_unexp_sub(n, radix, -expdif, &in2[2], &in1[2], &out[2]); } outexp -= expdif; outsgn *= in1[0]; if (expdif == n) { outsgn = 0; } } if (outsgn == 0) { outexp = 0; } out[0] = outsgn; out[1] = outexp; } void mp_sub(int n, int radix, int in1[], int in2[], int out[]) { int mp_unsgn_cmp(int n, int in1[], int in2[]); int mp_unexp_add(int n, int radix, int expdif, int in1[], int in2[], int out[]); int mp_unexp_sub(int n, int radix, int expdif, int in1[], int in2[], int out[]); int outsgn, outexp, expdif; expdif = in1[1] - in2[1]; outexp = in1[1]; if (expdif < 0) { outexp = in2[1]; } outsgn = in1[0] * in2[0]; if (outsgn <= 0) { if (outsgn < 0) { outsgn = in1[0]; } else { outsgn = in1[0] - in2[0]; outexp = in1[1] + in2[1]; expdif = 0; } if (expdif >= 0) { outexp += mp_unexp_add(n, radix, expdif, &in1[2], &in2[2], &out[2]); } else { outexp += mp_unexp_add(n, radix, -expdif, &in2[2], &in1[2], &out[2]); } } else { outsgn = mp_unsgn_cmp(n, &in1[1], &in2[1]); if (outsgn >= 0) { expdif = mp_unexp_sub(n, radix, expdif, &in1[2], &in2[2], &out[2]); } else { expdif = mp_unexp_sub(n, radix, -expdif, &in2[2], &in1[2], &out[2]); } outexp -= expdif; outsgn *= in1[0]; if (expdif == n) { outsgn = 0; } } if (outsgn == 0) { outexp = 0; } out[0] = outsgn; out[1] = outexp; } /* -------- mp_add child routines -------- */ int mp_unsgn_cmp(int n, int in1[], int in2[]) { int j, cmp; cmp = 0; for (j = 0; j <= n && cmp == 0; j++) { cmp = in1[j] - in2[j]; } if (cmp > 0) { cmp = 1; } else if (cmp < 0) { cmp = -1; } return cmp; } int mp_unexp_add(int n, int radix, int expdif, int in1[], int in2[], int out[]) { int j, x, carry; carry = 0; if (expdif == 0 && in1[0] + in2[0] >= radix) { x = in1[n - 1] + in2[n - 1]; carry = x >= radix ? -1 : 0; for (j = n - 1; j > 0; j--) { x = in1[j - 1] + in2[j - 1] - carry; carry = x >= radix ? -1 : 0; out[j] = x - (radix & carry); } out[0] = -carry; } else { if (expdif > n) { expdif = n; } for (j = n - 1; j >= expdif; j--) { x = in1[j] + in2[j - expdif] - carry; carry = x >= radix ? -1 : 0; out[j] = x - (radix & carry); } for (j = expdif - 1; j >= 0; j--) { x = in1[j] - carry; carry = x >= radix ? -1 : 0; out[j] = x - (radix & carry); } if (carry != 0) { for (j = n - 1; j > 0; j--) { out[j] = out[j - 1]; } out[0] = -carry; } } return -carry; } int mp_unexp_sub(int n, int radix, int expdif, int in1[], int in2[], int out[]) { int j, x, borrow, ncancel; if (expdif > n) { expdif = n; } borrow = 0; for (j = n - 1; j >= expdif; j--) { x = in1[j] - in2[j - expdif] + borrow; borrow = x < 0 ? -1 : 0; out[j] = x + (radix & borrow); } for (j = expdif - 1; j >= 0; j--) { x = in1[j] + borrow; borrow = x < 0 ? -1 : 0; out[j] = x + (radix & borrow); } ncancel = 0; for (j = 0; j < n && out[j] == 0; j++) { ncancel = j + 1; } if (ncancel > 0 && ncancel < n) { for (j = 0; j < n - ncancel; j++) { out[j] = out[j + ncancel]; } for (j = n - ncancel; j < n; j++) { out[j] = 0; } } return ncancel; } /* -------- mp_imul routines -------- */ void mp_imul(int n, int radix, int in1[], int in2, int out[]) { void mp_unsgn_imul(int n, double dradix, int in1[], double din2, int out[]); if (in2 > 0) { out[0] = in1[0]; } else if (in2 < 0) { out[0] = -in1[0]; in2 = -in2; } else { out[0] = 0; } mp_unsgn_imul(n, radix, &in1[1], in2, &out[1]); if (out[0] == 0) { out[1] = 0; } } int mp_idiv(int n, int radix, int in1[], int in2, int out[]) { void mp_load_0(int n, int radix, int out[]); void mp_unsgn_idiv(int n, double dradix, int in1[], double din2, int out[]); if (in2 == 0) { return -1; } if (in2 > 0) { out[0] = in1[0]; } else { out[0] = -in1[0]; in2 = -in2; } if (in1[0] == 0) { mp_load_0(n, radix, out); return 0; } mp_unsgn_idiv(n, radix, &in1[1], in2, &out[1]); return 0; } void mp_idiv_2(int n, int radix, int in[], int out[]) { int j, ix, carry, shift; out[0] = in[0]; shift = 0; if (in[2] == 1) { shift = 1; } out[1] = in[1] - shift; carry = -shift; for (j = 2; j <= n + 1 - shift; j++) { ix = in[j + shift] + (radix & carry); carry = -(ix & 1); out[j] = ix >> 1; } if (shift > 0) { out[n + 1] = (radix & carry) >> 1; } } /* -------- mp_imul child routines -------- */ void mp_unsgn_imul(int n, double dradix, int in1[], double din2, int out[]) { int j, carry, shift; double x, d1_radix; d1_radix = 1.0 / dradix; carry = 0; for (j = n; j >= 1; j--) { x = din2 * in1[j] + carry + 0.5; carry = (int) (d1_radix * x); out[j] = (int) (x - dradix * carry); } shift = 0; x = carry + 0.5; while (x > 1) { x *= d1_radix; shift++; } out[0] = in1[0] + shift; if (shift > 0) { while (shift > n) { carry = (int) (d1_radix * carry + 0.5); shift--; } for (j = n; j >= shift + 1; j--) { out[j] = out[j - shift]; } for (j = shift; j >= 1; j--) { x = carry + 0.5; carry = (int) (d1_radix * x); out[j] = (int) (x - dradix * carry); } } } void mp_unsgn_idiv(int n, double dradix, int in1[], double din2, int out[]) { int j, ix, carry, shift; double x, d1_in2; d1_in2 = 1.0 / din2; shift = 0; x = 0; do { shift++; x *= dradix; if (shift <= n) { x += in1[shift]; } } while (x < din2 - 0.5); x += 0.5; ix = (int) (d1_in2 * x); carry = (int) (x - din2 * ix); out[1] = ix; shift--; out[0] = in1[0] - shift; if (shift >= n) { shift = n - 1; } for (j = 2; j <= n - shift; j++) { x = in1[j + shift] + dradix * carry + 0.5; ix = (int) (d1_in2 * x); carry = (int) (x - din2 * ix); out[j] = ix; } for (j = n - shift + 1; j <= n; j++) { x = dradix * carry + 0.5; ix = (int) (d1_in2 * x); carry = (int) (x - din2 * ix); out[j] = ix; } } /* -------- mp_mul routines -------- */ double mp_mul_radix_test(int n, int radix, int nfft, double tmpfft[], int ip[], double w[]) { void rdft(int n, int isgn, double *a, int *ip, double *w); void mp_mul_csqu(int nfft, double dinout[]); double mp_mul_d2i_test(int radix, int nfft, double din[]); int j, ndata, radix_2; ndata = (nfft >> 1) + 1; if (ndata > n) { ndata = n; } tmpfft[nfft + 1] = radix - 1; for (j = nfft; j > ndata; j--) { tmpfft[j] = 0; } radix_2 = (radix + 1) / 2; for (j = ndata; j > 2; j--) { tmpfft[j] = radix_2; } tmpfft[2] = radix; tmpfft[1] = radix - 1; tmpfft[0] = 0; rdft(nfft, 1, &tmpfft[1], ip, w); mp_mul_csqu(nfft, tmpfft); rdft(nfft, -1, &tmpfft[1], ip, w); return 2 * mp_mul_d2i_test(radix, nfft, tmpfft); } void mp_mul(int n, int radix, int in1[], int in2[], int out[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], double tmp3fft[], int ip[], double w[]) { void mp_copy(int n, int radix, int in[], int out[]); void mp_add(int n, int radix, int in1[], int in2[], int out[]); void rdft(int n, int isgn, double *a, int *ip, double *w); void mp_mul_i2d(int n, int radix, int nfft, int shift, int in[], double dout[]); void mp_mul_cmul(int nfft, double din[], double dinout[]); void mp_mul_cmuladd(int nfft, double din1[], double din2[], double dinout[]); void mp_mul_d2i(int n, int radix, int nfft, double din[], int out[]); int n_h, shift; shift = (nfft >> 1) + 1; while (n > shift) { if (in1[shift + 2] + in2[shift + 2] != 0) { break; } shift++; } n_h = n / 2 + 1; if (n_h < n - shift) { n_h = n - shift; } /* ---- tmp3fft = (upper) in1 * (lower) in2 ---- */ mp_mul_i2d(n, radix, nfft, 0, in1, tmp1fft); rdft(nfft, 1, &tmp1fft[1], ip, w); mp_mul_i2d(n, radix, nfft, shift, in2, tmp3fft); rdft(nfft, 1, &tmp3fft[1], ip, w); mp_mul_cmul(nfft, tmp1fft, tmp3fft); /* ---- tmp = (upper) in1 * (upper) in2 ---- */ mp_mul_i2d(n, radix, nfft, 0, in2, tmp2fft); rdft(nfft, 1, &tmp2fft[1], ip, w); mp_mul_cmul(nfft, tmp2fft, tmp1fft); rdft(nfft, -1, &tmp1fft[1], ip, w); mp_mul_d2i(n, radix, nfft, tmp1fft, tmp); /* ---- tmp3fft += (upper) in2 * (lower) in1 ---- */ mp_mul_i2d(n, radix, nfft, shift, in1, tmp1fft); rdft(nfft, 1, &tmp1fft[1], ip, w); mp_mul_cmuladd(nfft, tmp1fft, tmp2fft, tmp3fft); /* ---- out = tmp + tmp3fft ---- */ rdft(nfft, -1, &tmp3fft[1], ip, w); mp_mul_d2i(n_h, radix, nfft, tmp3fft, out); if (out[0] != 0) { mp_add(n, radix, out, tmp, out); } else { mp_copy(n, radix, tmp, out); } } void mp_squ(int n, int radix, int in[], int out[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]) { void mp_add(int n, int radix, int in1[], int in2[], int out[]); void rdft(int n, int isgn, double *a, int *ip, double *w); void mp_mul_i2d(int n, int radix, int nfft, int shift, int in[], double dout[]); void mp_mul_cmul(int nfft, double din[], double dinout[]); void mp_mul_csqu(int nfft, double dinout[]); void mp_mul_d2i(int n, int radix, int nfft, double din[], int out[]); int n_h, shift; shift = (nfft >> 1) + 1; while (n > shift) { if (in[shift + 2] != 0) { break; } shift++; } n_h = n / 2 + 1; if (n_h < n - shift) { n_h = n - shift; } /* ---- tmp = (upper) in * (lower) in ---- */ mp_mul_i2d(n, radix, nfft, 0, in, tmp1fft); rdft(nfft, 1, &tmp1fft[1], ip, w); mp_mul_i2d(n, radix, nfft, shift, in, tmp2fft); rdft(nfft, 1, &tmp2fft[1], ip, w); mp_mul_cmul(nfft, tmp1fft, tmp2fft); rdft(nfft, -1, &tmp2fft[1], ip, w); mp_mul_d2i(n_h, radix, nfft, tmp2fft, tmp); /* ---- out = 2 * tmp + ((upper) in)^2 ---- */ mp_mul_csqu(nfft, tmp1fft); rdft(nfft, -1, &tmp1fft[1], ip, w); mp_mul_d2i(n, radix, nfft, tmp1fft, out); if (tmp[0] != 0) { mp_add(n_h, radix, tmp, tmp, tmp); mp_add(n, radix, out, tmp, out); } } void mp_mulh(int n, int radix, int in1[], int in2[], int out[], int nfft, double in1fft[], double outfft[], int ip[], double w[]) { void rdft(int n, int isgn, double *a, int *ip, double *w); void mp_mul_i2d(int n, int radix, int nfft, int shift, int in[], double dout[]); void mp_mul_cmul(int nfft, double din[], double dinout[]); void mp_mul_d2i(int n, int radix, int nfft, double din[], int out[]); mp_mul_i2d(n, radix, nfft, 0, in1, in1fft); rdft(nfft, 1, &in1fft[1], ip, w); mp_mul_i2d(n, radix, nfft, 0, in2, outfft); rdft(nfft, 1, &outfft[1], ip, w); mp_mul_cmul(nfft, in1fft, outfft); rdft(nfft, -1, &outfft[1], ip, w); mp_mul_d2i(n, radix, nfft, outfft, out); } void mp_mulh_use_in1fft(int n, int radix, double in1fft[], int shift, int in2[], int out[], int nfft, double outfft[], int ip[], double w[]) { void rdft(int n, int isgn, double *a, int *ip, double *w); void mp_mul_i2d(int n, int radix, int nfft, int shift, int in[], double dout[]); void mp_mul_cmul(int nfft, double din[], double dinout[]); void mp_mul_d2i(int n, int radix, int nfft, double din[], int out[]); int n_h; while (n > shift) { if (in2[shift + 2] != 0) { break; } shift++; } n_h = n / 2 + 1; if (n_h < n - shift) { n_h = n - shift; } mp_mul_i2d(n, radix, nfft, shift, in2, outfft); rdft(nfft, 1, &outfft[1], ip, w); mp_mul_cmul(nfft, in1fft, outfft); rdft(nfft, -1, &outfft[1], ip, w); mp_mul_d2i(n_h, radix, nfft, outfft, out); } void mp_squh(int n, int radix, int in[], int out[], int nfft, double inoutfft[], int ip[], double w[]) { void rdft(int n, int isgn, double *a, int *ip, double *w); void mp_mul_i2d(int n, int radix, int nfft, int shift, int in[], double dout[]); void mp_mul_csqu(int nfft, double dinout[]); void mp_mul_d2i(int n, int radix, int nfft, double din[], int out[]); mp_mul_i2d(n, radix, nfft, 0, in, inoutfft); rdft(nfft, 1, &inoutfft[1], ip, w); mp_mul_csqu(nfft, inoutfft); rdft(nfft, -1, &inoutfft[1], ip, w); mp_mul_d2i(n, radix, nfft, inoutfft, out); } void mp_squh_use_in1fft(int n, int radix, double inoutfft[], int out[], int nfft, int ip[], double w[]) { void rdft(int n, int isgn, double *a, int *ip, double *w); void mp_mul_csqu(int nfft, double dinout[]); void mp_mul_d2i(int n, int radix, int nfft, double din[], int out[]); mp_mul_csqu(nfft, inoutfft); rdft(nfft, -1, &inoutfft[1], ip, w); mp_mul_d2i(n, radix, nfft, inoutfft, out); } /* -------- mp_mul child routines -------- */ void mp_mul_i2d(int n, int radix, int nfft, int shift, int in[], double dout[]) { int j, x, carry, ndata, radix_2, topdgt; ndata = 0; topdgt = 0; if (n > shift) { topdgt = in[shift + 2]; ndata = (nfft >> 1) + 1; if (ndata > n - shift) { ndata = n - shift; } } dout[nfft + 1] = in[0] * topdgt; for (j = nfft; j > ndata; j--) { dout[j] = 0; } /* ---- abs(dout[j]) <= radix/2 (to keep FFT precision) ---- */ if (ndata > 1) { radix_2 = radix / 2; carry = 0; for (j = ndata + 1; j > 3; j--) { x = in[j + shift] - carry; carry = x >= radix_2 ? -1 : 0; dout[j - 1] = x - (radix & carry); } dout[2] = in[shift + 3] - carry; } dout[1] = topdgt; dout[0] = in[1] - shift; } void mp_mul_cmul(int nfft, double din[], double dinout[]) { int j; double xr, xi, yr, yi; dinout[0] += din[0]; dinout[1] *= din[1]; dinout[2] *= din[2]; for (j = 3; j < nfft; j += 2) { xr = din[j]; xi = din[j + 1]; yr = dinout[j]; yi = dinout[j + 1]; dinout[j] = xr * yr - xi * yi; dinout[j + 1] = xr * yi + xi * yr; } dinout[nfft + 1] *= din[nfft + 1]; } void mp_mul_cmuladd(int nfft, double din1[], double din2[], double dinout[]) { int j; double xr, xi, yr, yi; dinout[1] += din1[1] * din2[1]; dinout[2] += din1[2] * din2[2]; for (j = 3; j < nfft; j += 2) { xr = din1[j]; xi = din1[j + 1]; yr = din2[j]; yi = din2[j + 1]; dinout[j] += xr * yr - xi * yi; dinout[j + 1] += xr * yi + xi * yr; } dinout[nfft + 1] += din1[nfft + 1] * din2[nfft + 1]; } void mp_mul_csqu(int nfft, double dinout[]) { int j; double xr, xi; dinout[0] *= 2; dinout[1] *= dinout[1]; dinout[2] *= dinout[2]; for (j = 3; j < nfft; j += 2) { xr = dinout[j]; xi = dinout[j + 1]; dinout[j] = xr * xr - xi * xi; dinout[j + 1] = 2 * xr * xi; } dinout[nfft + 1] *= dinout[nfft + 1]; } void mp_mul_d2i(int n, int radix, int nfft, double din[], int out[]) { int j, carry, carry1, carry2, shift, ndata; double x, scale, d1_radix, d1_radix2, pow_radix, topdgt; scale = 2.0 / nfft; d1_radix = 1.0 / radix; d1_radix2 = d1_radix * d1_radix; topdgt = din[nfft + 1]; x = topdgt < 0 ? -topdgt : topdgt; shift = x + 0.5 >= radix ? 1 : 0; /* ---- correction of cyclic convolution of din[1] ---- */ x *= nfft * 0.5; din[nfft + 1] = din[1] - x; din[1] = x; /* ---- output of digits ---- */ ndata = n; if (n > nfft + 1 + shift) { ndata = nfft + 1 + shift; for (j = n + 1; j > ndata + 1; j--) { out[j] = 0; } } x = 0; pow_radix = 1; for (j = ndata + 1 - shift; j <= nfft + 1; j++) { x += pow_radix * din[j]; pow_radix *= d1_radix; if (pow_radix < DBL_EPSILON) { break; } } x = d1_radix2 * (scale * x + 0.5); carry2 = ((int) x) - 1; carry = (int) (radix * (x - carry2) + 0.5); for (j = ndata; j > 1; j--) { x = d1_radix2 * (scale * din[j - shift] + carry + 0.5); carry = carry2; carry2 = ((int) x) - 1; x = radix * (x - carry2); carry1 = (int) x; out[j + 1] = (int) (radix * (x - carry1)); carry += carry1; } x = carry + ((double) radix) * carry2 + 0.5; if (shift == 0) { x += scale * din[1]; } carry = (int) (d1_radix * x); out[2] = (int) (x - ((double) radix) * carry); if (carry > 0) { for (j = n + 1; j > 2; j--) { out[j] = out[j - 1]; } out[2] = carry; shift++; } /* ---- output of exp, sgn ---- */ x = din[0] + shift + 0.5; shift = ((int) x) - 1; out[1] = shift + ((int) (x - shift)); out[0] = topdgt > 0.5 ? 1 : -1; if (out[2] == 0) { out[0] = 0; out[1] = 0; } } double mp_mul_d2i_test(int radix, int nfft, double din[]) { int j, carry, carry1, carry2; double x, scale, d1_radix, d1_radix2, err; scale = 2.0 / nfft; d1_radix = 1.0 / radix; d1_radix2 = d1_radix * d1_radix; /* ---- correction of cyclic convolution of din[1] ---- */ x = din[nfft + 1] * nfft * 0.5; if (x < 0) { x = -x; } din[nfft + 1] = din[1] - x; /* ---- check of digits ---- */ err = 0; carry = 0; carry2 = 0; for (j = nfft + 1; j > 1; j--) { x = d1_radix2 * (scale * din[j] + carry + 0.5); carry = carry2; carry2 = ((int) x) - 1; x = radix * (x - carry2); carry1 = (int) x; x = radix * (x - carry1); carry += carry1; x = x - 0.5 - ((int) x); if (x > err) { err = x; } else if (-x > err) { err = -x; } } return err; } /* -------- mp_inv routines -------- */ int mp_inv(int n, int radix, int in[], int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]) { int mp_get_nfft_init(int radix, int nfft_max); void mp_inv_init(int n, int radix, int in[], int out[]); int mp_inv_newton(int n, int radix, int in[], int inout[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]); int n_nwt, nfft_nwt, thr, prc; if (in[0] == 0) { return -1; } nfft_nwt = mp_get_nfft_init(radix, nfft); n_nwt = nfft_nwt + 2; if (n_nwt > n) { n_nwt = n; } mp_inv_init(n_nwt, radix, in, out); thr = 8; do { n_nwt = nfft_nwt + 2; if (n_nwt > n) { n_nwt = n; } prc = mp_inv_newton(n_nwt, radix, in, out, tmp1, tmp2, nfft_nwt, tmp1fft, tmp2fft, ip, w); if (thr * nfft_nwt >= nfft) { thr = 0; if (2 * prc <= n_nwt - 2) { nfft_nwt >>= 1; } } else { if (3 * prc < n_nwt - 2) { nfft_nwt >>= 1; } } nfft_nwt <<= 1; } while (nfft_nwt <= nfft); return 0; } int mp_sqrt(int n, int radix, int in[], int out[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]) { void mp_load_0(int n, int radix, int out[]); int mp_get_nfft_init(int radix, int nfft_max); void mp_sqrt_init(int n, int radix, int in[], int out[], int out_rev[]); int mp_sqrt_newton(int n, int radix, int in[], int inout[], int inout_rev[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[], int *n_tmp1fft); int n_nwt, nfft_nwt, thr, prc, n_tmp1fft; if (in[0] < 0) { return -1; } else if (in[0] == 0) { mp_load_0(n, radix, out); return 0; } nfft_nwt = mp_get_nfft_init(radix, nfft); n_nwt = nfft_nwt + 2; if (n_nwt > n) { n_nwt = n; } mp_sqrt_init(n_nwt, radix, in, out, tmp1); n_tmp1fft = 0; thr = 8; do { n_nwt = nfft_nwt + 2; if (n_nwt > n) { n_nwt = n; } prc = mp_sqrt_newton(n_nwt, radix, in, out, tmp1, tmp2, nfft_nwt, tmp1fft, tmp2fft, ip, w, &n_tmp1fft); if (thr * nfft_nwt >= nfft) { thr = 0; if (2 * prc <= n_nwt - 2) { nfft_nwt >>= 1; } } else { if (3 * prc < n_nwt - 2) { nfft_nwt >>= 1; } } nfft_nwt <<= 1; } while (nfft_nwt <= nfft); return 0; } /* -------- mp_inv child routines -------- */ int mp_get_nfft_init(int radix, int nfft_max) { int nfft_init; double r; r = radix; nfft_init = 1; do { r *= r; nfft_init <<= 1; } while (DBL_EPSILON * r < 1 && nfft_init < nfft_max); return nfft_init; } void mp_inv_init(int n, int radix, int in[], int out[]) { void mp_unexp_d2mp(int n, int radix, double din, int out[]); double mp_unexp_mp2d(int n, int radix, int in[]); int outexp; double din; out[0] = in[0]; outexp = -in[1]; din = 1.0 / mp_unexp_mp2d(n, radix, &in[2]); while (din < 1) { din *= radix; outexp--; } out[1] = outexp; mp_unexp_d2mp(n, radix, din, &out[2]); } void mp_sqrt_init(int n, int radix, int in[], int out[], int out_rev[]) { void mp_unexp_d2mp(int n, int radix, double din, int out[]); double mp_unexp_mp2d(int n, int radix, int in[]); int outexp; double din; out[0] = 1; out_rev[0] = 1; outexp = in[1]; din = mp_unexp_mp2d(n, radix, &in[2]); if (outexp % 2 != 0) { din *= radix; outexp--; } outexp /= 2; din = sqrt(din); if (din < 1) { din *= radix; outexp--; } out[1] = outexp; mp_unexp_d2mp(n, radix, din, &out[2]); outexp = -outexp; din = 1.0 / din; while (din < 1) { din *= radix; outexp--; } out_rev[1] = outexp; mp_unexp_d2mp(n, radix, din, &out_rev[2]); } void mp_unexp_d2mp(int n, int radix, double din, int out[]) { int j, x; for (j = 0; j < n; j++) { x = (int) din; if (x >= radix) { x = radix - 1; din = radix; } din = radix * (din - x); out[j] = x; } } double mp_unexp_mp2d(int n, int radix, int in[]) { int j; double d1_radix, dout; d1_radix = 1.0 / radix; dout = 0; for (j = n - 1; j >= 0; j--) { dout = d1_radix * dout + in[j]; } return dout; } int mp_inv_newton(int n, int radix, int in[], int inout[], int tmp1[], int tmp2[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[]) { void mp_load_1(int n, int radix, int out[]); void mp_round(int n, int radix, int m, int inout[]); void mp_add(int n, int radix, int in1[], int in2[], int out[]); void mp_sub(int n, int radix, int in1[], int in2[], int out[]); void mp_mulh(int n, int radix, int in1[], int in2[], int out[], int nfft, double in1fft[], double outfft[], int ip[], double w[]); void mp_mulh_use_in1fft(int n, int radix, double in1fft[], int shift, int in2[], int out[], int nfft, double outfft[], int ip[], double w[]); int n_h, shift, prc; shift = (nfft >> 1) + 1; n_h = n / 2 + 1; if (n_h < n - shift) { n_h = n - shift; } /* ---- tmp1 = inout * (upper) in (half to normal precision) ---- */ mp_round(n, radix, shift, inout); mp_mulh(n, radix, inout, in, tmp1, nfft, tmp1fft, tmp2fft, ip, w); /* ---- tmp2 = 1 - tmp1 ---- */ mp_load_1(n, radix, tmp2); mp_sub(n, radix, tmp2, tmp1, tmp2); /* ---- tmp2 -= inout * (lower) in (half precision) ---- */ mp_mulh_use_in1fft(n, radix, tmp1fft, shift, in, tmp1, nfft, tmp2fft, ip, w); mp_sub(n_h, radix, tmp2, tmp1, tmp2); /* ---- get precision ---- */ prc = -tmp2[1]; if (tmp2[0] == 0) { prc = nfft + 1; } /* ---- tmp2 *= inout (half precision) ---- */ mp_mulh_use_in1fft(n_h, radix, tmp1fft, 0, tmp2, tmp2, nfft, tmp2fft, ip, w); /* ---- inout += tmp2 ---- */ if (tmp2[0] != 0) { mp_add(n, radix, inout, tmp2, inout); } return prc; } int mp_sqrt_newton(int n, int radix, int in[], int inout[], int inout_rev[], int tmp[], int nfft, double tmp1fft[], double tmp2fft[], int ip[], double w[], int *n_tmp1fft) { void mp_round(int n, int radix, int m, int inout[]); void mp_add(int n, int radix, int in1[], int in2[], int out[]); void mp_sub(int n, int radix, int in1[], int in2[], int out[]); void mp_idiv_2(int n, int radix, int in[], int out[]); void mp_mulh(int n, int radix, int in1[], int in2[], int out[], int nfft, double in1fft[], double outfft[], int ip[], double w[]); void mp_squh(int n, int radix, int in[], int out[], int nfft, double inoutfft[], int ip[], double w[]); void mp_squh_use_in1fft(int n, int radix, double inoutfft[], int out[], int nfft, int ip[], double w[]); int n_h, nfft_h, shift, prc; nfft_h = nfft >> 1; shift = nfft_h + 1; if (nfft_h < 2) { nfft_h = 2; } n_h = n / 2 + 1; if (n_h < n - shift) { n_h = n - shift; } /* ---- tmp = inout_rev^2 (1/4 to half precision) ---- */ mp_round(n_h, radix, (nfft_h >> 1) + 1, inout_rev); if (*n_tmp1fft != nfft_h) { mp_squh(n_h, radix, inout_rev, tmp, nfft_h, tmp1fft, ip, w); } else { mp_squh_use_in1fft(n_h, radix, tmp1fft, tmp, nfft_h, ip, w); } /* ---- tmp = inout_rev - inout * tmp (half precision) ---- */ mp_round(n, radix, shift, inout); mp_mulh(n_h, radix, inout, tmp, tmp, nfft, tmp1fft, tmp2fft, ip, w); mp_sub(n_h, radix, inout_rev, tmp, tmp); /* ---- inout_rev += tmp ---- */ mp_add(n_h, radix, inout_rev, tmp, inout_rev); /* ---- tmp = in - inout^2 (half to normal precision) ---- */ mp_squh_use_in1fft(n, radix, tmp1fft, tmp, nfft, ip, w); mp_sub(n, radix, in, tmp, tmp); /* ---- get precision ---- */ prc = in[1] - tmp[1]; if (in[2] > tmp[2]) { prc++; } if (tmp[0] == 0) { prc = nfft + 1; } /* ---- tmp = tmp * inout_rev / 2 (half precision) ---- */ mp_round(n_h, radix, shift, inout_rev); mp_mulh(n_h, radix, inout_rev, tmp, tmp, nfft, tmp1fft, tmp2fft, ip, w); *n_tmp1fft = nfft; mp_idiv_2(n_h, radix, tmp, tmp); /* ---- inout += tmp ---- */ if (tmp[0] != 0) { mp_add(n, radix, inout, tmp, inout); } return prc; } /* -------- mp_io routines -------- */ void mp_sprintf(int n, int log10_radix, int in[], char out[]) { int j, k, x, y, outexp, shift; if (in[0] < 0) { *out++ = '-'; } x = in[2]; shift = log10_radix; for (k = log10_radix; k > 0; k--) { y = x % 10; x /= 10; out[k] = '0' + y; if (y != 0) { shift = k; } } out[0] = out[shift]; out[1] = '.'; for (k = 1; k <= log10_radix - shift; k++) { out[k + 1] = out[k + shift]; } outexp = log10_radix - shift; out += outexp + 2; for (j = 3; j <= n + 1; j++) { x = in[j]; for (k = log10_radix - 1; k >= 0; k--) { y = x % 10; x /= 10; out[k] = '0' + y; } out += log10_radix; } *out++ = 'e'; outexp += log10_radix * in[1]; sprintf(out, "%d", outexp); } void mp_sscanf(int n, int log10_radix, char in[], int out[]) { char *s; int j, x, outexp, outexp_mod; while (*in == ' ') { in++; } out[0] = 1; if (*in == '-') { out[0] = -1; in++; } else if (*in == '+') { in++; } while (*in == ' ' || *in == '0') { in++; } outexp = 0; for (s = in; *s != '\0'; s++) { if (*s == 'e' || *s == 'E' || *s == 'd' || *s == 'D') { if (sscanf(++s, "%d", &outexp) != 1) { outexp = 0; } break; } } if (*in == '.') { do { outexp--; while (*++in == ' '); } while (*in == '0' && *in != '\0'); } else if (*in != '\0') { s = in; while (*++s == ' '); while (*s >= '0' && *s <= '9' && *s != '\0') { outexp++; while (*++s == ' '); } } x = outexp / log10_radix; outexp_mod = outexp - log10_radix * x; if (outexp_mod < 0) { x--; outexp_mod += log10_radix; } out[1] = x; x = 0; j = 2; for (s = in; *s != '\0'; s++) { if (*s == '.' || *s == ' ') { continue; } if (*s < '0' || *s > '9') { break; } x = 10 * x + (*s - '0'); if (--outexp_mod < 0) { if (j > n + 1) { break; } out[j++] = x; x = 0; outexp_mod = log10_radix - 1; } } while (outexp_mod-- >= 0) { x *= 10; } while (j <= n + 1) { out[j++] = x; x = 0; } if (out[2] == 0) { out[0] = 0; out[1] = 0; } } void mp_fprintf(int n, int log10_radix, int in[], FILE *fout) { int j, k, x, y, outexp, shift; char out[256]; if (in[0] < 0) { putc('-', fout); } x = in[2]; shift = log10_radix; for (k = log10_radix; k > 0; k--) { y = x % 10; x /= 10; out[k] = '0' + y; if (y != 0) { shift = k; } } putc(out[shift], fout); putc('.', fout); for (k = 1; k <= log10_radix - shift; k++) { putc(out[k + shift], fout); } outexp = log10_radix - shift; for (j = 3; j <= n + 1; j++) { x = in[j]; for (k = log10_radix - 1; k >= 0; k--) { y = x % 10; x /= 10; out[k] = '0' + y; } for (k = 0; k < log10_radix; k++) { putc(out[k], fout); } } putc('e', fout); outexp += log10_radix * in[1]; sprintf(out, "%d", outexp); for (k = 0; out[k] != '\0'; k++) { putc(out[k], fout); } } OouraFFT-1.0/sample2d/000077500000000000000000000000001246725775500145325ustar00rootroot00000000000000OouraFFT-1.0/sample2d/Makefile.f77000066400000000000000000000021111246725775500165670ustar00rootroot00000000000000# ---- for GNU g77 ---- F77 = g77 FFLAGS = -Wall OFLAGS = -O2 # ---- for SUN WS f77 ---- # #F77 = f77 # #FFLAGS = # #OFLAGS = -xO2 all: fft4f2dt_f fftsg2dt_f fftsg3dt_f fft4f2dt_f : fft4f2dt_f.o fft4f2d_f.o $(F77) fft4f2dt_f.o fft4f2d_f.o -o fft4f2dt_f fftsg2dt_f : fftsg2dt_f.o fftsg2d_f.o fftsg_f.o $(F77) fftsg2dt_f.o fftsg2d_f.o fftsg_f.o -o fftsg2dt_f fftsg3dt_f : fftsg3dt_f.o fftsg3d_f.o fftsg_f.o $(F77) fftsg3dt_f.o fftsg3d_f.o fftsg_f.o -o fftsg3dt_f fft4f2dt_f.o : fft4f2dt.f $(F77) $(FFLAGS) $(OFLAGS) -c fft4f2dt.f -o fft4f2dt_f.o fftsg2dt_f.o : fftsg2dt.f $(F77) $(FFLAGS) $(OFLAGS) -c fftsg2dt.f -o fftsg2dt_f.o fftsg3dt_f.o : fftsg3dt.f $(F77) $(FFLAGS) $(OFLAGS) -c fftsg3dt.f -o fftsg3dt_f.o fft4f2d_f.o : ../fft4f2d.f $(F77) $(FFLAGS) $(OFLAGS) -c ../fft4f2d.f -o fft4f2d_f.o fftsg2d_f.o : ../fftsg2d.f $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg2d.f -o fftsg2d_f.o fftsg3d_f.o : ../fftsg3d.f $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg3d.f -o fftsg3d_f.o fftsg_f.o : ../fftsg.f $(F77) $(FFLAGS) $(OFLAGS) -c ../fftsg.f -o fftsg_f.o clean: rm -f *.o OouraFFT-1.0/sample2d/Makefile.pth000066400000000000000000000017721246725775500167730ustar00rootroot00000000000000# ---- for GNU gcc ---- CC = gcc CFLAGS = -Wall -DUSE_FFT2D_PTHREADS -DUSE_FFT3D_PTHREADS OFLAGS = -O2 # ---- for SUN WS cc ---- # #CC = cc # #CFLAGS = -Wall -DUSE_FFT2D_PTHREADS -DUSE_FFT3D_PTHREADS # #OFLAGS = -xO2 all: fftsg2dt_pt fftsg3dt_pt fftsg2dt_pt : fftsg2dt.o fftsg2dpt.o fftsg.o alloc.o $(CC) fftsg2dt.o fftsg2dpt.o fftsg.o alloc.o -lm -lpthread -o fftsg2dt_pt fftsg3dt_pt : fftsg3dt.o fftsg3dpt.o fftsg.o alloc.o $(CC) fftsg3dt.o fftsg3dpt.o fftsg.o alloc.o -lm -lpthread -o fftsg3dt_pt fftsg2dt.o : fftsg2dt.c $(CC) $(CFLAGS) $(OFLAGS) -c fftsg2dt.c -o fftsg2dt.o fftsg3dt.o : fftsg3dt.c $(CC) $(CFLAGS) $(OFLAGS) -c fftsg3dt.c -o fftsg3dt.o fftsg2dpt.o : ../fftsg2d.c $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg2d.c -o fftsg2dpt.o fftsg3dpt.o : ../fftsg3d.c $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg3d.c -o fftsg3dpt.o fftsg.o : ../fftsg.c $(CC) $(CFLAGS) $(OFLAGS) -c ../fftsg.c -o fftsg.o alloc.o : ../alloc.c $(CC) $(CFLAGS) $(OFLAGS) -c ../alloc.c -o alloc.o clean: rm -f *.o OouraFFT-1.0/sample2d/alloc.h000066400000000000000000000007621246725775500160020ustar00rootroot00000000000000/* ---- memory allocation ---- */ #include #include int *alloc_1d_int(int n1); void free_1d_int(int *i); double *alloc_1d_double(int n1); void free_1d_double(double *d); int **alloc_2d_int(int n1, int n2); void free_2d_int(int **ii); double **alloc_2d_double(int n1, int n2); void free_2d_double(double **dd); int ***alloc_3d_int(int n1, int n2, int n3); void free_3d_int(int ***iii); double ***alloc_3d_double(int n1, int n2, int n3); void free_3d_double(double ***ddd); OouraFFT-1.0/sample2d/fft4f2dt.c000066400000000000000000000053621246725775500163270ustar00rootroot00000000000000/* test of fft4f2d.c */ #include #include #include "alloc.h" #define MAX(x,y) ((x) > (y) ? (x) : (y)) /* random number generator, 0 <= RND < 1 */ #define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) int main() { void cdft2d(int, int, int, double **, int *, double *); void rdft2d(int, int, int, double **, int *, double *); void ddct2d(int, int, int, double **, double **, int *, double *); void ddst2d(int, int, int, double **, double **, int *, double *); void putdata2d(int n1, int n2, double **a); double errorcheck2d(int n1, int n2, double scale, double **a); int *ip, n1, n2, n, i; double **a, **t, *w, err; printf("data length n1=? (n1 = power of 2) \n"); scanf("%d", &n1); printf("data length n2=? (n2 = power of 2) \n"); scanf("%d", &n2); a = alloc_2d_double(n1, n2); t = alloc_2d_double(n1, n2); n = MAX(n1, n2 / 2); ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); n = MAX(n1 / 2, n2 / 4) + MAX(n1, n2); w = alloc_1d_double(n); ip[0] = 0; /* check of CDFT */ putdata2d(n1, n2, a); cdft2d(n1, n2, 1, a, ip, w); cdft2d(n1, n2, -1, a, ip, w); err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); printf("cdft2d err= %g \n", err); /* check of RDFT */ putdata2d(n1, n2, a); rdft2d(n1, n2, 1, a, ip, w); rdft2d(n1, n2, -1, a, ip, w); err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); printf("rdft2d err= %g \n", err); /* check of DDCT */ putdata2d(n1, n2, a); ddct2d(n1, n2, 1, a, t, ip, w); ddct2d(n1, n2, -1, a, t, ip, w); for (i = 0; i <= n1 - 1; i++) { a[i][0] *= 0.5; } for (i = 0; i <= n2 - 1; i++) { a[0][i] *= 0.5; } err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); printf("ddct2d err= %g \n", err); /* check of DDST */ putdata2d(n1, n2, a); ddst2d(n1, n2, 1, a, t, ip, w); ddst2d(n1, n2, -1, a, t, ip, w); for (i = 0; i <= n1 - 1; i++) { a[i][0] *= 0.5; } for (i = 0; i <= n2 - 1; i++) { a[0][i] *= 0.5; } err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); printf("ddst2d err= %g \n", err); free_1d_double(w); free_1d_int(ip); free_2d_double(t); free_2d_double(a); return 0; } void putdata2d(int n1, int n2, double **a) { int j1, j2, seed = 0; for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] = RND(&seed); } } } double errorcheck2d(int n1, int n2, double scale, double **a) { int j1, j2, seed = 0; double err = 0, e; for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { e = RND(&seed) - a[j1][j2] * scale; err = MAX(err, fabs(e)); } } return err; } OouraFFT-1.0/sample2d/fft4f2dt.f000066400000000000000000000051641246725775500163320ustar00rootroot00000000000000! test of fft4f2d.f ! program main integer nmax, nmaxsqrt parameter (nmax = 1024) parameter (nmaxsqrt = 32) integer ip(0 : nmaxsqrt + 1), n1, n2, i real*8 a(0 : nmax - 1, 0 : nmax - 1), & t(0 : nmax - 1, 0 : nmax - 1), w(0 : nmax * 3 / 2 - 1), & err, errorcheck2d ! write (*, *) 'data length n1=? (n1 = power of 2) ' read (*, *) n1 write (*, *) 'data length n2=? (n2 = power of 2) ' read (*, *) n2 ip(0) = 0 ! ! check of CDFT call putdata2d(nmax, n1, n2, a) call cdft2d(nmax, n1, n2, 1, a, ip, w) call cdft2d(nmax, n1, n2, -1, a, ip, w) err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) write (*, *) 'cdft2d err= ', err ! ! check of RDFT call putdata2d(nmax, n1, n2, a) call rdft2d(nmax, n1, n2, 1, a, ip, w) call rdft2d(nmax, n1, n2, -1, a, ip, w) err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) write (*, *) 'rdft2d err= ', err ! ! check of DDCT call putdata2d(nmax, n1, n2, a) call ddct2d(nmax, n1, n2, 1, a, t, ip, w) call ddct2d(nmax, n1, n2, -1, a, t, ip, w) do i = 0, n1 - 1 a(i, 0) = a(i, 0) * 0.5d0 end do do i = 0, n2 - 1 a(0, i) = a(0, i) * 0.5d0 end do err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) write (*, *) 'ddct2d err= ', err ! ! check of DDST call putdata2d(nmax, n1, n2, a) call ddst2d(nmax, n1, n2, 1, a, t, ip, w) call ddst2d(nmax, n1, n2, -1, a, t, ip, w) do i = 0, n1 - 1 a(i, 0) = a(i, 0) * 0.5d0 end do do i = 0, n2 - 1 a(0, i) = a(0, i) * 0.5d0 end do err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) write (*, *) 'ddst2d err= ', err ! end ! ! subroutine putdata2d(n1max, n1, n2, a) integer n1max, n1, n2, j1, j2, seed real*8 a(0 : n1max - 1, 0 : *), drnd seed = 0 do j2 = 0, n2 - 1 do j1 = 0, n1 - 1 a(j1, j2) = drnd(seed) end do end do end ! ! function errorcheck2d(n1max, n1, n2, scale, a) integer n1max, n1, n2, j1, j2, seed real*8 scale, a(0 : n1max - 1, 0 : *), drnd, err, e, & errorcheck2d err = 0 seed = 0 do j2 = 0, n2 - 1 do j1 = 0, n1 - 1 e = drnd(seed) - a(j1, j2) * scale err = max(err, abs(e)) end do end do errorcheck2d = err end ! ! ! random number generator, 0 <= drnd < 1 real*8 function drnd(seed) integer seed seed = mod(seed * 7141 + 54773, 259200) drnd = seed * (1.0d0 / 259200) end ! OouraFFT-1.0/sample2d/fftsg2dt.c000066400000000000000000000053351246725775500164270ustar00rootroot00000000000000/* test of fftsg2d.c */ #include #include #include "alloc.h" #define MAX(x,y) ((x) > (y) ? (x) : (y)) /* random number generator, 0 <= RND < 1 */ #define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) int main() { void cdft2d(int, int, int, double **, double *, int *, double *); void rdft2d(int, int, int, double **, double *, int *, double *); void ddct2d(int, int, int, double **, double *, int *, double *); void ddst2d(int, int, int, double **, double *, int *, double *); void putdata2d(int n1, int n2, double **a); double errorcheck2d(int n1, int n2, double scale, double **a); int *ip, n1, n2, n, i; double **a, *w, err; printf("data length n1=? (n1 = power of 2) \n"); scanf("%d", &n1); printf("data length n2=? (n2 = power of 2) \n"); scanf("%d", &n2); a = alloc_2d_double(n1, n2); n = MAX(n1, n2 / 2); ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); n = MAX(n1, n2) * 3 / 2; w = alloc_1d_double(n); ip[0] = 0; /* check of CDFT */ putdata2d(n1, n2, a); cdft2d(n1, n2, 1, a, NULL, ip, w); cdft2d(n1, n2, -1, a, NULL, ip, w); err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); printf("cdft2d err= %g \n", err); /* check of RDFT */ putdata2d(n1, n2, a); rdft2d(n1, n2, 1, a, NULL, ip, w); rdft2d(n1, n2, -1, a, NULL, ip, w); err = errorcheck2d(n1, n2, 2.0 / n1 / n2, a); printf("rdft2d err= %g \n", err); /* check of DDCT */ putdata2d(n1, n2, a); ddct2d(n1, n2, 1, a, NULL, ip, w); ddct2d(n1, n2, -1, a, NULL, ip, w); for (i = 0; i <= n1 - 1; i++) { a[i][0] *= 0.5; } for (i = 0; i <= n2 - 1; i++) { a[0][i] *= 0.5; } err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); printf("ddct2d err= %g \n", err); /* check of DDST */ putdata2d(n1, n2, a); ddst2d(n1, n2, 1, a, NULL, ip, w); ddst2d(n1, n2, -1, a, NULL, ip, w); for (i = 0; i <= n1 - 1; i++) { a[i][0] *= 0.5; } for (i = 0; i <= n2 - 1; i++) { a[0][i] *= 0.5; } err = errorcheck2d(n1, n2, 4.0 / n1 / n2, a); printf("ddst2d err= %g \n", err); free_1d_double(w); free_1d_int(ip); free_2d_double(a); return 0; } void putdata2d(int n1, int n2, double **a) { int j1, j2, seed = 0; for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] = RND(&seed); } } } double errorcheck2d(int n1, int n2, double scale, double **a) { int j1, j2, seed = 0; double err = 0, e; for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { e = RND(&seed) - a[j1][j2] * scale; err = MAX(err, fabs(e)); } } return err; } OouraFFT-1.0/sample2d/fftsg2dt.f000066400000000000000000000051531246725775500164300ustar00rootroot00000000000000! test of fftsg2d.f ! program main integer nmax, nmaxsqrt parameter (nmax = 1024) parameter (nmaxsqrt = 32) integer ip(0 : nmaxsqrt + 1), n1, n2, i real*8 a(0 : nmax - 1, 0 : nmax - 1), t(0 : 8 * nmax - 1), & w(0 : nmax * 3 / 2 - 1), err, errorcheck2d ! write (*, *) 'data length n1=? (n1 = power of 2) ' read (*, *) n1 write (*, *) 'data length n2=? (n2 = power of 2) ' read (*, *) n2 ip(0) = 0 ! ! check of CDFT call putdata2d(nmax, n1, n2, a) call cdft2d(nmax, n1, n2, 1, a, t, ip, w) call cdft2d(nmax, n1, n2, -1, a, t, ip, w) err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) write (*, *) 'cdft2d err= ', err ! ! check of RDFT call putdata2d(nmax, n1, n2, a) call rdft2d(nmax, n1, n2, 1, a, t, ip, w) call rdft2d(nmax, n1, n2, -1, a, t, ip, w) err = errorcheck2d(nmax, n1, n2, 2.0d0 / n1 / n2, a) write (*, *) 'rdft2d err= ', err ! ! check of DDCT call putdata2d(nmax, n1, n2, a) call ddct2d(nmax, n1, n2, 1, a, t, ip, w) call ddct2d(nmax, n1, n2, -1, a, t, ip, w) do i = 0, n1 - 1 a(i, 0) = a(i, 0) * 0.5d0 end do do i = 0, n2 - 1 a(0, i) = a(0, i) * 0.5d0 end do err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) write (*, *) 'ddct2d err= ', err ! ! check of DDST call putdata2d(nmax, n1, n2, a) call ddst2d(nmax, n1, n2, 1, a, t, ip, w) call ddst2d(nmax, n1, n2, -1, a, t, ip, w) do i = 0, n1 - 1 a(i, 0) = a(i, 0) * 0.5d0 end do do i = 0, n2 - 1 a(0, i) = a(0, i) * 0.5d0 end do err = errorcheck2d(nmax, n1, n2, 4.0d0 / n1 / n2, a) write (*, *) 'ddst2d err= ', err ! end ! ! subroutine putdata2d(n1max, n1, n2, a) integer n1max, n1, n2, j1, j2, seed real*8 a(0 : n1max - 1, 0 : *), drnd seed = 0 do j2 = 0, n2 - 1 do j1 = 0, n1 - 1 a(j1, j2) = drnd(seed) end do end do end ! ! function errorcheck2d(n1max, n1, n2, scale, a) integer n1max, n1, n2, j1, j2, seed real*8 scale, a(0 : n1max - 1, 0 : *), drnd, err, e, & errorcheck2d err = 0 seed = 0 do j2 = 0, n2 - 1 do j1 = 0, n1 - 1 e = drnd(seed) - a(j1, j2) * scale err = max(err, abs(e)) end do end do errorcheck2d = err end ! ! ! random number generator, 0 <= drnd < 1 real*8 function drnd(seed) integer seed seed = mod(seed * 7141 + 54773, 259200) drnd = seed * (1.0d0 / 259200) end ! OouraFFT-1.0/sample2d/fftsg3dt.c000066400000000000000000000067721246725775500164360ustar00rootroot00000000000000/* test of fftsg3d.c */ #include #include #include "alloc.h" #define MAX(x,y) ((x) > (y) ? (x) : (y)) /* random number generator, 0 <= RND < 1 */ #define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) int main() { void cdft3d(int, int, int, int, double ***, double *, int *, double *); void rdft3d(int, int, int, int, double ***, double *, int *, double *); void ddct3d(int, int, int, int, double ***, double *, int *, double *); void ddst3d(int, int, int, int, double ***, double *, int *, double *); void putdata3d(int n1, int n2, int n3, double ***a); double errorcheck3d(int n1, int n2, int n3, double scale, double ***a); int *ip, n1, n2, n3, n, nt, i, j; double ***a, *w, err; printf("data length n1=? (n1 = power of 2) \n"); scanf("%d", &n1); printf("data length n2=? (n2 = power of 2) \n"); scanf("%d", &n2); printf("data length n3=? (n3 = power of 2) \n"); scanf("%d", &n3); a = alloc_3d_double(n1, n2, n3); nt = MAX(n1, n2); n = MAX(nt, n3 / 2); ip = alloc_1d_int(2 + (int) sqrt(n + 0.5)); n = MAX(nt, n3) * 3 / 2; w = alloc_1d_double(n); ip[0] = 0; /* check of CDFT */ putdata3d(n1, n2, n3, a); cdft3d(n1, n2, n3, 1, a, NULL, ip, w); cdft3d(n1, n2, n3, -1, a, NULL, ip, w); err = errorcheck3d(n1, n2, n3, 2.0 / n1 / n2 / n3, a); printf("cdft3d err= %g \n", err); /* check of RDFT */ putdata3d(n1, n2, n3, a); rdft3d(n1, n2, n3, 1, a, NULL, ip, w); rdft3d(n1, n2, n3, -1, a, NULL, ip, w); err = errorcheck3d(n1, n2, n3, 2.0 / n1 / n2 / n3, a); printf("rdft3d err= %g \n", err); /* check of DDCT */ putdata3d(n1, n2, n3, a); ddct3d(n1, n2, n3, 1, a, NULL, ip, w); ddct3d(n1, n2, n3, -1, a, NULL, ip, w); for (i = 0; i <= n1 - 1; i++) { for (j = 0; j <= n2 - 1; j++) { a[i][j][0] *= 0.5; } for (j = 0; j <= n3 - 1; j++) { a[i][0][j] *= 0.5; } } for (i = 0; i <= n2 - 1; i++) { for (j = 0; j <= n3 - 1; j++) { a[0][i][j] *= 0.5; } } err = errorcheck3d(n1, n2, n3, 8.0 / n1 / n2 / n3, a); printf("ddct3d err= %g \n", err); /* check of DDST */ putdata3d(n1, n2, n3, a); ddst3d(n1, n2, n3, 1, a, NULL, ip, w); ddst3d(n1, n2, n3, -1, a, NULL, ip, w); for (i = 0; i <= n1 - 1; i++) { for (j = 0; j <= n2 - 1; j++) { a[i][j][0] *= 0.5; } for (j = 0; j <= n3 - 1; j++) { a[i][0][j] *= 0.5; } } for (i = 0; i <= n2 - 1; i++) { for (j = 0; j <= n3 - 1; j++) { a[0][i][j] *= 0.5; } } err = errorcheck3d(n1, n2, n3, 8.0 / n1 / n2 / n3, a); printf("ddst3d err= %g \n", err); free_1d_double(w); free_1d_int(ip); free_3d_double(a); return 0; } void putdata3d(int n1, int n2, int n3, double ***a) { int j1, j2, j3, seed = 0; for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { for (j3 = 0; j3 <= n3 - 1; j3++) { a[j1][j2][j3] = RND(&seed); } } } } double errorcheck3d(int n1, int n2, int n3, double scale, double ***a) { int j1, j2, j3, seed = 0; double err = 0, e; for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { for (j3 = 0; j3 <= n3 - 1; j3++) { e = RND(&seed) - a[j1][j2][j3] * scale; err = MAX(err, fabs(e)); } } } return err; } OouraFFT-1.0/sample2d/fftsg3dt.f000066400000000000000000000070241246725775500164300ustar00rootroot00000000000000! test of fftsg3d.f ! program main integer nmax, nmaxsqrt parameter (nmax = 128) parameter (nmaxsqrt = 16) integer ip(0 : nmaxsqrt + 1), n1, n2, n3, i, j real*8 a(0 : nmax - 1, 0 : nmax - 1, 0 : nmax - 1), & t(0 : 8 * nmax - 1), & w(0 : nmax * 3 / 2 - 1), err, errorcheck3d ! write (*, *) 'data length n1=? (n1 = power of 2) ' read (*, *) n1 write (*, *) 'data length n2=? (n2 = power of 2) ' read (*, *) n2 write (*, *) 'data length n3=? (n3 = power of 2) ' read (*, *) n3 ip(0) = 0 ! ! check of CDFT call putdata3d(nmax, nmax, n1, n2, n3, a) call cdft3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) call cdft3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) err = errorcheck3d(nmax, nmax, n1, n2, n3, & 2.0d0 / n1 / n2 / n3, a) write (*, *) 'cdft3d err= ', err ! ! check of RDFT call putdata3d(nmax, nmax, n1, n2, n3, a) call rdft3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) call rdft3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) err = errorcheck3d(nmax, nmax, n1, n2, n3, & 2.0d0 / n1 / n2 / n3, a) write (*, *) 'rdft3d err= ', err ! ! check of DDCT call putdata3d(nmax, nmax, n1, n2, n3, a) call ddct3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) call ddct3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) do j = 0, n2 - 1 do i = 0, n1 - 1 a(i, j, 0) = a(i, j, 0) * 0.5d0 end do end do do j = 0, n3 - 1 do i = 0, n1 - 1 a(i, 0, j) = a(i, 0, j) * 0.5d0 end do do i = 0, n2 - 1 a(0, i, j) = a(0, i, j) * 0.5d0 end do end do err = errorcheck3d(nmax, nmax, n1, n2, n3, & 8.0d0 / n1 / n2 / n3, a) write (*, *) 'ddct3d err= ', err ! ! check of DDST call putdata3d(nmax, nmax, n1, n2, n3, a) call ddst3d(nmax, nmax, n1, n2, n3, 1, a, t, ip, w) call ddst3d(nmax, nmax, n1, n2, n3, -1, a, t, ip, w) do j = 0, n2 - 1 do i = 0, n1 - 1 a(i, j, 0) = a(i, j, 0) * 0.5d0 end do end do do j = 0, n3 - 1 do i = 0, n1 - 1 a(i, 0, j) = a(i, 0, j) * 0.5d0 end do do i = 0, n2 - 1 a(0, i, j) = a(0, i, j) * 0.5d0 end do end do err = errorcheck3d(nmax, nmax, n1, n2, n3, & 8.0d0 / n1 / n2 / n3, a) write (*, *) 'ddst3d err= ', err ! end ! ! subroutine putdata3d(n1max, n2max, n1, n2, n3, a) integer n1max, n2max, n1, n2, n3, j1, j2, j3, seed real*8 a(0 : n1max - 1, 0 : n2max - 1, 0 : *), drnd seed = 0 do j3 = 0, n3 - 1 do j2 = 0, n2 - 1 do j1 = 0, n1 - 1 a(j1, j2, j3) = drnd(seed) end do end do end do end ! ! function errorcheck3d(n1max, n2max, n1, n2, n3, scale, a) integer n1max, n2max, n1, n2, n3, j1, j2, j3, seed real*8 scale, a(0 : n1max - 1, 0 : n2max - 1, 0 : *), & drnd, err, e, errorcheck3d err = 0 seed = 0 do j3 = 0, n3 - 1 do j2 = 0, n2 - 1 do j1 = 0, n1 - 1 e = drnd(seed) - a(j1, j2, j3) * scale err = max(err, abs(e)) end do end do end do errorcheck3d = err end ! ! ! random number generator, 0 <= drnd < 1 real*8 function drnd(seed) integer seed seed = mod(seed * 7141 + 54773, 259200) drnd = seed * (1.0d0 / 259200) end ! OouraFFT-1.0/sample2d/shrtdctt.c000066400000000000000000000030101246725775500165270ustar00rootroot00000000000000/* test of shrtdct.c */ #include #include #define MAX(x,y) ((x) > (y) ? (x) : (y)) /* random number generator, 0 <= RND < 1 */ #define RND(p) ((*(p) = (*(p) * 7141 + 54773) % 259200) * (1.0 / 259200)) #define NMAX 16 int main() { void ddct8x8s(int isgn, double **a); void ddct16x16s(int isgn, double **a); void putdata2d(int n1, int n2, double **a); double errorcheck2d(int n1, int n2, double scale, double **a); double err; int i; double aarr[NMAX][NMAX], *a[NMAX], barr[NMAX][NMAX], *b[NMAX]; for (i = 0; i < NMAX; i++) a[i] = aarr[i]; for (i = 0; i < NMAX; i++) b[i] = barr[i]; /* check of 8x8 DCT */ putdata2d(8, 8, a); ddct8x8s(-1, a); ddct8x8s(1, a); err = errorcheck2d(8, 8, 1.0, a); printf("ddct8x8s err= %g\n", err); /* check of 16x16 DCT */ putdata2d(16, 16, a); ddct16x16s(-1, a); ddct16x16s(1, a); err = errorcheck2d(16, 16, 1.0, a); printf("ddct16x16s err= %g\n", err); return 0; } void putdata2d(int n1, int n2, double **a) { int j1, j2, seed = 0; for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { a[j1][j2] = RND(&seed); } } } double errorcheck2d(int n1, int n2, double scale, double **a) { int j1, j2, seed = 0; double err = 0, e; for (j1 = 0; j1 <= n1 - 1; j1++) { for (j2 = 0; j2 <= n2 - 1; j2++) { e = RND(&seed) - a[j1][j2] * scale; err = MAX(err, fabs(e)); } } return err; } OouraFFT-1.0/shrtdct.c000066400000000000000000000442061246725775500146500ustar00rootroot00000000000000/* Short Discrete Cosine Transform data length :8x8, 16x16 method :row-column, radix 4 FFT functions ddct8x8s : 8x8 DCT ddct16x16s: 16x16 DCT function prototypes void ddct8x8s(int isgn, double **a); void ddct16x16s(int isgn, double **a); */ /* -------- 8x8 DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] Normalized 8x8 IDCT C[k1][k2] = (1/4) * sum_j1=0^7 sum_j2=0^7 a[j1][j2] * s[j1] * s[j2] * cos(pi*j1*(k1+1/2)/8) * cos(pi*j2*(k2+1/2)/8), 0<=k1<8, 0<=k2<8 (s[0] = 1/sqrt(2), s[j] = 1, j > 0) Normalized 8x8 DCT C[k1][k2] = (1/4) * s[k1] * s[k2] * sum_j1=0^7 sum_j2=0^7 a[j1][j2] * cos(pi*(j1+1/2)*k1/8) * cos(pi*(j2+1/2)*k2/8), 0<=k1<8, 0<=k2<8 (s[0] = 1/sqrt(2), s[j] = 1, j > 0) [usage] ddct8x8s(1, a); ddct8x8s(-1, a); [parameters] a[0...7][0...7] :input/output data (double **) output data a[k1][k2] = C[k1][k2], 0<=k1<8, 0<=k2<8 */ /* Cn_kR = sqrt(2.0/n) * cos(pi/2*k/n) */ /* Cn_kI = sqrt(2.0/n) * sin(pi/2*k/n) */ /* Wn_kR = cos(pi/2*k/n) */ /* Wn_kI = sin(pi/2*k/n) */ #define C8_1R 0.49039264020161522456 #define C8_1I 0.09754516100806413392 #define C8_2R 0.46193976625564337806 #define C8_2I 0.19134171618254488586 #define C8_3R 0.41573480615127261854 #define C8_3I 0.27778511650980111237 #define C8_4R 0.35355339059327376220 #define W8_4R 0.70710678118654752440 void ddct8x8s(int isgn, double **a) { int j; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; double xr, xi; if (isgn < 0) { for (j = 0; j <= 7; j++) { x0r = a[0][j] + a[7][j]; x1r = a[0][j] - a[7][j]; x0i = a[2][j] + a[5][j]; x1i = a[2][j] - a[5][j]; x2r = a[4][j] + a[3][j]; x3r = a[4][j] - a[3][j]; x2i = a[6][j] + a[1][j]; x3i = a[6][j] - a[1][j]; xr = x0r + x2r; xi = x0i + x2i; a[0][j] = C8_4R * (xr + xi); a[4][j] = C8_4R * (xr - xi); xr = x0r - x2r; xi = x0i - x2i; a[2][j] = C8_2R * xr - C8_2I * xi; a[6][j] = C8_2R * xi + C8_2I * xr; xr = W8_4R * (x1i - x3i); x1i = W8_4R * (x1i + x3i); x3i = x1i - x3r; x1i += x3r; x3r = x1r - xr; x1r += xr; a[1][j] = C8_1R * x1r - C8_1I * x1i; a[7][j] = C8_1R * x1i + C8_1I * x1r; a[3][j] = C8_3R * x3r - C8_3I * x3i; a[5][j] = C8_3R * x3i + C8_3I * x3r; } for (j = 0; j <= 7; j++) { x0r = a[j][0] + a[j][7]; x1r = a[j][0] - a[j][7]; x0i = a[j][2] + a[j][5]; x1i = a[j][2] - a[j][5]; x2r = a[j][4] + a[j][3]; x3r = a[j][4] - a[j][3]; x2i = a[j][6] + a[j][1]; x3i = a[j][6] - a[j][1]; xr = x0r + x2r; xi = x0i + x2i; a[j][0] = C8_4R * (xr + xi); a[j][4] = C8_4R * (xr - xi); xr = x0r - x2r; xi = x0i - x2i; a[j][2] = C8_2R * xr - C8_2I * xi; a[j][6] = C8_2R * xi + C8_2I * xr; xr = W8_4R * (x1i - x3i); x1i = W8_4R * (x1i + x3i); x3i = x1i - x3r; x1i += x3r; x3r = x1r - xr; x1r += xr; a[j][1] = C8_1R * x1r - C8_1I * x1i; a[j][7] = C8_1R * x1i + C8_1I * x1r; a[j][3] = C8_3R * x3r - C8_3I * x3i; a[j][5] = C8_3R * x3i + C8_3I * x3r; } } else { for (j = 0; j <= 7; j++) { x1r = C8_1R * a[1][j] + C8_1I * a[7][j]; x1i = C8_1R * a[7][j] - C8_1I * a[1][j]; x3r = C8_3R * a[3][j] + C8_3I * a[5][j]; x3i = C8_3R * a[5][j] - C8_3I * a[3][j]; xr = x1r - x3r; xi = x1i + x3i; x1r += x3r; x3i -= x1i; x1i = W8_4R * (xr + xi); x3r = W8_4R * (xr - xi); xr = C8_2R * a[2][j] + C8_2I * a[6][j]; xi = C8_2R * a[6][j] - C8_2I * a[2][j]; x0r = C8_4R * (a[0][j] + a[4][j]); x0i = C8_4R * (a[0][j] - a[4][j]); x2r = x0r - xr; x2i = x0i - xi; x0r += xr; x0i += xi; a[0][j] = x0r + x1r; a[7][j] = x0r - x1r; a[2][j] = x0i + x1i; a[5][j] = x0i - x1i; a[4][j] = x2r - x3i; a[3][j] = x2r + x3i; a[6][j] = x2i - x3r; a[1][j] = x2i + x3r; } for (j = 0; j <= 7; j++) { x1r = C8_1R * a[j][1] + C8_1I * a[j][7]; x1i = C8_1R * a[j][7] - C8_1I * a[j][1]; x3r = C8_3R * a[j][3] + C8_3I * a[j][5]; x3i = C8_3R * a[j][5] - C8_3I * a[j][3]; xr = x1r - x3r; xi = x1i + x3i; x1r += x3r; x3i -= x1i; x1i = W8_4R * (xr + xi); x3r = W8_4R * (xr - xi); xr = C8_2R * a[j][2] + C8_2I * a[j][6]; xi = C8_2R * a[j][6] - C8_2I * a[j][2]; x0r = C8_4R * (a[j][0] + a[j][4]); x0i = C8_4R * (a[j][0] - a[j][4]); x2r = x0r - xr; x2i = x0i - xi; x0r += xr; x0i += xi; a[j][0] = x0r + x1r; a[j][7] = x0r - x1r; a[j][2] = x0i + x1i; a[j][5] = x0i - x1i; a[j][4] = x2r - x3i; a[j][3] = x2r + x3i; a[j][6] = x2i - x3r; a[j][1] = x2i + x3r; } } } /* -------- 16x16 DCT (Discrete Cosine Transform) / Inverse of DCT -------- [definition] Normalized 16x16 IDCT C[k1][k2] = (1/8) * sum_j1=0^15 sum_j2=0^15 a[j1][j2] * s[j1] * s[j2] * cos(pi*j1*(k1+1/2)/16) * cos(pi*j2*(k2+1/2)/16), 0<=k1<16, 0<=k2<16 (s[0] = 1/sqrt(2), s[j] = 1, j > 0) Normalized 16x16 DCT C[k1][k2] = (1/8) * s[k1] * s[k2] * sum_j1=0^15 sum_j2=0^15 a[j1][j2] * cos(pi*(j1+1/2)*k1/16) * cos(pi*(j2+1/2)*k2/16), 0<=k1<16, 0<=k2<16 (s[0] = 1/sqrt(2), s[j] = 1, j > 0) [usage] ddct16x16s(1, a); ddct16x16s(-1, a); [parameters] a[0...15][0...15] :input/output data (double **) output data a[k1][k2] = C[k1][k2], 0<=k1<16, 0<=k2<16 */ /* Cn_kR = sqrt(2.0/n) * cos(pi/2*k/n) */ /* Cn_kI = sqrt(2.0/n) * sin(pi/2*k/n) */ /* Wn_kR = cos(pi/2*k/n) */ /* Wn_kI = sin(pi/2*k/n) */ #define C16_1R 0.35185093438159561476 #define C16_1I 0.03465429229977286565 #define C16_2R 0.34675996133053686546 #define C16_2I 0.06897484482073575308 #define C16_3R 0.33832950029358816957 #define C16_3I 0.10263113188058934529 #define C16_4R 0.32664074121909413196 #define C16_4I 0.13529902503654924610 #define C16_5R 0.31180625324666780814 #define C16_5I 0.16666391461943662432 #define C16_6R 0.29396890060483967924 #define C16_6I 0.19642373959677554532 #define C16_7R 0.27330046675043937206 #define C16_7I 0.22429189658565907106 #define C16_8R 0.25 #define W16_4R 0.92387953251128675613 #define W16_4I 0.38268343236508977173 #define W16_8R 0.70710678118654752440 void ddct16x16s(int isgn, double **a) { int j; double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; double x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i; double xr, xi; if (isgn < 0) { for (j = 0; j <= 15; j++) { x4r = a[0][j] - a[15][j]; xr = a[0][j] + a[15][j]; x4i = a[8][j] - a[7][j]; xi = a[8][j] + a[7][j]; x0r = xr + xi; x0i = xr - xi; x5r = a[2][j] - a[13][j]; xr = a[2][j] + a[13][j]; x5i = a[10][j] - a[5][j]; xi = a[10][j] + a[5][j]; x1r = xr + xi; x1i = xr - xi; x6r = a[4][j] - a[11][j]; xr = a[4][j] + a[11][j]; x6i = a[12][j] - a[3][j]; xi = a[12][j] + a[3][j]; x2r = xr + xi; x2i = xr - xi; x7r = a[6][j] - a[9][j]; xr = a[6][j] + a[9][j]; x7i = a[14][j] - a[1][j]; xi = a[14][j] + a[1][j]; x3r = xr + xi; x3i = xr - xi; xr = x0r + x2r; xi = x1r + x3r; a[0][j] = C16_8R * (xr + xi); a[8][j] = C16_8R * (xr - xi); xr = x0r - x2r; xi = x1r - x3r; a[4][j] = C16_4R * xr - C16_4I * xi; a[12][j] = C16_4R * xi + C16_4I * xr; x0r = W16_8R * (x1i - x3i); x2r = W16_8R * (x1i + x3i); xr = x0i + x0r; xi = x2r + x2i; a[2][j] = C16_2R * xr - C16_2I * xi; a[14][j] = C16_2R * xi + C16_2I * xr; xr = x0i - x0r; xi = x2r - x2i; a[6][j] = C16_6R * xr - C16_6I * xi; a[10][j] = C16_6R * xi + C16_6I * xr; xr = W16_8R * (x6r - x6i); xi = W16_8R * (x6i + x6r); x6r = x4r - xr; x6i = x4i - xi; x4r += xr; x4i += xi; xr = W16_4I * x7r - W16_4R * x7i; xi = W16_4I * x7i + W16_4R * x7r; x7r = W16_4R * x5r - W16_4I * x5i; x7i = W16_4R * x5i + W16_4I * x5r; x5r = x7r + xr; x5i = x7i + xi; x7r -= xr; x7i -= xi; xr = x4r + x5r; xi = x5i + x4i; a[1][j] = C16_1R * xr - C16_1I * xi; a[15][j] = C16_1R * xi + C16_1I * xr; xr = x4r - x5r; xi = x5i - x4i; a[7][j] = C16_7R * xr - C16_7I * xi; a[9][j] = C16_7R * xi + C16_7I * xr; xr = x6r - x7i; xi = x7r + x6i; a[5][j] = C16_5R * xr - C16_5I * xi; a[11][j] = C16_5R * xi + C16_5I * xr; xr = x6r + x7i; xi = x7r - x6i; a[3][j] = C16_3R * xr - C16_3I * xi; a[13][j] = C16_3R * xi + C16_3I * xr; } for (j = 0; j <= 15; j++) { x4r = a[j][0] - a[j][15]; xr = a[j][0] + a[j][15]; x4i = a[j][8] - a[j][7]; xi = a[j][8] + a[j][7]; x0r = xr + xi; x0i = xr - xi; x5r = a[j][2] - a[j][13]; xr = a[j][2] + a[j][13]; x5i = a[j][10] - a[j][5]; xi = a[j][10] + a[j][5]; x1r = xr + xi; x1i = xr - xi; x6r = a[j][4] - a[j][11]; xr = a[j][4] + a[j][11]; x6i = a[j][12] - a[j][3]; xi = a[j][12] + a[j][3]; x2r = xr + xi; x2i = xr - xi; x7r = a[j][6] - a[j][9]; xr = a[j][6] + a[j][9]; x7i = a[j][14] - a[j][1]; xi = a[j][14] + a[j][1]; x3r = xr + xi; x3i = xr - xi; xr = x0r + x2r; xi = x1r + x3r; a[j][0] = C16_8R * (xr + xi); a[j][8] = C16_8R * (xr - xi); xr = x0r - x2r; xi = x1r - x3r; a[j][4] = C16_4R * xr - C16_4I * xi; a[j][12] = C16_4R * xi + C16_4I * xr; x0r = W16_8R * (x1i - x3i); x2r = W16_8R * (x1i + x3i); xr = x0i + x0r; xi = x2r + x2i; a[j][2] = C16_2R * xr - C16_2I * xi; a[j][14] = C16_2R * xi + C16_2I * xr; xr = x0i - x0r; xi = x2r - x2i; a[j][6] = C16_6R * xr - C16_6I * xi; a[j][10] = C16_6R * xi + C16_6I * xr; xr = W16_8R * (x6r - x6i); xi = W16_8R * (x6i + x6r); x6r = x4r - xr; x6i = x4i - xi; x4r += xr; x4i += xi; xr = W16_4I * x7r - W16_4R * x7i; xi = W16_4I * x7i + W16_4R * x7r; x7r = W16_4R * x5r - W16_4I * x5i; x7i = W16_4R * x5i + W16_4I * x5r; x5r = x7r + xr; x5i = x7i + xi; x7r -= xr; x7i -= xi; xr = x4r + x5r; xi = x5i + x4i; a[j][1] = C16_1R * xr - C16_1I * xi; a[j][15] = C16_1R * xi + C16_1I * xr; xr = x4r - x5r; xi = x5i - x4i; a[j][7] = C16_7R * xr - C16_7I * xi; a[j][9] = C16_7R * xi + C16_7I * xr; xr = x6r - x7i; xi = x7r + x6i; a[j][5] = C16_5R * xr - C16_5I * xi; a[j][11] = C16_5R * xi + C16_5I * xr; xr = x6r + x7i; xi = x7r - x6i; a[j][3] = C16_3R * xr - C16_3I * xi; a[j][13] = C16_3R * xi + C16_3I * xr; } } else { for (j = 0; j <= 15; j++) { x5r = C16_1R * a[1][j] + C16_1I * a[15][j]; x5i = C16_1R * a[15][j] - C16_1I * a[1][j]; xr = C16_7R * a[7][j] + C16_7I * a[9][j]; xi = C16_7R * a[9][j] - C16_7I * a[7][j]; x4r = x5r + xr; x4i = x5i - xi; x5r -= xr; x5i += xi; x7r = C16_5R * a[5][j] + C16_5I * a[11][j]; x7i = C16_5R * a[11][j] - C16_5I * a[5][j]; xr = C16_3R * a[3][j] + C16_3I * a[13][j]; xi = C16_3R * a[13][j] - C16_3I * a[3][j]; x6r = x7r + xr; x6i = x7i - xi; x7r -= xr; x7i += xi; xr = x4r - x6r; xi = x4i - x6i; x4r += x6r; x4i += x6i; x6r = W16_8R * (xi + xr); x6i = W16_8R * (xi - xr); xr = x5r + x7i; xi = x5i - x7r; x5r -= x7i; x5i += x7r; x7r = W16_4I * x5r + W16_4R * x5i; x7i = W16_4I * x5i - W16_4R * x5r; x5r = W16_4R * xr + W16_4I * xi; x5i = W16_4R * xi - W16_4I * xr; xr = C16_4R * a[4][j] + C16_4I * a[12][j]; xi = C16_4R * a[12][j] - C16_4I * a[4][j]; x2r = C16_8R * (a[0][j] + a[8][j]); x3r = C16_8R * (a[0][j] - a[8][j]); x0r = x2r + xr; x1r = x3r + xi; x2r -= xr; x3r -= xi; x0i = C16_2R * a[2][j] + C16_2I * a[14][j]; x2i = C16_2R * a[14][j] - C16_2I * a[2][j]; x1i = C16_6R * a[6][j] + C16_6I * a[10][j]; x3i = C16_6R * a[10][j] - C16_6I * a[6][j]; xr = x0i - x1i; xi = x2i + x3i; x0i += x1i; x2i -= x3i; x1i = W16_8R * (xi + xr); x3i = W16_8R * (xi - xr); xr = x0r + x0i; xi = x0r - x0i; a[0][j] = xr + x4r; a[15][j] = xr - x4r; a[8][j] = xi + x4i; a[7][j] = xi - x4i; xr = x1r + x1i; xi = x1r - x1i; a[2][j] = xr + x5r; a[13][j] = xr - x5r; a[10][j] = xi + x5i; a[5][j] = xi - x5i; xr = x2r + x2i; xi = x2r - x2i; a[4][j] = xr + x6r; a[11][j] = xr - x6r; a[12][j] = xi + x6i; a[3][j] = xi - x6i; xr = x3r + x3i; xi = x3r - x3i; a[6][j] = xr + x7r; a[9][j] = xr - x7r; a[14][j] = xi + x7i; a[1][j] = xi - x7i; } for (j = 0; j <= 15; j++) { x5r = C16_1R * a[j][1] + C16_1I * a[j][15]; x5i = C16_1R * a[j][15] - C16_1I * a[j][1]; xr = C16_7R * a[j][7] + C16_7I * a[j][9]; xi = C16_7R * a[j][9] - C16_7I * a[j][7]; x4r = x5r + xr; x4i = x5i - xi; x5r -= xr; x5i += xi; x7r = C16_5R * a[j][5] + C16_5I * a[j][11]; x7i = C16_5R * a[j][11] - C16_5I * a[j][5]; xr = C16_3R * a[j][3] + C16_3I * a[j][13]; xi = C16_3R * a[j][13] - C16_3I * a[j][3]; x6r = x7r + xr; x6i = x7i - xi; x7r -= xr; x7i += xi; xr = x4r - x6r; xi = x4i - x6i; x4r += x6r; x4i += x6i; x6r = W16_8R * (xi + xr); x6i = W16_8R * (xi - xr); xr = x5r + x7i; xi = x5i - x7r; x5r -= x7i; x5i += x7r; x7r = W16_4I * x5r + W16_4R * x5i; x7i = W16_4I * x5i - W16_4R * x5r; x5r = W16_4R * xr + W16_4I * xi; x5i = W16_4R * xi - W16_4I * xr; xr = C16_4R * a[j][4] + C16_4I * a[j][12]; xi = C16_4R * a[j][12] - C16_4I * a[j][4]; x2r = C16_8R * (a[j][0] + a[j][8]); x3r = C16_8R * (a[j][0] - a[j][8]); x0r = x2r + xr; x1r = x3r + xi; x2r -= xr; x3r -= xi; x0i = C16_2R * a[j][2] + C16_2I * a[j][14]; x2i = C16_2R * a[j][14] - C16_2I * a[j][2]; x1i = C16_6R * a[j][6] + C16_6I * a[j][10]; x3i = C16_6R * a[j][10] - C16_6I * a[j][6]; xr = x0i - x1i; xi = x2i + x3i; x0i += x1i; x2i -= x3i; x1i = W16_8R * (xi + xr); x3i = W16_8R * (xi - xr); xr = x0r + x0i; xi = x0r - x0i; a[j][0] = xr + x4r; a[j][15] = xr - x4r; a[j][8] = xi + x4i; a[j][7] = xi - x4i; xr = x1r + x1i; xi = x1r - x1i; a[j][2] = xr + x5r; a[j][13] = xr - x5r; a[j][10] = xi + x5i; a[j][5] = xi - x5i; xr = x2r + x2i; xi = x2r - x2i; a[j][4] = xr + x6r; a[j][11] = xr - x6r; a[j][12] = xi + x6i; a[j][3] = xi - x6i; xr = x3r + x3i; xi = x3r - x3i; a[j][6] = xr + x7r; a[j][9] = xr - x7r; a[j][14] = xi + x7i; a[j][1] = xi - x7i; } } }