debian/0000775000000000000000000000000012253246567007204 5ustar debian/rules0000775000000000000000000001011711665027776010271 0ustar #!/usr/bin/make -f ARCHITECTURE := $(shell dpkg-architecture -qDEB_HOST_ARCH) DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) CFLAGS = -Wall -g ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) CFLAGS += -O0 else CFLAGS += -O2 endif CONFFLAGS := --host=$(DEB_HOST_GNU_TYPE) --build=$(DEB_BUILD_GNU_TYPE) --prefix=/usr --mandir=\$${prefix}/share/man --infodir=\$${prefix}/share/info --enable-shared --enable-mpi --enable-threads ifeq ($(ARCHITECTURE), i386) ARCHCONFFLAGS := --enable-i386-hacks endif build: build-arch build-indep autoreconf-stamp: autoreconf -f -i -I. touch autoreconf-stamp build-arch: build-arch-stamp build-arch-stamp: autoreconf-stamp dh_testdir # single precision F77=gfortran CFLAGS="$(CFLAGS)" CC=mpicc ./configure $(CONFFLAGS) --enable-float --enable-type-prefix $(ARCHCONFFLAGS) $(MAKE) #$(MAKE) -C tests check ./tests/fftw_test -t -e -v -p 1024 -x 1 ./tests/rfftw_test -t -e -v -p 1024 -x 1 $(MAKE) install DESTDIR=$(CURDIR)/debian/tmp-single $(MAKE) clean # double precision F77=gfortran CFLAGS="$(CFLAGS)" CC=mpicc ./configure $(CONFFLAGS) $(ARCHCONFFLAGS) $(MAKE) #$(MAKE) -C tests check ./tests/fftw_test -t -e -v -p 1024 -x 1 ./tests/rfftw_test -t -e -v -p 1024 -x 1 $(MAKE) install DESTDIR=$(CURDIR)/debian/tmp-double touch build-arch-stamp build-indep: build-indep-stamp build-indep-stamp: autoreconf-stamp # docs F77=gfortran CFLAGS="$(CFLAGS)" ./configure $(CONFFLAGS) --enable-float --enable-type-prefix $(ARCHCONFFLAGS) $(MAKE) -C doc $(MAKE) -C doc html $(MAKE) -C FAQ touch build-indep-stamp clean: clean1st clean1st: dh_testdir dh_testroot rm -f build-arch-stamp build-indep-stamp autoreconf-stamp # Add here commands to clean up after the build process. [ ! -f Makefile ] || $(MAKE) distclean [ ! -f Makefile ] || $(MAKE) -C doc clean-local [ ! -f Makefile ] || $(MAKE) -C FAQ clean rm -rf $(CURDIR)/debian/tmp-* dh_clean install: install-indep install-arch install-indep: dh_testdir dh_testroot dh_clean -k -i dh_installdirs -i dh_install -p fftw-docs FAQ/fftw-faq.html/*.html usr/share/doc/fftw-docs/FAQ dh_install -p fftw-docs doc/*.html usr/share/doc/fftw-docs/html dh_install -p fftw-docs doc/*.gif usr/share/doc/fftw-docs/html # dh_install -p fftw-docs debian/mpi/Makefile mpi/TOMS_transpose.h \ mpi/fftw_mpi_test.c mpi/rfftw_mpi_test.c mpi/sched.h \ mpi/test_sched.c mpi/test_transpose_mpi.c \ usr/share/doc/fftw-docs/examples/mpi # dh_install -p fftw-docs debian/tests/Makefile fftw/config.h fftw/fftw-int.h usr/share/doc/fftw-docs/examples/tests for i in README fftw_test.c rfftw_test.c test_main.c test_main.h; do \ dh_install -p fftw-docs tests/$$i usr/share/doc/fftw-docs/examples/tests; \ done # dh_install -p fftw-docs debian/threads/Makefile usr/share/doc/fftw-docs/examples/threads for i in fftw_threads-int.h fftw_threads_test.c rfftw_threads_test.c; do \ dh_install -p fftw-docs threads/$$i usr/share/doc/fftw-docs/examples/threads; \ done install-arch: dh_testdir dh_testroot dh_clean -k -s dh_installdirs -s dh_install --sourcedir=$(CURDIR)/debian/tmp-single -p sfftw2 dh_install --sourcedir=$(CURDIR)/debian/tmp-single -p sfftw-dev dh_install --sourcedir=$(CURDIR)/debian/tmp-double -p fftw2 dh_install --sourcedir=$(CURDIR)/debian/tmp-double -p fftw-dev dh_install -p sfftw-dev fortran/fftw_f77.i usr/share/doc/sfftw-dev/fortran dh_install -p fftw-dev fortran/fftw_f77.i usr/share/doc/fftw-dev/fortran binary-common: dh_testdir dh_testroot dh_installchangelogs ChangeLog dh_installdocs dh_installexamples dh_installman dh_installinfo dh_link dh_strip dh_compress -X.c -X.h -XMakefile dh_fixperms dh_makeshlibs dh_installdeb dh_shlibdeps dh_gencontrol dh_md5sums dh_builddeb binary-indep: build-indep install-indep $(MAKE) -f debian/rules DH_OPTIONS=-i binary-common binary-arch: build-arch install-arch $(MAKE) -f debian/rules DH_OPTIONS=-a binary-common binary: binary-arch binary-indep .PHONY: build clean binary-indep binary-arch binary install install-indep install-arch clean1st autoreconf-stamp debian/fftw-dev.dirs0000664000000000000000000000002411665017205011574 0ustar usr/lib usr/include debian/changelog0000664000000000000000000002142512253246567011062 0ustar fftw (2.1.5-1build1) trusty; urgency=medium * No-change rebuild for libopenmpi1.3 -> libopenmpi1.6 transition. -- Logan Rosen Sun, 15 Dec 2013 01:34:19 -0500 fftw (2.1.5-1) unstable; urgency=low * Team upload. * New upstream release * Package moved into the Debian Science team (no answer from the previous maintainer) and package not maintained. * Standards-Version updated to version 3.9.2 * Vcs-Browser & Vcs-Svn updated * Switch to mpi-default-dev (Closes: #571446) * Get ride of .la files (Closes: #633175) * Fix lintian warning debhelper-but-no-misc-depends * Fix lintian warning patch-system-but-direct-changes-in-diff * Switch to dpkg-source 3.0 (quilt) format -- Sylvestre Ledru Tue, 29 Nov 2011 01:48:33 +0100 fftw (2.1.3-22) unstable; urgency=low * Add missed patches from 2.1.3-20.1 to dpatch (closes: #480908) * Call autoreconf to avoid shipping Makefile.inS in diff.gz * debian/control: add build-depends on autotools-dev, autoconf and automake * debian/control: add build-conflicts on autoconf2.13, automake1.4 * debian/rules: add build-depends on dpatch * debian/rules: use $(CURDIR), not `pwd` * debian/rules: do not ignore clean failures * debian/control: bump Standards-Version to 3.7.3 * debian/control: fftw-docs to Section doc * debian/control: {,s}fftw-dev use binary:Version, not Source-Version -- Paul Brossier Wed, 14 May 2008 11:23:40 +0200 fftw (2.1.3-21) unstable; urgency=low * Acknowledge non-maintainer upload, thanks to Colin Tuckley (closes: #466366) * Fix bashism in debian/rules, thanks to Chris Lamb and Raphael Geissert (closes: #478375) -- Paul Brossier Sat, 10 May 2008 19:46:07 +0200 fftw (2.1.3-20.1) unstable; urgency=low * Non-maintainer Upload. * Use/depend on gfortran based Lapack and Blas packages (Closes: #466366). * Build with gfortran instead of g77. -- Colin Tuckley Mon, 25 Feb 2008 09:37:07 +0000 fftw (2.1.3-20) unstable; urgency=low * Add working makefiles to fftw-docs (closes: #378550) -- Paul Brossier Thu, 5 Oct 2006 01:53:10 +0200 fftw (2.1.3-19) unstable; urgency=low * Patch tests/test_main.c to workaround buggy counter when using gcc 4.1 on 64 bit machines (many thanks to Vincent Fourmond, closes: #373647) -- Paul Brossier Sat, 1 Jul 2006 11:53:33 +0200 fftw (2.1.3-18) unstable; urgency=low * Fix inclusions in generated headers (closes: #372518) * Realign paragraphs in long descriptions * Change Priority to extra -- Paul Brossier Sat, 10 Jun 2006 14:32:06 +0200 fftw (2.1.3-17) unstable; urgency=low * Adopt package (closes: #263126). * Acknowledge NMU (closes: #325813, #333631, #334726) * Start again packaging from dh_make templates. * Fix autoconf/automake + update generated files with autoreconf -f -i (closes: #356966) + do not use XXX_variables_XXX, use @variables@ instead + do not use -rpath in {threads,mpi}/Makefile.am (closes: #358157) + do not use EXTRA_LTLIBRARIES in {threads,mpi}/Makefile.am (libtool unhappy with installing extra libs, so always build anyway) + do not use mpicc in mpi/Makefile.am, add -lmpi to LDFLAGS + install .la files, .so links updated to new libtool * debian/control: + lower first character case in short descriptions + move fftw2 and sfftw2 to Section oldlibs + move fftw-dev and sfftw-dev to Section oldlibs (closes: #339995, #339996) + move fftw-docs to Section oldlibs + move source package to Section math + bump Standards-Version + suggests mpich-bin instead of mpich + added build-deps on debhelper and libtool * debian/rules: + always rebuild doc and FAQ + rebuilt info page is one single file + hack to install files as before + moved tests to examples in fftw-docs * debian/copyright Updated licence entries * added info section in doc/fftw.texi -- Paul Brossier Fri, 2 Jun 2006 02:17:31 +0200 fftw (2.1.3-16) unstable; urgency=low * Reuploaded to allow compilation with gcc 3.3 which will help some arches. -- James A. Treacy Sat, 9 Aug 2003 23:34:54 -0400 fftw (2.1.3-15) unstable; urgency=low * Docs moved into fftw-docs -- James A. Treacy Mon, 13 Jan 2003 18:24:05 -0500 fftw (2.1.3-14) unstable; urgency=low * added AM_MAINTAINER_MODE to configure.in. Should avoid dependency on aclocal-1.4 and allow build to work on all arches -- James A. Treacy Fri, 23 Aug 2002 15:11:23 -0400 fftw (2.1.3-13) unstable; urgency=low * Update files using newer libtool. Closes: bug#157246 -- James A. Treacy Mon, 19 Aug 2002 09:38:40 -0400 fftw (2.1.3-12) unstable; urgency=low * Don't install static libs using install -s. Closes: bug#136955 -- James A. Treacy Tue, 5 Mar 2002 00:28:49 -0500 fftw (2.1.3-11) unstable; urgency=low * sfftw conflicts: fftw2 (<= 2.1.3-10). closes: Bug#129818 -- James A. Treacy Mon, 21 Jan 2002 00:28:49 -0500 fftw (2.1.3-10) unstable; urgency=low * Change the shlibs files so dependencies get set correctly. -- James A. Treacy Sat, 12 Jan 2002 13:30:15 -0500 fftw (2.1.3-9) unstable; urgency=low * split package into single and double precision versions. This is done because some of the hand tuned versions of fftw only come in single or double precision. -- James A. Treacy Fri, 11 Jan 2002 12:42:36 -0500 fftw (2.1.3-8) unstable; urgency=low * Include fortran/fftw_f77.i. Closes: #127840 * Create the virtual packages fftw-single and fftw-double, since the assembler optimized versions of fftw only provide single or double precision. -- James A. Treacy Thu, 10 Jan 2002 23:28:56 -0500 fftw (2.1.3-7) unstable; urgency=low * Rebuild to fix signing problem -- James A. Treacy Wed, 18 Jul 2001 13:59:24 -0500 fftw (2.1.3-6) unstable; urgency=low * Remove config.cache. Closes: bug #105343 -- James A. Treacy Tue, 17 Jul 2001 13:59:24 -0500 fftw (2.1.3-5) unstable; urgency=low * Update config.guess and config.sub. Closes: bug #103887 -- James A. Treacy Wed, 10 Jan 2001 13:59:24 -0500 fftw (2.1.3-4) unstable; urgency=low * Build single precision libraries. Closes: bug #92834, #93646 -- James A. Treacy Wed, 10 Jan 2001 13:59:24 -0500 fftw (2.1.3-3) unstable; urgency=low * Add g77 to the Build-Depends. Closes: bug #81804 -- James A. Treacy Wed, 10 Jan 2001 13:59:24 -0500 fftw (2.1.3-2) unstable; urgency=low * Corrected the descriptions. Closes: bug #51459, #56585 -- James A. Treacy Sun, 28 Nov 1999 23:42:24 -0500 fftw (2.1.3-1) unstable; urgency=low * New upstream version. -- James A. Treacy Mon, 8 Nov 1999 13:56:53 -0500 fftw (2.1.2-1) unstable; urgency=low * New Upstream version. -- James A. Treacy Fri, 28 May 1999 01:03:42 -0400 fftw (2.1.1-1) unstable; urgency=low * New Upstream version. -- James A. Treacy Thu, 1 Apr 1999 01:58:09 -0500 fftw (2.1-1) unstable; urgency=low * New Upstream version. * info documentation now included. Fixes bug #33140 -- James A. Treacy Tue, 9 Mar 1999 12:35:27 -0500 fftw (2.0.1-2) unstable; urgency=low * Include threaded version of library (uses different name). -- James A. Treacy Mon, 1 Feb 1999 16:23:46 -0500 fftw (2.0.1-1) unstable; urgency=low * New Upstream version. -- James A. Treacy Thu, 1 Oct 1998 13:10:51 -0400 fftw (2.0-1) unstable; urgency=low * New Upstream version. -- James A. Treacy Fri, 11 Sep 1998 16:00:00 -0400 fftw (1.3-2) unstable; urgency=low * missing .gif files added to documentation. Fixes bug #22304 -- James A. Treacy Sun, 24 May 1998 00:03:36 -0400 fftw (1.3-1) unstable; urgency=low * new upstream version * released under the GPL -- James A. Treacy Fri, 17 Apr 1998 03:09:51 -0400 fftw (1.2.1-4) unstable; urgency=low * made -dev package depend on specific version of fftw1 -- James A. Treacy Mon, 16 Feb 1998 23:15:16 -0500 fftw (1.2.1-3) unstable; urgency=low * Got rid of lintian bugs. -- James A. Treacy Thu, 12 Feb 1998 15:03:14 -0500 fftw (1.2.1-2) unstable; urgency=low * Added missing copyright file -- James A. Treacy Mon, 2 Feb 1998 15:34:33 -0500 fftw (1.2.1-1) unstable; urgency=low * Initial Release. -- James A. Treacy Fri, 9 Jan 1998 11:42:20 -0500 debian/fftw2.install0000664000000000000000000000002211665017205011605 0ustar usr/lib/lib*.so.* debian/compat0000664000000000000000000000000211665017205010371 0ustar 4 debian/sfftw-dev.install0000664000000000000000000000005511665024540012470 0ustar usr/include/* usr/lib/lib*.a usr/lib/lib*.so debian/fftw-docs.info0000664000000000000000000000001611665017205011741 0ustar doc/fftw.info debian/mpi/0000775000000000000000000000000011665017233007761 5ustar debian/mpi/Makefile0000664000000000000000000000113011665017205011413 0ustar all: gcc -c test_sched.c -o test_sched.o gcc -I/usr/lib/mpich/include -I../tests -c fftw_mpi_test.c -o fftw_mpi_test.o gcc -I/usr/lib/mpich/include -I../tests -c rfftw_mpi_test.c -o rfftw_mpi_test.o gcc -I/usr/lib/mpich/include -c test_transpose_mpi.c -o test_transpose_mpi.o gcc -lfftw_mpi test_sched.o -o test_sched gcc -lfftw_mpi fftw_mpi_test.o ../tests/test_main.o -o fftw_mpi_test gcc -lrfftw_mpi rfftw_mpi_test.o ../tests/test_main.o -o rfftw_mpi_test gcc -lfftw_mpi test_transpose_mpi.o -o test_transpose_mpi clean: rm *.o test_sched fftw_mpi_test rfftw_mpi_test test_transpose_mpi debian/copyright0000664000000000000000000000114011665017205011122 0ustar This package was debianized by James A. Treacy treacy@debian.org on Wed, 30 Sep 1998 03:09:51 -0400. It was downloaded from http://www.fftw.org/download.html The homepage for fftw is at http://www.fftw.org/ Copyright: Copyright (c) 1997,1998 Massachusetts Institute of Technology License: This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. A copy of the GPL can be found in /usr/share/common-licenses/GPL debian/source/0000775000000000000000000000000011665025740010476 5ustar debian/source/format0000664000000000000000000000001411665025740011704 0ustar 3.0 (quilt) debian/fftw-docs.docs0000664000000000000000000000006111665017205011736 0ustar NEWS README README.hacks TODO FAQ/fftw-faq.ascii debian/README.Debian0000664000000000000000000000572511665017205011245 0ustar fftw for Debian --------------- Note: k6fftwgel and k7fftwgel only contain single precision routines at this time. p4fftwgel only contains double precision routines. There are a number of Debian packages for fftw. For any architecture, you will need the base package which contains the shared libraries (for non-i386 there is only fftw2). For i386, there is a choice of packages. The one you select is based on your cpu: Arch Base Package Devel Package Precision Provided i386 (AMD K6-2) k6fftw2 k6fftw-dev single i386 (AMD K7) k7fftw2 k7fftw-dev single i386 (Pentium and above) p4fftw2 p4fftw-dev double i386 (any) fftw2 fftw-dev double i386 (any) sfftw2 sfftw-dev single sparc (UltraSPARC-I) pfftw2 (*) pfftw-dev all others fftw2 fftw-dev double all others sfftw2 sfftw-dev single (*) Still under development so not released yet When there is a choice, the original fftw will still work, but there can be a dramatic speedup if you choose the version specifically geared for your processor. The -dev version of a package contains the headers and a statically linked copy of the libraries. You need to install this if you intend to compile a program which uses fftw. Be careful when compiling against the static libs as the library linker (ld) looks for shared libraries first. Some people prefer to use static libraries as there can be a speed advantage (3-30%) on register starved architectures (like x86 machines). The package also contains a threaded version of fftw. Using the threaded version is almost identical to using the non- threaded version, but the function calls are renamed and you need to call a special function first to handle some housekeeping. See the documentation for details. The packages also contain mpi based libraries. Mpi is a system for spreading computations over multiple machines. This relies on the mpich package. The fftw packages do not depend on mpich, but only suggests it, since fftw can be used without mpi. Again, see the documentation for details. Single and double precision versions of the library can be installed at the same time. The calling convention is identical. Which one is used is decided at compile time: to link against a single precision version, prepend an 's' to the library name. Thus, to link against libsfftw.so, for example, you would use -lsfftw. To top it all off, you can call fftw from fortran. See the docs at /usr/share/doc/fftw-dev/html/fftw_toc.html (needs fftw-dev installed). The file fortran/fftw_f77.i mentioned in the docs can be found in /usr/share/doc/fftw-dev/fortran/. If you aren't impressed yet with the functionality of fftw, there is a rumor that the next version of fftw will be able to make a good cup of coffee. James A. Treacy , Thu, 10 Jan 2002 21:42:20 -0500debian/fftw-dev.docs0000664000000000000000000000003611665017205011566 0ustar NEWS README README.hacks TODO debian/tests/0000775000000000000000000000000011665017233010336 5ustar debian/tests/Makefile0000664000000000000000000000042011665017205011771 0ustar all: gcc -I. -c fftw_test.c -o fftw_test.o gcc -I. -c rfftw_test.c -o rfftw_test.o gcc -I. -c test_main.c -o test_main.o gcc -lfftw test_main.o fftw_test.o -o test_fftw gcc -lfftw -lrfftw test_main.o rfftw_test.o -o test_rfftw clean: rm -f *.o test_fftw test_rfftw debian/sfftw2.install0000664000000000000000000000002311665017205011771 0ustar usr/lib/lib*.so.* debian/fftw2.dirs0000664000000000000000000000001011665017205011075 0ustar usr/lib debian/sfftw-dev.docs0000664000000000000000000000003611665017205011751 0ustar NEWS README README.hacks TODO debian/control0000664000000000000000000000541211665026620010601 0ustar Source: fftw Section: oldlibs Priority: extra Maintainer: Debian Science Team Uploaders: Paul Brossier Build-Depends: debhelper (>= 4.0.0), autotools-dev, autoconf, automake, libtool, mpi-default-dev, gfortran Standards-Version: 3.9.2 Vcs-Svn: svn://svn.debian.org/svn/debian-science/packages/fftw/ Vcs-Browser: http://svn.debian.org/viewsvn/debian-science/packages/fftw/ Homepage: http://fftw.org Package: fftw2 Architecture: any Section: oldlibs Depends: ${shlibs:Depends}, ${misc:Depends} Suggests: fftw-dev, mpi-defaults-bin Provides: fftw2-double Conflicts: fftw2-double Description: library for computing Fast Fourier Transforms This library computes Fast Fourier Transforms (FFT) in one or more dimensions. It is extremely fast. This package contains the shared library version of the fftw libraries in double precision. . To get the static library and the header files you need to install fftw-dev. . For documentation and test programs, install fftw-docs Package: fftw-dev Architecture: any Section: oldlibs Depends: fftw2 (= ${binary:Version}), ${shlibs:Depends}, ${misc:Depends} Provides: fftw-double-dev Conflicts: fftw1-dev, fftw-double-dev Description: library for computing Fast Fourier Transforms This library computes FFTs in one or more dimensions. It is extremely fast. This package contains the statically linked library and the header files. Package: sfftw2 Architecture: any Section: oldlibs Depends: ${shlibs:Depends}, ${misc:Depends} Suggests: sfftw-dev, mpi-defaults-bin Provides: fftw2-single Conflicts: fftw2-single, fftw2 (<= 2.1.3-10) Description: library for computing Fast Fourier Transforms This library computes Fast Fourier Transforms (FFT) in one or more dimensions. It is extremely fast. This package contains the shared library version of the fftw libraries in single precision. . To get the static library and the header files you need to install sfftw-dev. . For documentation and test programs, install fftw-docs Package: sfftw-dev Architecture: any Section: oldlibs Depends: sfftw2 (= ${binary:Version}), ${shlibs:Depends}, ${misc:Depends} Provides: fftw-single-dev Conflicts: fftw1-dev, fftw-dev (<< 2.1.3-8), fftw-single-dev Description: library for computing Fast Fourier Transforms This library computes FFTs in one or more dimensions. It is extremely fast. This package contains the statically linked library and the header files. Package: fftw-docs Architecture: all Section: doc Depends: ${shlibs:Depends}, ${misc:Depends} Suggests: fftw2 Conflicts: fftw-dev (<< 2.1.3-15), k6fftwgel (<< 1.1-8), k7fftwgel (<< 1.2-6), p4fftwgel-dev (<< 1.2-5) Description: documentation for fftw This package contains the documentation and test programs for fftw, a Fast Fourier Transform library. debian/threads/0000775000000000000000000000000011665017233010626 5ustar debian/threads/Makefile0000664000000000000000000000054311665017205012267 0ustar all: gcc -I../tests -I. -c rfftw_threads_test.c -o rfftw_threads_test.o gcc -I../tests -c fftw_threads_test.c -o fftw_threads_test.o gcc -lfftw_threads fftw_threads_test.o ../tests/test_main.o -o fftw_threads_test gcc -lrfftw_threads rfftw_threads_test.o ../tests/test_main.o -o rfftw_threads_test clean: rm *.o fftw_threads_test rfftw_threads_test debian/dirs0000664000000000000000000000002111665017205010050 0ustar usr/bin usr/sbin debian/patches/0000775000000000000000000000000011665033452010624 5ustar debian/patches/05_ac_define_syntax.diff0000664000000000000000000001347111665033452015273 0ustar Index: fftw-2.1.5/configure.in =================================================================== --- fftw-2.1.5.orig/configure.in 2003-03-24 08:01:13.000000000 +0100 +++ fftw-2.1.5/configure.in 2011-11-29 02:16:51.531836400 +0100 @@ -27,7 +27,7 @@ AC_ARG_ENABLE(float, [ --enable-float compile fftw for single precision], enable_float=$enableval, enable_float=no) if test "$enable_float" = "yes"; then - AC_DEFINE(FFTW_ENABLE_FLOAT) + AC_DEFINE([FFTW_ENABLE_FLOAT],[],[desc]) fi FFTW_PREFIX="" @@ -56,27 +56,27 @@ AC_ARG_ENABLE(i386-hacks, [ --enable-i386-hacks enable gcc/x86 specific performance hacks], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_ENABLE_I386_HACKS) + AC_DEFINE([FFTW_ENABLE_I386_HACKS],[],[desc]) fi AC_ARG_ENABLE(pentium-timer, [ --enable-pentium-timer enable high resolution Pentium timer], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_ENABLE_PENTIUM_TIMER) + AC_DEFINE([FFTW_ENABLE_PENTIUM_TIMER],[],[desc]) fi AC_ARG_ENABLE(debug, [ --enable-debug compile fftw with extra runtime checks for debugging], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_DEBUG) + AC_DEFINE([FFTW_DEBUG],[],[desc]) fi AC_ARG_ENABLE(debug-alignment, [ --enable-debug-alignment enable runtime checks for alignment on x86], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_DEBUG_ALIGNMENT) + AC_DEFINE([FFTW_DEBUG_ALIGNMENT],[],[desc]) fi AC_ARG_ENABLE(vec-recurse, [ --enable-vec-recurse enable experimental performance hack], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_ENABLE_VECTOR_RECURSE) + AC_DEFINE([FFTW_ENABLE_VECTOR_RECURSE],[],[desc]) fi dnl ----------------------------------------------------------------------- @@ -115,7 +115,7 @@ dnl ----------------------------------------------------------------------- AC_SUBST(SHARED_VERSION_INFO) -AC_DEFINE_UNQUOTED(FFTW_VERSION, "$VERSION") +AC_DEFINE_UNQUOTED([FFTW_VERSION], "$VERSION",[Version]) # Get the version number that will be appended to shared libraries: SHARED_VERSION=`echo $SHARED_VERSION_INFO | awk -F':' '{ print $1 "." $3 "." $2 }'` @@ -123,7 +123,7 @@ ACX_PROG_CC_MAXOPT -ACX_GCC_ALIGNS_STACK(AC_DEFINE(FFTW_GCC_ALIGNS_STACK), [ +ACX_GCC_ALIGNS_STACK(AC_DEFINE([FFTW_GCC_ALIGNS_STACK],[],[desc]), [ if test "$enable_i386_hacks" = yes; then if test "${acx_gcc_stack_align_bug-no}" = yes; then # we are using a gcc with a stack alignment bug, and we should @@ -183,7 +183,7 @@ AC_TRY_LINK([#include ], if (!isnan(3.14159)) isnan(2.7183);, ok=yes, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(HAVE_ISNAN) + AC_DEFINE([HAVE_ISNAN],[],[desc]) fi AC_MSG_RESULT(${ok}) @@ -196,7 +196,7 @@ #endif ], [hrtime_t foobar;], ok=yes, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(HAVE_HRTIME_T) + AC_DEFINE([HAVE_HRTIME_T],[],[desc]) fi AC_MSG_RESULT(${ok}) @@ -205,7 +205,7 @@ AC_ARG_ENABLE(unsafe-mulmod, [ --enable-unsafe-mulmod risk overflow for large prime sizes], enable_unsafe_mulmod=$enableval, enable_unsafe_mulmod=no) if test "$enable_unsafe_mulmod" = "yes"; then - AC_DEFINE(FFTW_ENABLE_UNSAFE_MULMOD) + AC_DEFINE([FFTW_ENABLE_UNSAFE_MULMOD],[],[desc]) fi @@ -243,7 +243,7 @@ if test -z "$THREADLIBS"; then AC_MSG_ERROR([don't know how to enable OpenMP]) fi - AC_DEFINE(FFTW_USING_OPENMP_THREADS) + AC_DEFINE([FFTW_USING_OPENMP_THREADS],[],[desc]) fi if test "$with_sgimp"x != nox; then AC_MSG_CHECKING(how to enable SGI MP) @@ -261,7 +261,7 @@ if test -z "$THREADLIBS"; then AC_MSG_ERROR([don't know how to enable SGI MP]) fi - AC_DEFINE(FFTW_USING_SGIMP_THREADS) + AC_DEFINE([FFTW_USING_SGIMP_THREADS],[],[desc]) fi # POSIX threads, the default choice: @@ -270,38 +270,38 @@ ACX_PTHREAD([THREADLIBS="$PTHREAD_LIBS " CFLAGS="$CFLAGS $PTHREAD_CFLAGS" CC="$PTHREAD_CC" - AC_DEFINE(FFTW_USING_POSIX_THREADS)]) + AC_DEFINE([FFTW_USING_POSIX_THREADS],[],[desc])]) fi # Solaris threads: if test -z "$THREADLIBS"; then AC_CHECK_LIB(thread, thr_create, [THREADLIBS="-lthread" - AC_DEFINE(FFTW_USING_SOLARIS_THREADS)]) + AC_DEFINE([FFTW_USING_SOLARIS_THREADS],[],[desc])]) fi # Mach C threads: if test -z "$THREADLIBS"; then AC_CHECK_FUNC(cthread_fork, [THREADLIBS=" " - AC_DEFINE(FFTW_USING_MACH_THREADS)]) + AC_DEFINE([FFTW_USING_MACH_THREADS],[],[desc])]) AC_CHECK_HEADERS(mach/cthreads.h cthreads.h cthread.h) fi if test -z "$THREADLIBS"; then AC_CHECK_LIB(cthread, cthread_fork, [THREADLIBS="-lcthread" - AC_DEFINE(FFTW_USING_MACH_THREADS)]) + AC_DEFINE([FFTW_USING_MACH_THREADS],[],[desc])]) AC_CHECK_HEADERS(mach/cthreads.h cthreads.h cthread.h) fi if test -z "$THREADLIBS"; then AC_CHECK_LIB(cthreads, cthread_fork, [THREADLIBS="-lcthreads" - AC_DEFINE(FFTW_USING_MACH_THREADS)]) + AC_DEFINE([FFTW_USING_MACH_THREADS],[],[desc])]) AC_CHECK_HEADERS(mach/cthreads.h cthreads.h cthread.h) fi # BeOS threads: if test -z "$THREADLIBS"; then AC_CHECK_FUNC(spawn_thread, [THREADLIBS=" " - AC_DEFINE(FFTW_USING_BEOS_THREADS)]) + AC_DEFINE([FFTW_USING_BEOS_THREADS],[],[desc])]) fi if test -z "$THREADLIBS"; then AC_MSG_ERROR(couldn't find threads library for --enable-threads) @@ -334,7 +334,7 @@ CC="$MPICC" ok=yes AC_TRY_LINK([#include - ], [MPI_Comm_f2c(0);], [AC_DEFINE(HAVE_MPI_COMM_F2C)], [ok=no]) + ], [MPI_Comm_f2c(0);], [AC_DEFINE([HAVE_MPI_COMM_F2C],[],[desc])], [ok=no]) AC_MSG_RESULT($ok) CC="$save_CC" else debian/patches/series0000664000000000000000000000012511665031525012036 0ustar 02_fix_tests.dpatch #03_fix_doc.dpatch #04_configure.dpatch 05_ac_define_syntax.diff debian/patches/02_fix_tests.dpatch0000664000000000000000000000141411665017205014320 0ustar #! /bin/sh /usr/share/dpatch/dpatch-run ## 02_fix_tests by ## ## All lines beginning with `## DP:' are a description of the patch. ## DP: moving /u/s/kino/help to /u/s/doc/kino/help @DPATCH@ --- fftw-2.1.3.orig/tests/test_main.c +++ fftw-2.1.3/tests/test_main.c @@ -548,7 +548,7 @@ void test_timer(void) { double times[32], acc, min_time = 10000.00; - unsigned long iters, iter; + unsigned long iters = 0, iter; fftw_time begin, end, start; double t, tmax, tmin; int last = 0, i, repeat; @@ -559,7 +559,8 @@ start = fftw_get_time(); for (i = 0; i < 32; i++) { - iters = 1 << i; + //iters = 1 << i; /* workaround gcc-4.1 bug */ + if (i) iters <<= 1; else iters = 1; tmin = 1.0E10; tmax = -1.0E10; debian/patches/03_fix_doc.dpatch0000664000000000000000000071144611665017205013741 0ustar #! /bin/sh /usr/share/dpatch/dpatch-run ## 03_fix_doc by ## ## All lines beginning with `## DP:' are a description of the patch. ## DP: moving /u/s/kino/help to /u/s/doc/kino/help @DPATCH@ --- fftw-2.1.3.orig/doc/fftw.texi +++ fftw-2.1.3/doc/fftw.texi @@ -5,6 +5,15 @@ @settitle FFTW @c %**end of header +@ifinfo +@format +INFO-DIR-SECTION Development +START-INFO-DIR-ENTRY +* FFTW: (fftw). FFTW User's Manual. +END-INFO-DIR-ENTRY +@end format +@end ifinfo + @include version.texi @setchapternewpage odd @c define constant index (ct) --- fftw-2.1.3.orig/doc/version.texi +++ fftw-2.1.3/doc/version.texi @@ -1,3 +1,4 @@ -@set UPDATED 7 November 1999 +@set UPDATED 8 November 1999 +@set UPDATED-MONTH November 1999 @set EDITION 2.1.3 @set VERSION 2.1.3 --- fftw-2.1.3.orig/doc/fftw.info +++ fftw-2.1.3/doc/fftw.info @@ -1,5 +1,9 @@ -This is Info file fftw.info, produced by Makeinfo version 1.68 from the -input file fftw.texi. +This is fftw.info, produced by makeinfo version 4.8 from fftw.texi. + +INFO-DIR-SECTION Development +START-INFO-DIR-ENTRY +* FFTW: (fftw). FFTW User's Manual. +END-INFO-DIR-ENTRY This is the FFTW User's manual. @@ -20,90 +24,4529 @@ translation approved by the Free Software Foundation.  -Indirect: -fftw.info-1: 871 -fftw.info-2: 49529 -fftw.info-3: 98154 -fftw.info-4: 147774 -fftw.info-5: 194232 +File: fftw.info, Node: Top, Next: Introduction, Prev: (dir), Up: (dir) + +FFTW User Manual +**************** + +Welcome to FFTW, the Fastest Fourier Transform in the West. FFTW is a +collection of fast C routines to compute the discrete Fourier transform. +This manual documents FFTW version 2.1.3. + +* Menu: + +* Introduction:: +* Tutorial:: +* FFTW Reference:: +* Parallel FFTW:: +* Calling FFTW from Fortran:: +* Installation and Customization:: +* Acknowledgments:: +* License and Copyright:: +* Concept Index:: +* Library Index:: + + +Tutorial + +* Complex One-dimensional Transforms Tutorial:: +* Complex Multi-dimensional Transforms Tutorial:: +* Real One-dimensional Transforms Tutorial:: +* Real Multi-dimensional Transforms Tutorial:: +* Multi-dimensional Array Format:: +* Words of Wisdom:: + +Multi-dimensional Array Format + +* Row-major Format:: +* Column-major Format:: +* Static Arrays in C:: +* Dynamic Arrays in C:: +* Dynamic Arrays in C-The Wrong Way:: + +Words of Wisdom + +* Caveats in Using Wisdom:: What you should worry about in using wisdom +* Importing and Exporting Wisdom:: I/O of wisdom to disk and other media + +FFTW Reference + +* Data Types:: real, complex, and halfcomplex numbers +* One-dimensional Transforms Reference:: +* Multi-dimensional Transforms Reference:: +* Real One-dimensional Transforms Reference:: +* Real Multi-dimensional Transforms Reference:: +* Wisdom Reference:: +* Memory Allocator Reference:: +* Thread safety:: + +One-dimensional Transforms Reference + +* fftw_create_plan:: Plan Creation +* Discussion on Specific Plans:: +* fftw:: Plan Execution +* fftw_destroy_plan:: Plan Destruction +* What FFTW Really Computes:: Definition of the DFT. + +Multi-dimensional Transforms Reference + +* fftwnd_create_plan:: Plan Creation +* fftwnd:: Plan Execution +* fftwnd_destroy_plan:: Plan Destruction +* What FFTWND Really Computes:: + +Real One-dimensional Transforms Reference + +* rfftw_create_plan:: Plan Creation +* rfftw:: Plan Execution +* rfftw_destroy_plan:: Plan Destruction +* What RFFTW Really Computes:: + +Real Multi-dimensional Transforms Reference + +* rfftwnd_create_plan:: Plan Creation +* rfftwnd:: Plan Execution +* Array Dimensions for Real Multi-dimensional Transforms:: +* Strides in In-place RFFTWND:: +* rfftwnd_destroy_plan:: Plan Destruction +* What RFFTWND Really Computes:: + +Wisdom Reference + +* fftw_export_wisdom:: +* fftw_import_wisdom:: +* fftw_forget_wisdom:: + +Parallel FFTW + +* Multi-threaded FFTW:: +* MPI FFTW:: + +Multi-threaded FFTW + +* Installation and Supported Hardware/Software:: +* Usage of Multi-threaded FFTW:: +* How Many Threads to Use?:: +* Using Multi-threaded FFTW in a Multi-threaded Program:: +* Tips for Optimal Threading:: + +MPI FFTW + +* MPI FFTW Installation:: +* Usage of MPI FFTW for Complex Multi-dimensional Transforms:: +* MPI Data Layout:: +* Usage of MPI FFTW for Real Multi-dimensional Transforms:: +* Usage of MPI FFTW for Complex One-dimensional Transforms:: +* MPI Tips:: + +Calling FFTW from Fortran + +* Wrapper Routines:: +* FFTW Constants in Fortran:: +* Fortran Examples:: + +Installation and Customization + +* Installation on Unix:: +* Installation on non-Unix Systems:: +* Installing FFTW in both single and double precision:: +* gcc and Pentium hacks:: +* Customizing the timer:: +* Generating your own code:: + + +File: fftw.info, Node: Introduction, Next: Tutorial, Prev: Top, Up: Top + +1 Introduction +************** + +This manual documents version 2.1.3 of FFTW, the _Fastest Fourier +Transform in the West_. FFTW is a comprehensive collection of fast C +routines for computing the discrete Fourier transform (DFT) in one or +more dimensions, of both real and complex data, and of arbitrary input +size. FFTW also includes parallel transforms for both shared- and +distributed-memory systems. We assume herein that the reader is already +familiar with the properties and uses of the DFT that are relevant to +her application. Otherwise, see e.g. `The Fast Fourier Transform' by +E. O. Brigham (Prentice-Hall, Englewood Cliffs, NJ, 1974). Our web +page (http://www.fftw.org) also has links to FFT-related information +online. + + FFTW is usually faster (and sometimes much faster) than all other +freely-available Fourier transform programs found on the Net. For +transforms whose size is a power of two, it compares favorably with the +FFT codes in Sun's Performance Library and IBM's ESSL library, which are +targeted at specific machines. Moreover, FFTW's performance is +_portable_. Indeed, FFTW is unique in that it automatically adapts +itself to your machine, your cache, the size of your memory, the number +of registers, and all the other factors that normally make it impossible +to optimize a program for more than one machine. An extensive +comparison of FFTW's performance with that of other Fourier transform +codes has been made. The results are available on the Web at the +benchFFT home page (http://theory.lcs.mit.edu/~benchfft). + + In order to use FFTW effectively, you need to understand one basic +concept of FFTW's internal structure. FFTW does not used a fixed +algorithm for computing the transform, but it can adapt the DFT +algorithm to details of the underlying hardware in order to achieve best +performance. Hence, the computation of the transform is split into two +phases. First, FFTW's "planner" is called, which "learns" the fastest +way to compute the transform on your machine. The planner produces a +data structure called a "plan" that contains this information. +Subsequently, the plan is passed to FFTW's "executor", along with an +array of input data. The executor computes the actual transform, as +dictated by the plan. The plan can be reused as many times as needed. +In typical high-performance applications, many transforms of the same +size are computed, and consequently a relatively-expensive +initialization of this sort is acceptable. On the other hand, if you +need a single transform of a given size, the one-time cost of the +planner becomes significant. For this case, FFTW provides fast +planners based on heuristics or on previously computed plans. + + The pattern of planning/execution applies to all four operation +modes of FFTW, that is, I) one-dimensional complex transforms (FFTW), +II) multi-dimensional complex transforms (FFTWND), III) one-dimensional +transforms of real data (RFFTW), IV) multi-dimensional transforms of +real data (RFFTWND). Each mode comes with its own planner and executor. + + Besides the automatic performance adaptation performed by the +planner, it is also possible for advanced users to customize FFTW for +their special needs. As distributed, FFTW works most efficiently for +arrays whose size can be factored into small primes (2, 3, 5, and 7), +and uses a slower general-purpose routine for other factors. FFTW, +however, comes with a code generator that can produce fast C programs +for any particular array size you may care about. For example, if you +need transforms of size 513 = 19 x 3^3, you can customize FFTW to +support the factor 19 efficiently. + + FFTW can exploit multiple processors if you have them. FFTW comes +with a shared-memory implementation on top of POSIX (and similar) +threads, as well as a distributed-memory implementation based on MPI. We +also provide an experimental parallel implementation written in Cilk, +_the superior programming tool of choice for discriminating hackers_ +(Olin Shivers). (See the Cilk home page +(http://supertech.lcs.mit.edu/cilk).) + + For more information regarding FFTW, see the paper, "The Fastest +Fourier Transform in the West," by M. Frigo and S. G. Johnson, which is +the technical report MIT-LCS-TR-728 (Sep. '97). See also, "FFTW: An +Adaptive Software Architecture for the FFT," by M. Frigo and S. G. +Johnson, which appeared in the 23rd International Conference on +Acoustics, Speech, and Signal Processing (`Proc. ICASSP 1998' 3, p. +1381). The code generator is described in the paper "A Fast Fourier +Transform Compiler", by M. Frigo, to appear in the `Proceedings of the +1999 ACM SIGPLAN Conference on Programming Language Design and +Implementation (PLDI), Atlanta, Georgia, May 1999'. These papers, +along with the latest version of FFTW, the FAQ, benchmarks, and other +links, are available at the FFTW home page (http://www.fftw.org). The +current version of FFTW incorporates many good ideas from the past +thirty years of FFT literature. In one way or another, FFTW uses the +Cooley-Tukey algorithm, the Prime Factor algorithm, Rader's algorithm +for prime sizes, and the split-radix algorithm (with a variation due to +Dan Bernstein). Our code generator also produces new algorithms that +we do not yet completely understand. The reader is referred to the +cited papers for the appropriate references. + + The rest of this manual is organized as follows. We first discuss +the sequential (one-processor) implementation. We start by describing +the basic features of FFTW in *Note Tutorial::. This discussion +includes the storage scheme of multi-dimensional arrays (*Note +Multi-dimensional Array Format::) and FFTW's mechanisms for storing +plans on disk (*Note Words of Wisdom::). Next, *Note FFTW Reference:: +provides comprehensive documentation of all FFTW's features. Parallel +transforms are discussed in their own chapter *Note Parallel FFTW::. +Fortran programmers can also use FFTW, as described in *Note Calling +FFTW from Fortran::. *Note Installation and Customization:: explains +how to install FFTW in your computer system and how to adapt FFTW to +your needs. License and copyright information is given in *Note +License and Copyright::. Finally, we thank all the people who helped +us in *Note Acknowledgments::. + + +File: fftw.info, Node: Tutorial, Next: FFTW Reference, Prev: Introduction, Up: Top + +2 Tutorial +********** + +This chapter describes the basic usage of FFTW, i.e., how to compute the +Fourier transform of a single array. This chapter tells the truth, but +not the _whole_ truth. Specifically, FFTW implements additional +routines and flags, providing extra functionality, that are not +documented here. *Note FFTW Reference::, for more complete information. +(Note that you need to compile and install FFTW before you can use it in +a program. *Note Installation and Customization::, for the details of +the installation.) + + Here, we assume a default installation of FFTW. In some +installations (particulary from binary packages), the FFTW header files +and libraries are prefixed with ``d'' or ``s'' to indicate versions in +double or single precision, respectively. The usage of FFTW in that +case is the same, except that `#include' directives and link commands +must use the appropriate prefix. *Note Installing FFTW in both single +and double precision::, for more information. + + This tutorial chapter is structured as follows. *Note Complex +One-dimensional Transforms Tutorial:: describes the basic usage of the +one-dimensional transform of complex data. *Note Complex +Multi-dimensional Transforms Tutorial:: describes the basic usage of the +multi-dimensional transform of complex data. *Note Real +One-dimensional Transforms Tutorial:: describes the one-dimensional +transform of real data and its inverse. Finally, *Note Real +Multi-dimensional Transforms Tutorial:: describes the multi-dimensional +transform of real data and its inverse. We recommend that you read +these sections in the order that they are presented. We then discuss +two topics in detail. In *Note Multi-dimensional Array Format::, we +discuss the various alternatives for storing multi-dimensional arrays +in memory. *Note Words of Wisdom:: shows how you can save FFTW's plans +for future use. + +* Menu: + +* Complex One-dimensional Transforms Tutorial:: +* Complex Multi-dimensional Transforms Tutorial:: +* Real One-dimensional Transforms Tutorial:: +* Real Multi-dimensional Transforms Tutorial:: +* Multi-dimensional Array Format:: +* Words of Wisdom:: + + +File: fftw.info, Node: Complex One-dimensional Transforms Tutorial, Next: Complex Multi-dimensional Transforms Tutorial, Prev: Tutorial, Up: Tutorial + +2.1 Complex One-dimensional Transforms Tutorial +=============================================== + +The basic usage of FFTW is simple. A typical call to FFTW looks like: + + #include + ... + { + fftw_complex in[N], out[N]; + fftw_plan p; + ... + p = fftw_create_plan(N, FFTW_FORWARD, FFTW_ESTIMATE); + ... + fftw_one(p, in, out); + ... + fftw_destroy_plan(p); + } + + The first thing we do is to create a "plan", which is an object that +contains all the data that FFTW needs to compute the FFT, using the +following function: + + fftw_plan fftw_create_plan(int n, fftw_direction dir, int flags); + + The first argument, `n', is the size of the transform you are trying +to compute. The size `n' can be any positive integer, but sizes that +are products of small factors are transformed most efficiently. The +second argument, `dir', can be either `FFTW_FORWARD' or +`FFTW_BACKWARD', and indicates the direction of the transform you are +interested in. Alternatively, you can use the sign of the exponent in +the transform, -1 or +1, which corresponds to `FFTW_FORWARD' or +`FFTW_BACKWARD' respectively. The `flags' argument is either +`FFTW_MEASURE' or `FFTW_ESTIMATE'. `FFTW_MEASURE' means that FFTW +actually runs and measures the execution time of several FFTs in order +to find the best way to compute the transform of size `n'. This may +take some time, depending on your installation and on the precision of +the timer in your machine. `FFTW_ESTIMATE', on the contrary, does not +run any computation, and just builds a reasonable plan, which may be +sub-optimal. In other words, if your program performs many transforms +of the same size and initialization time is not important, use +`FFTW_MEASURE'; otherwise use the estimate. (A compromise between +these two extremes exists. *Note Words of Wisdom::.) + + Once the plan has been created, you can use it as many times as you +like for transforms on arrays of the same size. When you are done with +the plan, you deallocate it by calling `fftw_destroy_plan(plan)'. + + The transform itself is computed by passing the plan along with the +input and output arrays to `fftw_one': + + void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out); + + Note that the transform is out of place: `in' and `out' must point +to distinct arrays. It operates on data of type `fftw_complex', a data +structure with real (`in[i].re') and imaginary (`in[i].im') +floating-point components. The `in' and `out' arrays should have the +length specified when the plan was created. An alternative function, +`fftw', allows you to efficiently perform multiple and/or strided +transforms (*note FFTW Reference::). + + The DFT results are stored in-order in the array `out', with the +zero-frequency (DC) component in `out[0]'. The array `in' is not +modified. Users should note that FFTW computes an unnormalized DFT, +the sign of whose exponent is given by the `dir' parameter of +`fftw_create_plan'. Thus, computing a forward followed by a backward +transform (or vice versa) results in the original array scaled by `n'. +*Note What FFTW Really Computes::, for the definition of DFT. + + A program using FFTW should be linked with `-lfftw -lm' on Unix +systems, or with the FFTW and standard math libraries in general. + + +File: fftw.info, Node: Complex Multi-dimensional Transforms Tutorial, Next: Real One-dimensional Transforms Tutorial, Prev: Complex One-dimensional Transforms Tutorial, Up: Tutorial + +2.2 Complex Multi-dimensional Transforms Tutorial +================================================= + +FFTW can also compute transforms of any number of dimensions ("rank"). +The syntax is similar to that for the one-dimensional transforms, with +`fftw_' replaced by `fftwnd_' (which stands for "`fftw' in `N' +dimensions"). + + As before, we `#include ' and create a plan for the +transforms, this time of type `fftwnd_plan': + + fftwnd_plan fftwnd_create_plan(int rank, const int *n, + fftw_direction dir, int flags); + + `rank' is the dimensionality of the array, and can be any +non-negative integer. The next argument, `n', is a pointer to an +integer array of length `rank' containing the (positive) sizes of each +dimension of the array. (Note that the array will be stored in +row-major order. *Note Multi-dimensional Array Format::, for information +on row-major order.) The last two parameters are the same as in +`fftw_create_plan'. We now, however, have an additional possible flag, +`FFTW_IN_PLACE', since `fftwnd' supports true in-place transforms. +Multiple flags are combined using a bitwise "or" (`|'). (An "in-place" +transform is one in which the output data overwrite the input data. It +thus requires half as much memory as--and is often faster than--its +opposite, an "out-of-place" transform.) + + For two- and three-dimensional transforms, FFTWND provides +alternative routines that accept the sizes of each dimension directly, +rather than indirectly through a rank and an array of sizes. These are +otherwise identical to `fftwnd_create_plan', and are sometimes more +convenient: + + fftwnd_plan fftw2d_create_plan(int nx, int ny, + fftw_direction dir, int flags); + fftwnd_plan fftw3d_create_plan(int nx, int ny, int nz, + fftw_direction dir, int flags); + + Once the plan has been created, you can use it any number of times +for transforms of the same size. When you do not need a plan anymore, +you can deallocate the plan by calling `fftwnd_destroy_plan(plan)'. + + Given a plan, you can compute the transform of an array of data by +calling: + + void fftwnd_one(fftwnd_plan plan, fftw_complex *in, fftw_complex *out); + + Here, `in' and `out' point to multi-dimensional arrays in row-major +order, of the size specified when the plan was created. In the case of +an in-place transform, the `out' parameter is ignored and the output +data are stored in the `in' array. The results are stored in-order, +unnormalized, with the zero-frequency component in `out[0]'. A forward +followed by a backward transform (or vice-versa) yields the original +data multiplied by the size of the array (i.e. the product of the +dimensions). *Note What FFTWND Really Computes::, for a discussion of +what FFTWND computes. + + For example, code to perform an in-place FFT of a three-dimensional +array might look like: + + #include + ... + { + fftw_complex in[L][M][N]; + fftwnd_plan p; + ... + p = fftw3d_create_plan(L, M, N, FFTW_FORWARD, + FFTW_MEASURE | FFTW_IN_PLACE); + ... + fftwnd_one(p, &in[0][0][0], NULL); + ... + fftwnd_destroy_plan(p); + } + + Note that `in' is a statically-declared array, which is +automatically in row-major order, but we must take the address of the +first element in order to fit the type expected by `fftwnd_one'. +(*Note Multi-dimensional Array Format::.) + + +File: fftw.info, Node: Real One-dimensional Transforms Tutorial, Next: Real Multi-dimensional Transforms Tutorial, Prev: Complex Multi-dimensional Transforms Tutorial, Up: Tutorial + +2.3 Real One-dimensional Transforms Tutorial +============================================ + +If the input data are purely real, you can save roughly a factor of two +in both time and storage by using the "rfftw" transforms, which are +FFTs specialized for real data. The output of a such a transform is a +"halfcomplex" array, which consists of only half of the complex DFT +amplitudes (since the negative-frequency amplitudes for real data are +the complex conjugate of the positive-frequency amplitudes). + + In exchange for these speed and space advantages, the user sacrifices +some of the simplicity of FFTW's complex transforms. First of all, to +allow maximum performance, the output format of the one-dimensional real +transforms is different from that used by the multi-dimensional +transforms. Second, the inverse transform (halfcomplex to real) has the +side-effect of destroying its input array. Neither of these +inconveniences should pose a serious problem for users, but it is +important to be aware of them. (Both the inconvenient output format +and the side-effect of the inverse transform can be ameliorated for +one-dimensional transforms, at the expense of some performance, by using +instead the multi-dimensional transform routines with a rank of one.) + + The computation of the plan is similar to that for the complex +transforms. First, you `#include '. Then, you create a plan +(of type `rfftw_plan') by calling: + + rfftw_plan rfftw_create_plan(int n, fftw_direction dir, int flags); + + `n' is the length of the _real_ array in the transform (even for +halfcomplex-to-real transforms), and can be any positive integer +(although sizes with small factors are transformed more efficiently). +`dir' is either `FFTW_REAL_TO_COMPLEX' or `FFTW_COMPLEX_TO_REAL'. The +`flags' parameter is the same as in `fftw_create_plan'. + + Once created, a plan can be used for any number of transforms, and is +deallocated when you are done with it by calling +`rfftw_destroy_plan(plan)'. + + Given a plan, a real-to-complex or complex-to-real transform is +computed by calling: + + void rfftw_one(rfftw_plan plan, fftw_real *in, fftw_real *out); + + (Note that `fftw_real' is an alias for the floating-point type for +which FFTW was compiled.) Depending upon the direction of the plan, +either the input or the output array is halfcomplex, and is stored in +the following format: + + r0, r1, r2, r(n/2), i((n+1)/2-1), ..., i2, i1 + + Here, rk is the real part of the kth output, and ik is the imaginary +part. (We follow here the C convention that integer division is +rounded down, e.g. 7 / 2 = 3.) For a halfcomplex array `hc[]', the kth +component has its real part in `hc[k]' and its imaginary part in +`hc[n-k]', with the exception of `k' `==' `0' or `n/2' (the latter only +if n is even)--in these two cases, the imaginary part is zero due to +symmetries of the real-complex transform, and is not stored. Thus, the +transform of `n' real values is a halfcomplex array of length `n', and +vice versa. (1) This is actually only half of the DFT spectrum of the +data. Although the other half can be obtained by complex conjugation, +it is not required by many applications such as convolution and +filtering. + + Like the complex transforms, the RFFTW transforms are unnormalized, +so a forward followed by a backward transform (or vice-versa) yields the +original data scaled by the length of the array, `n'. + + Let us reiterate here our warning that an `FFTW_COMPLEX_TO_REAL' +transform has the side-effect of destroying its (halfcomplex) input. +The `FFTW_REAL_TO_COMPLEX' transform, however, leaves its (real) input +untouched, just as you would hope. + + As an example, here is an outline of how you might use RFFTW to +compute the power spectrum of a real array (i.e. the squares of the +absolute values of the DFT amplitudes): + + #include + ... + { + fftw_real in[N], out[N], power_spectrum[N/2+1]; + rfftw_plan p; + int k; + ... + p = rfftw_create_plan(N, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE); + ... + rfftw_one(p, in, out); + power_spectrum[0] = out[0]*out[0]; /* DC component */ + for (k = 1; k < (N+1)/2; ++k) /* (k < N/2 rounded up) */ + power_spectrum[k] = out[k]*out[k] + out[N-k]*out[N-k]; + if (N % 2 == 0) /* N is even */ + power_spectrum[N/2] = out[N/2]*out[N/2]; /* Nyquist freq. */ + ... + rfftw_destroy_plan(p); + } + + Programs using RFFTW should link with `-lrfftw -lfftw -lm' on Unix, +or with the FFTW, RFFTW, and math libraries in general. + + ---------- Footnotes ---------- + + (1) The output for the multi-dimensional rfftw is a +more-conventional array of `fftw_complex' values, but the format here +permitted us greater efficiency in one dimension. + + +File: fftw.info, Node: Real Multi-dimensional Transforms Tutorial, Next: Multi-dimensional Array Format, Prev: Real One-dimensional Transforms Tutorial, Up: Tutorial + +2.4 Real Multi-dimensional Transforms Tutorial +============================================== + +FFTW includes multi-dimensional transforms for real data of any rank. +As with the one-dimensional real transforms, they save roughly a factor +of two in time and storage over complex transforms of the same size. +Also as in one dimension, these gains come at the expense of some +increase in complexity--the output format is different from the +one-dimensional RFFTW (and is more similar to that of the complex FFTW) +and the inverse (complex to real) transforms have the side-effect of +overwriting their input data. + + To use the real multi-dimensional transforms, you first `#include +' and then create a plan for the size and direction of +transform that you are interested in: + + rfftwnd_plan rfftwnd_create_plan(int rank, const int *n, + fftw_direction dir, int flags); + + The first two parameters describe the size of the real data (not the +halfcomplex data, which will have different dimensions). The last two +parameters are the same as those for `rfftw_create_plan'. Just as for +fftwnd, there are two alternate versions of this routine, +`rfftw2d_create_plan' and `rfftw3d_create_plan', that are sometimes +more convenient for two- and three-dimensional transforms. Also as in +fftwnd, rfftwnd supports true in-place transforms, specified by +including `FFTW_IN_PLACE' in the flags. + + Once created, a plan can be used for any number of transforms, and is +deallocated by calling `rfftwnd_destroy_plan(plan)'. + + Given a plan, the transform is computed by calling one of the +following two routines: + + void rfftwnd_one_real_to_complex(rfftwnd_plan plan, + fftw_real *in, fftw_complex *out); + void rfftwnd_one_complex_to_real(rfftwnd_plan plan, + fftw_complex *in, fftw_real *out); + + As is clear from their names and parameter types, the former +function is for `FFTW_REAL_TO_COMPLEX' transforms and the latter is for +`FFTW_COMPLEX_TO_REAL' transforms. (We could have used only a single +routine, since the direction of the transform is encoded in the plan, +but we wanted to correctly express the datatypes of the parameters.) +The latter routine, as we discuss elsewhere, has the side-effect of +overwriting its input (except when the rank of the array is one). In +both cases, the `out' parameter is ignored for in-place transforms. + + The format of the complex arrays deserves careful attention. Suppose +that the real data has dimensions n1 x n2 x ... x nd (in row-major +order). Then, after a real-to-complex transform, the output is an n1 x +n2 x ... x (nd/2+1) array of `fftw_complex' values in row-major order, +corresponding to slightly over half of the output of the corresponding +complex transform. (Note that the division is rounded down.) The +ordering of the data is otherwise exactly the same as in the complex +case. (In principle, the output could be exactly half the size of the +complex transform output, but in more than one dimension this requires +too complicated a format to be practical.) Note that, unlike the +one-dimensional RFFTW, the real and imaginary parts of the DFT +amplitudes are here stored together in the natural way. + + Since the complex data is slightly larger than the real data, some +complications arise for in-place transforms. In this case, the final +dimension of the real data must be padded with extra values to +accommodate the size of the complex data--two extra if the last +dimension is even and one if it is odd. That is, the last dimension of +the real data must physically contain 2 * (nd/2+1) `fftw_real' values +(exactly enough to hold the complex data). This physical array size +does not, however, change the _logical_ array size--only nd values are +actually stored in the last dimension, and nd is the last dimension +passed to `rfftwnd_create_plan'. + + For example, consider the transform of a two-dimensional real array +of size `nx' by `ny'. The output of the `rfftwnd' transform is a +two-dimensional real array of size `nx' by `ny/2+1', where the `y' +dimension has been cut nearly in half because of redundancies in the +output. Because `fftw_complex' is twice the size of `fftw_real', the +output array is slightly bigger than the input array. Thus, if we want +to compute the transform in place, we must _pad_ the input array so +that it is of size `nx' by `2*(ny/2+1)'. If `ny' is even, then there +are two padding elements at the end of each row (which need not be +initialized, as they are only used for output). + + The RFFTWND transforms are unnormalized, so a forward followed by a +backward transform will result in the original data scaled by the number +of real data elements--that is, the product of the (logical) dimensions +of the real data. + + Below, we illustrate the use of RFFTWND by showing how you might use +it to compute the (cyclic) convolution of two-dimensional real arrays +`a' and `b' (using the identity that a convolution corresponds to a +pointwise product of the Fourier transforms). For variety, in-place +transforms are used for the forward FFTs and an out-of-place transform +is used for the inverse transform. + + #include + ... + { + fftw_real a[M][2*(N/2+1)], b[M][2*(N/2+1)], c[M][N]; + fftw_complex *A, *B, C[M][N/2+1]; + rfftwnd_plan p, pinv; + fftw_real scale = 1.0 / (M * N); + int i, j; + ... + p = rfftw2d_create_plan(M, N, FFTW_REAL_TO_COMPLEX, + FFTW_ESTIMATE | FFTW_IN_PLACE); + pinv = rfftw2d_create_plan(M, N, FFTW_COMPLEX_TO_REAL, + FFTW_ESTIMATE); + + /* aliases for accessing complex transform outputs: */ + A = (fftw_complex*) &a[0][0]; + B = (fftw_complex*) &b[0][0]; + ... + for (i = 0; i < M; ++i) + for (j = 0; j < N; ++j) { + a[i][j] = ... ; + b[i][j] = ... ; + } + ... + rfftwnd_one_real_to_complex(p, &a[0][0], NULL); + rfftwnd_one_real_to_complex(p, &b[0][0], NULL); + + for (i = 0; i < M; ++i) + for (j = 0; j < N/2+1; ++j) { + int ij = i*(N/2+1) + j; + C[i][j].re = (A[ij].re * B[ij].re + - A[ij].im * B[ij].im) * scale; + C[i][j].im = (A[ij].re * B[ij].im + + A[ij].im * B[ij].re) * scale; + } + + /* inverse transform to get c, the convolution of a and b; + this has the side effect of overwriting C */ + rfftwnd_one_complex_to_real(pinv, &C[0][0], &c[0][0]); + ... + rfftwnd_destroy_plan(p); + rfftwnd_destroy_plan(pinv); + } + + We access the complex outputs of the in-place transforms by casting +each real array to a `fftw_complex' pointer. Because this is a "flat" +pointer, we have to compute the row-major index `ij' explicitly in the +convolution product loop. In order to normalize the convolution, we +must multiply by a scale factor--we can do so either before or after +the inverse transform, and choose the former because it obviates the +necessity of an additional loop. Notice the limits of the loops and +the dimensions of the various arrays. + + As with the one-dimensional RFFTW, an out-of-place +`FFTW_COMPLEX_TO_REAL' transform has the side-effect of overwriting its +input array. (The real-to-complex transform, on the other hand, leaves +its input array untouched.) If you use RFFTWND for a rank-one +transform, however, this side-effect does not occur. Because of this +fact (and the simpler output format), users may find the RFFTWND +interface more convenient than RFFTW for one-dimensional transforms. +However, RFFTWND in one dimension is slightly slower than RFFTW because +RFFTWND uses an extra buffer array internally. + + +File: fftw.info, Node: Multi-dimensional Array Format, Next: Words of Wisdom, Prev: Real Multi-dimensional Transforms Tutorial, Up: Tutorial + +2.5 Multi-dimensional Array Format +================================== + +This section describes the format in which multi-dimensional arrays are +stored. We felt that a detailed discussion of this topic was necessary, +since it is often a source of confusion among users and several +different formats are common. Although the comments below refer to +`fftwnd', they are also applicable to the `rfftwnd' routines. + +* Menu: + +* Row-major Format:: +* Column-major Format:: +* Static Arrays in C:: +* Dynamic Arrays in C:: +* Dynamic Arrays in C-The Wrong Way:: + + +File: fftw.info, Node: Row-major Format, Next: Column-major Format, Prev: Multi-dimensional Array Format, Up: Multi-dimensional Array Format + +2.5.1 Row-major Format +---------------------- + +The multi-dimensional arrays passed to `fftwnd' are expected to be +stored as a single contiguous block in "row-major" order (sometimes +called "C order"). Basically, this means that as you step through +adjacent memory locations, the first dimension's index varies most +slowly and the last dimension's index varies most quickly. + + To be more explicit, let us consider an array of rank d whose +dimensions are n1 x n2 x n3 x ... x nd. Now, we specify a location in +the array by a sequence of (zero-based) indices, one for each dimension: +(i1, i2, ..., id). If the array is stored in row-major order, then +this element is located at the position id + nd * (id-1 + nd-1 * (... + +n2 * i1)). + + Note that each element of the array must be of type `fftw_complex'; +i.e. a (real, imaginary) pair of (double-precision) numbers. Note also +that, in `fftwnd', the expression above is multiplied by the stride to +get the actual array index--this is useful in situations where each +element of the multi-dimensional array is actually a data structure or +another array, and you just want to transform a single field. In most +cases, however, you use a stride of 1. + + +File: fftw.info, Node: Column-major Format, Next: Static Arrays in C, Prev: Row-major Format, Up: Multi-dimensional Array Format + +2.5.2 Column-major Format +------------------------- + +Readers from the Fortran world are used to arrays stored in +"column-major" order (sometimes called "Fortran order"). This is +essentially the exact opposite of row-major order in that, here, the +_first_ dimension's index varies most quickly. + + If you have an array stored in column-major order and wish to +transform it using `fftwnd', it is quite easy to do. When creating the +plan, simply pass the dimensions of the array to `fftwnd_create_plan' in +_reverse order_. For example, if your array is a rank three `N x M x +L' matrix in column-major order, you should pass the dimensions of the +array as if it were an `L x M x N' matrix (which it is, from the +perspective of `fftwnd'). This is done for you automatically by the +FFTW Fortran wrapper routines (*note Calling FFTW from Fortran::). + + +File: fftw.info, Node: Static Arrays in C, Next: Dynamic Arrays in C, Prev: Column-major Format, Up: Multi-dimensional Array Format + +2.5.3 Static Arrays in C +------------------------ + +Multi-dimensional arrays declared statically (that is, at compile time, +not necessarily with the `static' keyword) in C are _already_ in +row-major order. You don't have to do anything special to transform +them. (*Note Complex Multi-dimensional Transforms Tutorial::, for an +example of this sort of code.) + + +File: fftw.info, Node: Dynamic Arrays in C, Next: Dynamic Arrays in C-The Wrong Way, Prev: Static Arrays in C, Up: Multi-dimensional Array Format + +2.5.4 Dynamic Arrays in C +------------------------- + +Often, especially for large arrays, it is desirable to allocate the +arrays dynamically, at runtime. This isn't too hard to do, although it +is not as straightforward for multi-dimensional arrays as it is for +one-dimensional arrays. + + Creating the array is simple: using a dynamic-allocation routine like +`malloc', allocate an array big enough to store N `fftw_complex' +values, where N is the product of the sizes of the array dimensions +(i.e. the total number of complex values in the array). For example, +here is code to allocate a 5x12x27 rank 3 array: + + fftw_complex *an_array; + + an_array = (fftw_complex *) malloc(5 * 12 * 27 * sizeof(fftw_complex)); + + Accessing the array elements, however, is more tricky--you can't +simply use multiple applications of the `[]' operator like you could for +static arrays. Instead, you have to explicitly compute the offset into +the array using the formula given earlier for row-major arrays. For +example, to reference the (i,j,k)-th element of the array allocated +above, you would use the expression `an_array[k + 27 * (j + 12 * i)]'. + + This pain can be alleviated somewhat by defining appropriate macros, +or, in C++, creating a class and overloading the `()' operator. + + +File: fftw.info, Node: Dynamic Arrays in C-The Wrong Way, Prev: Dynamic Arrays in C, Up: Multi-dimensional Array Format + +2.5.5 Dynamic Arrays in C--The Wrong Way +---------------------------------------- + +A different method for allocating multi-dimensional arrays in C is often +suggested that is incompatible with `fftwnd': _using it will cause FFTW +to die a painful death_. We discuss the technique here, however, +because it is so commonly known and used. This method is to create +arrays of pointers of arrays of pointers of ...etcetera. For example, +the analogue in this method to the example above is: + + int i,j; + fftw_complex ***a_bad_array; /* another way to make a 5x12x27 array */ + + a_bad_array = (fftw_complex ***) malloc(5 * sizeof(fftw_complex **)); + for (i = 0; i < 5; ++i) { + a_bad_array[i] = + (fftw_complex **) malloc(12 * sizeof(fftw_complex *)); + for (j = 0; j < 12; ++j) + a_bad_array[i][j] = + (fftw_complex *) malloc(27 * sizeof(fftw_complex)); + } + + As you can see, this sort of array is inconvenient to allocate (and +deallocate). On the other hand, it has the advantage that the +(i,j,k)-th element can be referenced simply by `a_bad_array[i][j][k]'. + + If you like this technique and want to maximize convenience in +accessing the array, but still want to pass the array to FFTW, you can +use a hybrid method. Allocate the array as one contiguous block, but +also declare an array of arrays of pointers that point to appropriate +places in the block. That sort of trick is beyond the scope of this +documentation; for more information on multi-dimensional arrays in C, +see the `comp.lang.c' FAQ (http://www.eskimo.com/~scs/C-faq/s6.html). + + +File: fftw.info, Node: Words of Wisdom, Prev: Multi-dimensional Array Format, Up: Tutorial + +2.6 Words of Wisdom +=================== + +FFTW implements a method for saving plans to disk and restoring them. +In fact, what FFTW does is more general than just saving and loading +plans. The mechanism is called "`wisdom'". Here, we describe this +feature at a high level. *Note FFTW Reference::, for a less casual (but +more complete) discussion of how to use `wisdom' in FFTW. + + Plans created with the `FFTW_MEASURE' option produce near-optimal +FFT performance, but it can take a long time to compute a plan because +FFTW must actually measure the runtime of many possible plans and select +the best one. This is designed for the situations where so many +transforms of the same size must be computed that the start-up time is +irrelevant. For short initialization times but slightly slower +transforms, we have provided `FFTW_ESTIMATE'. The `wisdom' mechanism +is a way to get the best of both worlds. There are, however, certain +caveats that the user must be aware of in using `wisdom'. For this +reason, `wisdom' is an optional feature which is not enabled by default. + + At its simplest, `wisdom' provides a way of saving plans to disk so +that they can be reused in other program runs. You create a plan with +the flags `FFTW_MEASURE' and `FFTW_USE_WISDOM', and then save the +`wisdom' using `fftw_export_wisdom': + + plan = fftw_create_plan(..., ... | FFTW_MEASURE | FFTW_USE_WISDOM); + fftw_export_wisdom(...); + + The next time you run the program, you can restore the `wisdom' with +`fftw_import_wisdom', and then recreate the plan using the same flags +as before. This time, however, the same optimal plan will be created +very quickly without measurements. (FFTW still needs some time to +compute trigonometric tables, however.) The basic outline is: + + fftw_import_wisdom(...); + plan = fftw_create_plan(..., ... | FFTW_USE_WISDOM); + + Wisdom is more than mere rote memorization, however. FFTW's +`wisdom' encompasses all of the knowledge and measurements that were +used to create the plan for a given size. Therefore, existing `wisdom' +is also applied to the creation of other plans of different sizes. + + Whenever a plan is created with the `FFTW_MEASURE' and +`FFTW_USE_WISDOM' flags, `wisdom' is generated. Thereafter, plans for +any transform with a similar factorization will be computed more +quickly, so long as they use the `FFTW_USE_WISDOM' flag. In fact, for +transforms with the same factors and of equal or lesser size, no +measurements at all need to be made and an optimal plan can be created +with negligible delay! + + For example, suppose that you create a plan for N = 2^16. Then, for +any equal or smaller power of two, FFTW can create a plan (with the +same direction and flags) quickly, using the precomputed `wisdom'. Even +for larger powers of two, or sizes that are a power of two times some +other prime factors, plans will be computed more quickly than they +would otherwise (although some measurements still have to be made). + + The `wisdom' is cumulative, and is stored in a global, private data +structure managed internally by FFTW. The storage space required is +minimal, proportional to the logarithm of the sizes the `wisdom' was +generated from. The `wisdom' can be forgotten (and its associated +memory freed) by a call to `fftw_forget_wisdom()'; otherwise, it is +remembered until the program terminates. It can also be exported to a +file, a string, or any other medium using `fftw_export_wisdom' and +restored during a subsequent execution of the program (or a different +program) using `fftw_import_wisdom' (these functions are described +below). + + Because `wisdom' is incorporated into FFTW at a very low level, the +same `wisdom' can be used for one-dimensional transforms, +multi-dimensional transforms, and even the parallel extensions to FFTW. +Just include `FFTW_USE_WISDOM' in the flags for whatever plans you +create (i.e., always plan wisely). + + Plans created with the `FFTW_ESTIMATE' plan can use `wisdom', but +cannot generate it; only `FFTW_MEASURE' plans actually produce +`wisdom'. Also, plans can only use `wisdom' generated from plans +created with the same direction and flags. For example, a size `42' +`FFTW_BACKWARD' transform will not use `wisdom' produced by a size `42' +`FFTW_FORWARD' transform. The only exception to this rule is that +`FFTW_ESTIMATE' plans can use `wisdom' from `FFTW_MEASURE' plans. + +* Menu: + +* Caveats in Using Wisdom:: What you should worry about in using wisdom +* Importing and Exporting Wisdom:: I/O of wisdom to disk and other media + + +File: fftw.info, Node: Caveats in Using Wisdom, Next: Importing and Exporting Wisdom, Prev: Words of Wisdom, Up: Words of Wisdom + +2.6.1 Caveats in Using Wisdom +----------------------------- + + For in much wisdom is much grief, and he that increaseth knowledge + increaseth sorrow. [Ecclesiastes 1:18] + + There are pitfalls to using `wisdom', in that it can negate FFTW's +ability to adapt to changing hardware and other conditions. For example, +it would be perfectly possible to export `wisdom' from a program +running on one processor and import it into a program running on another +processor. Doing so, however, would mean that the second program would +use plans optimized for the first processor, instead of the one it is +running on. + + It should be safe to reuse `wisdom' as long as the hardware and +program binaries remain unchanged. (Actually, the optimal plan may +change even between runs of the same binary on identical hardware, due +to differences in the virtual memory environment, etcetera. Users +seriously interested in performance should worry about this problem, +too.) It is likely that, if the same `wisdom' is used for two +different program binaries, even running on the same machine, the plans +may be sub-optimal because of differing code alignments. It is +therefore wise to recreate `wisdom' every time an application is +recompiled. The more the underlying hardware and software changes +between the creation of `wisdom' and its use, the greater grows the +risk of sub-optimal plans. + + +File: fftw.info, Node: Importing and Exporting Wisdom, Prev: Caveats in Using Wisdom, Up: Words of Wisdom + +2.6.2 Importing and Exporting Wisdom +------------------------------------ + + void fftw_export_wisdom_to_file(FILE *output_file); + fftw_status fftw_import_wisdom_from_file(FILE *input_file); + + `fftw_export_wisdom_to_file' writes the `wisdom' to `output_file', +which must be a file open for writing. `fftw_import_wisdom_from_file' +reads the `wisdom' from `input_file', which must be a file open for +reading, and returns `FFTW_SUCCESS' if successful and `FFTW_FAILURE' +otherwise. In both cases, the file is left open and must be closed by +the caller. It is perfectly fine if other data lie before or after the +`wisdom' in the file, as long as the file is positioned at the +beginning of the `wisdom' data before import. + + char *fftw_export_wisdom_to_string(void); + fftw_status fftw_import_wisdom_from_string(const char *input_string) + + `fftw_export_wisdom_to_string' allocates a string, exports the +`wisdom' to it in `NULL'-terminated format, and returns a pointer to +the string. If there is an error in allocating or writing the data, it +returns `NULL'. The caller is responsible for deallocating the string +(with `fftw_free') when she is done with it. +`fftw_import_wisdom_from_string' imports the `wisdom' from +`input_string', returning `FFTW_SUCCESS' if successful and +`FFTW_FAILURE' otherwise. + + Exporting `wisdom' does not affect the store of `wisdom'. Imported +`wisdom' supplements the current store rather than replacing it (except +when there is conflicting `wisdom', in which case the older `wisdom' is +discarded). The format of the exported `wisdom' is "nerd-readable" +LISP-like ASCII text; we will not document it here except to note that +it is insensitive to white space (interested users can contact us for +more details). + + *Note FFTW Reference::, for more information, and for a description +of how you can implement `wisdom' import/export for other media besides +files and strings. + + The following is a brief example in which the `wisdom' is read from +a file, a plan is created (possibly generating more `wisdom'), and then +the `wisdom' is exported to a string and printed to `stdout'. + + { + fftw_plan plan; + char *wisdom_string; + FILE *input_file; + + /* open file to read wisdom from */ + input_file = fopen("sample.wisdom", "r"); + if (FFTW_FAILURE == fftw_import_wisdom_from_file(input_file)) + printf("Error reading wisdom!\n"); + fclose(input_file); /* be sure to close the file! */ + + /* create a plan for N=64, possibly creating and/or using wisdom */ + plan = fftw_create_plan(64,FFTW_FORWARD, + FFTW_MEASURE | FFTW_USE_WISDOM); + + /* ... do some computations with the plan ... */ + + /* always destroy plans when you are done */ + fftw_destroy_plan(plan); + + /* write the wisdom to a string */ + wisdom_string = fftw_export_wisdom_to_string(); + if (wisdom_string != NULL) { + printf("Accumulated wisdom: %s\n",wisdom_string); + + /* Just for fun, destroy and restore the wisdom */ + fftw_forget_wisdom(); /* all gone! */ + fftw_import_wisdom_from_string(wisdom_string); + /* wisdom is back! */ + + fftw_free(wisdom_string); /* deallocate it since we're done */ + } + } + + +File: fftw.info, Node: FFTW Reference, Next: Parallel FFTW, Prev: Tutorial, Up: Top + +3 FFTW Reference +**************** + +This chapter provides a complete reference for all sequential (i.e., +one-processor) FFTW functions. We first define the data types upon +which FFTW operates, that is, real, complex, and "halfcomplex" numbers +(*note Data Types::). Then, in four sections, we explain the FFTW +program interface for complex one-dimensional transforms (*note +One-dimensional Transforms Reference::), complex multi-dimensional +transforms (*note Multi-dimensional Transforms Reference::), and real +one-dimensional transforms (*note Real One-dimensional Transforms +Reference::), real multi-dimensional transforms (*note Real +Multi-dimensional Transforms Reference::). *Note Wisdom Reference:: +describes the `wisdom' mechanism for exporting and importing plans. +Finally, *Note Memory Allocator Reference:: describes how to change +FFTW's default memory allocator. For parallel transforms, *Note +Parallel FFTW::. + +* Menu: + +* Data Types:: real, complex, and halfcomplex numbers +* One-dimensional Transforms Reference:: +* Multi-dimensional Transforms Reference:: +* Real One-dimensional Transforms Reference:: +* Real Multi-dimensional Transforms Reference:: +* Wisdom Reference:: +* Memory Allocator Reference:: +* Thread safety:: + + +File: fftw.info, Node: Data Types, Next: One-dimensional Transforms Reference, Prev: FFTW Reference, Up: FFTW Reference + +3.1 Data Types +============== + +The routines in the FFTW package use three main kinds of data types. +"Real" and "complex" numbers should be already known to the reader. We +also use the term "halfcomplex" to describe complex arrays in a special +packed format used by the one-dimensional real transforms (taking +advantage of the "hermitian" symmetry that arises in those cases). + + By including `' or `', you will have access to the +following definitions: + + typedef double fftw_real; + + typedef struct { + fftw_real re, im; + } fftw_complex; + + #define c_re(c) ((c).re) + #define c_im(c) ((c).im) + + All FFTW operations are performed on the `fftw_real' and +`fftw_complex' data types. For `fftw_complex' numbers, the two macros +`c_re' and `c_im' retrieve, respectively, the real and imaginary parts +of the number. + + A "real array" is an array of real numbers. A "complex array" is an +array of complex numbers. A one-dimensional array X of n complex +numbers is "hermitian" if the following property holds: for all 0 <= i +< n, we have X[i] = conj(X[n-i]). Hermitian arrays are relevant to +FFTW because the Fourier transform of a real array is hermitian. + + Because of its symmetry, a hermitian array can be stored in half the +space of a complex array of the same size. FFTW's one-dimensional real +transforms store hermitian arrays as "halfcomplex" arrays. A +halfcomplex array of size n is a one-dimensional array of n `fftw_real' +numbers. A hermitian array X in stored into a halfcomplex array Y as +follows. For all integers i such that 0 <= i <= n / 2, we have Y[i] = +Re(X[i]). For all integers i such that 0 < i < n / 2, we have Y[n-i] = +Im(X[i]). + + We now illustrate halfcomplex storage for n = 4 and n = 5, since the +scheme depends on the parity of n. Let n = 4. In this case, we have +Y[0] = Re(X[0]), Y[1] = Re(X[1]), Y[2] = Re(X[2]), and Y[3] = Im(X[1]). +Let now n = 5. In this case, we have Y[0] = Re(X[0]), Y[1] = Re(X[1]), +Y[2] = Re(X[2]), Y[3] = Im(X[2]), and Y[4] = Im(X[1]). + + By default, the type `fftw_real' equals the C type `double'. To +work in single precision rather than double precision, `#define' the +symbol `FFTW_ENABLE_FLOAT' in `fftw.h' and then recompile the library. +On Unix systems, you can instead use `configure --enable-float' at +installation time (*note Installation and Customization::). + + In version 1 of FFTW, the data types were called `FFTW_REAL' and +`FFTW_COMPLEX'. We changed the capitalization for consistency with the +rest of FFTW's conventions. The old names are still supported, but +their use is deprecated. + + +File: fftw.info, Node: One-dimensional Transforms Reference, Next: Multi-dimensional Transforms Reference, Prev: Data Types, Up: FFTW Reference + +3.2 One-dimensional Transforms Reference +======================================== + +The one-dimensional complex routines are generally prefixed with +`fftw_'. Programs using FFTW should be linked with `-lfftw -lm' on +Unix systems, or with the FFTW and standard math libraries in general. + +* Menu: + +* fftw_create_plan:: Plan Creation +* Discussion on Specific Plans:: +* fftw:: Plan Execution +* fftw_destroy_plan:: Plan Destruction +* What FFTW Really Computes:: Definition of the DFT. + + +File: fftw.info, Node: fftw_create_plan, Next: Discussion on Specific Plans, Prev: One-dimensional Transforms Reference, Up: One-dimensional Transforms Reference + +3.2.1 Plan Creation for One-dimensional Transforms +-------------------------------------------------- + + #include + + fftw_plan fftw_create_plan(int n, fftw_direction dir, + int flags); + + fftw_plan fftw_create_plan_specific(int n, fftw_direction dir, + int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); + + The function `fftw_create_plan' creates a plan, which is a data +structure containing all the information that `fftw' needs in order to +compute the 1D Fourier transform. You can create as many plans as you +need, but only one plan for a given array size is required (a plan can +be reused many times). + + `fftw_create_plan' returns a valid plan, or `NULL' if, for some +reason, the plan can't be created. In the default installation, this +cannot happen, but it is possible to configure FFTW in such a way that +some input sizes are forbidden, and FFTW cannot create a plan. + + The `fftw_create_plan_specific' variant takes as additional +arguments specific input/output arrays and their strides. For the last +four arguments, you should pass the arrays and strides that you will +eventually be passing to `fftw'. The resulting plans will be optimized +for those arrays and strides, although they may be used on other arrays +as well. Note: the contents of the in and out arrays are _destroyed_ +by the specific planner (the initial contents are ignored, so the +arrays need not have been initialized). + +Arguments +......... + + * `n' is the size of the transform. It can be any positive integer. + + - FFTW is best at handling sizes of the form 2^a 3^b 5^c 7^d + 11^e 13^f, where e+f is either 0 or 1, and the other + exponents are arbitrary. Other sizes are computed by means + of a slow, general-purpose routine (which nevertheless retains + O(n lg n) performance, even for prime sizes). (It is + possible to customize FFTW for different array sizes. *Note + Installation and Customization::, for more information.) + Transforms whose sizes are powers of 2 are especially fast. + + * `dir' is the sign of the exponent in the formula that defines the + Fourier transform. It can be -1 or +1. The aliases + `FFTW_FORWARD' and `FFTW_BACKWARD' are provided, where + `FFTW_FORWARD' stands for -1. + + * `flags' is a boolean OR (`|') of zero or more of the following: + - `FFTW_MEASURE': this flag tells FFTW to find the optimal plan + by actually _computing_ several FFTs and measuring their + execution time. Depending on the installation, this can take + some time. (1) + + - `FFTW_ESTIMATE': do not run any FFT and provide a "reasonable" + plan (for a RISC processor with many registers). If neither + `FFTW_ESTIMATE' nor `FFTW_MEASURE' is provided, the default is + `FFTW_ESTIMATE'. + + - `FFTW_OUT_OF_PLACE': produce a plan assuming that the input + and output arrays will be distinct (this is the default). + + - `FFTW_IN_PLACE': produce a plan assuming that you want the + output in the input array. The algorithm used is not + necessarily in place: FFTW is able to compute true in-place + transforms only for small values of `n'. If FFTW is not able + to compute the transform in-place, it will allocate a + temporary array (unless you provide one yourself), compute + the transform out of place, and copy the result back. + _Warning: This option changes the meaning of some parameters + of `fftw'_ (*note Computing the One-dimensional Transform: + fftw.). + + The in-place option is mainly provided for people who want to + write their own in-place multi-dimensional Fourier transform, + using FFTW as a base. For example, consider a + three-dimensional `n * n * n' transform. An out-of-place + algorithm will need another array (which may be huge). + However, FFTW can compute the in-place transform along each + dimension using only a temporary array of size `n'. + Moreover, if FFTW happens to be able to compute the transform + truly in-place, no temporary array and no copying are needed. + As distributed, FFTW `knows' how to compute in-place + transforms of size 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 32 and 64. + + The default mode of operation is `FFTW_OUT_OF_PLACE'. + + - `FFTW_USE_WISDOM': use any `wisdom' that is available to help + in the creation of the plan. (*Note Words of Wisdom::.) This + can greatly speed the creation of plans, especially with the + `FFTW_MEASURE' option. `FFTW_ESTIMATE' plans can also take + advantage of `wisdom' to produce a more optimal plan (based + on past measurements) than the estimation heuristic would + normally generate. When the `FFTW_MEASURE' option is used, + new `wisdom' will also be generated if the current transform + size is not completely understood by existing `wisdom'. + + + * `in', `out', `istride', `ostride' (only for + `fftw_create_plan_specific'): see corresponding arguments in the + description of `fftw'. (*Note Computing the One-dimensional + Transform: fftw.) In particular, the `out' and `ostride' + parameters have the same special meaning for `FFTW_IN_PLACE' + transforms as they have for `fftw'. + + + ---------- Footnotes ---------- + + (1) The basic problem is the resolution of the clock: FFTW needs to +run for a certain time for the clock to be reliable. + + +File: fftw.info, Node: Discussion on Specific Plans, Next: fftw, Prev: fftw_create_plan, Up: One-dimensional Transforms Reference + +3.2.2 Discussion on Specific Plans +---------------------------------- + +We recommend the use of the specific planners, even in cases where you +will be transforming arrays different from those passed to the specific +planners, as they confer the following advantages: + + * The resulting plans will be optimized for your specific arrays and + strides. This may or may not make a significant difference, but it + certainly doesn't hurt. (The ordinary planner does its planning + based upon a stride-one temporary array that it allocates.) + + * Less intermediate storage is required during the planning process. + (The ordinary planner uses O(`N') temporary storage, where `N' is + the maximum dimension, while it is creating the plan.) + + * For multi-dimensional transforms, new parameters become accessible + for optimization by the planner. (Since multi-dimensional arrays + can be very large, we don't dare to allocate one in the ordinary + planner for experimentation. This prevents us from doing certain + optimizations that can yield dramatic improvements in some cases.) + + + On the other hand, note that _the specific planner destroys the +contents of the `in' and `out' arrays_. + + +File: fftw.info, Node: fftw, Next: fftw_destroy_plan, Prev: Discussion on Specific Plans, Up: One-dimensional Transforms Reference + +3.2.3 Computing the One-dimensional Transform +--------------------------------------------- + + #include + + void fftw(fftw_plan plan, int howmany, + fftw_complex *in, int istride, int idist, + fftw_complex *out, int ostride, int odist); + + void fftw_one(fftw_plan plan, fftw_complex *in, + fftw_complex *out); + + The function `fftw' computes the one-dimensional Fourier transform, +using a plan created by `fftw_create_plan' (*Note Plan Creation for +One-dimensional Transforms: fftw_create_plan.) The function `fftw_one' +provides a simplified interface for the common case of single input +array of stride 1. + +Arguments +......... + + * `plan' is the plan created by `fftw_create_plan' (*note Plan + Creation for One-dimensional Transforms: fftw_create_plan.). + + * `howmany' is the number of transforms `fftw' will compute. It is + faster to tell FFTW to compute many transforms, instead of simply + calling `fftw' many times. + + * `in', `istride' and `idist' describe the input array(s). There + are `howmany' input arrays; the first one is pointed to by `in', + the second one is pointed to by `in + idist', and so on, up to `in + + (howmany - 1) * idist'. Each input array consists of complex + numbers (*note Data Types::), which are not necessarily contiguous + in memory. Specifically, `in[0]' is the first element of the + first array, `in[istride]' is the second element of the first + array, and so on. In general, the `i'-th element of the `j'-th + input array will be in position `in[i * istride + j * idist]'. + + * `out', `ostride' and `odist' describe the output array(s). The + format is the same as for the input array. + + - _In-place transforms_: If the `plan' specifies an in-place + transform, `ostride' and `odist' are always ignored. If + `out' is `NULL', `out' is ignored, too. Otherwise, `out' is + interpreted as a pointer to an array of `n' complex numbers, + that FFTW will use as temporary space to perform the in-place + computation. `out' is used as scratch space and its contents + destroyed. In this case, `out' must be an ordinary array + whose elements are contiguous in memory (no striding). + + + The function `fftw_one' transforms a single, contiguous input array +to a contiguous output array. By definition, the call + fftw_one(plan, in, out) + is equivalent to + fftw(plan, 1, in, 1, 1, out, 1, 1) + + +File: fftw.info, Node: fftw_destroy_plan, Next: What FFTW Really Computes, Prev: fftw, Up: One-dimensional Transforms Reference + +3.2.4 Destroying a One-dimensional Plan +--------------------------------------- + + #include + + void fftw_destroy_plan(fftw_plan plan); + + The function `fftw_destroy_plan' frees the plan `plan' and releases +all the memory associated with it. After destruction, a plan is no +longer valid. + + +File: fftw.info, Node: What FFTW Really Computes, Prev: fftw_destroy_plan, Up: One-dimensional Transforms Reference + +3.2.5 What FFTW Really Computes +------------------------------- + +In this section, we define precisely what FFTW computes. Please be +warned that different authors and software packages might employ +different conventions than FFTW does. + + The forward transform of a complex array X of size n computes an +array Y, where Y[i] = sum for j = 0 to (n - 1) of X[j] * exp(-2 pi i j sqrt(-1)/n) . + The backward transform computes Y[i] = sum for j = 0 to (n - 1) of X[j] * exp(2 pi i j sqrt(-1)/n) . + FFTW computes an unnormalized transform, that is, the equation +IFFT(FFT(X)) = n X holds. In other words, applying the forward and +then the backward transform will multiply the input by n. + + An `FFTW_FORWARD' transform corresponds to a sign of -1 in the +exponent of the DFT. Note also that we use the standard "in-order" +output ordering--the k-th output corresponds to the frequency k/n (or +k/T, where T is your total sampling period). For those who like to +think in terms of positive and negative frequencies, this means that +the positive frequencies are stored in the first half of the output and +the negative frequencies are stored in backwards order in the second +half of the output. (The frequency -k/n is the same as the frequency +(n-k)/n.) + + +File: fftw.info, Node: Multi-dimensional Transforms Reference, Next: Real One-dimensional Transforms Reference, Prev: One-dimensional Transforms Reference, Up: FFTW Reference + +3.3 Multi-dimensional Transforms Reference +========================================== + +The multi-dimensional complex routines are generally prefixed with +`fftwnd_'. Programs using FFTWND should be linked with `-lfftw -lm' on +Unix systems, or with the FFTW and standard math libraries in general. + +* Menu: + +* fftwnd_create_plan:: Plan Creation +* fftwnd:: Plan Execution +* fftwnd_destroy_plan:: Plan Destruction +* What FFTWND Really Computes:: + + +File: fftw.info, Node: fftwnd_create_plan, Next: fftwnd, Prev: Multi-dimensional Transforms Reference, Up: Multi-dimensional Transforms Reference + +3.3.1 Plan Creation for Multi-dimensional Transforms +---------------------------------------------------- + + #include + + fftwnd_plan fftwnd_create_plan(int rank, const int *n, + fftw_direction dir, int flags); + + fftwnd_plan fftw2d_create_plan(int nx, int ny, + fftw_direction dir, int flags); + + fftwnd_plan fftw3d_create_plan(int nx, int ny, int nz, + fftw_direction dir, int flags); + + fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n, + fftw_direction dir, + int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); + + fftwnd_plan fftw2d_create_plan_specific(int nx, int ny, + fftw_direction dir, + int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); + + fftwnd_plan fftw3d_create_plan_specific(int nx, int ny, int nz, + fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); + + The function `fftwnd_create_plan' creates a plan, which is a data +structure containing all the information that `fftwnd' needs in order +to compute a multi-dimensional Fourier transform. You can create as +many plans as you need, but only one plan for a given array size is +required (a plan can be reused many times). The functions +`fftw2d_create_plan' and `fftw3d_create_plan' are optional, alternative +interfaces to `fftwnd_create_plan' for two and three dimensions, +respectively. + + `fftwnd_create_plan' returns a valid plan, or `NULL' if, for some +reason, the plan can't be created. This can happen if memory runs out +or if the arguments are invalid in some way (e.g. if `rank' < 0). + + The `create_plan_specific' variants take as additional arguments +specific input/output arrays and their strides. For the last four +arguments, you should pass the arrays and strides that you will +eventually be passing to `fftwnd'. The resulting plans will be +optimized for those arrays and strides, although they may be used on +other arrays as well. Note: the contents of the in and out arrays are +_destroyed_ by the specific planner (the initial contents are ignored, +so the arrays need not have been initialized). *Note Discussion on +Specific Plans::, for a discussion on specific plans. + +Arguments +......... + + * `rank' is the dimensionality of the arrays to be transformed. It + can be any non-negative integer. + + * `n' is a pointer to an array of `rank' integers, giving the size + of each dimension of the arrays to be transformed. These sizes, + which must be positive integers, correspond to the dimensions of row-major + arrays--i.e. `n[0]' is the size of the dimension whose indices + vary most slowly, and so on. (*Note Multi-dimensional Array + Format::, for more information on row-major storage.) *Note Plan + Creation for One-dimensional Transforms: fftw_create_plan, for + more information regarding optimal array sizes. + + * `nx' and `ny' in `fftw2d_create_plan' are positive integers + specifying the dimensions of the rank 2 array to be transformed. + i.e. they specify that the transform will operate on `nx x ny' + arrays in row-major order, where `nx' is the number of rows and + `ny' is the number of columns. + + * `nx', `ny' and `nz' in `fftw3d_create_plan' are positive integers + specifying the dimensions of the rank 3 array to be transformed. + i.e. they specify that the transform will operate on `nx x ny x + nz' arrays in row-major order. + + * `dir' is the sign of the exponent in the formula that defines the + Fourier transform. It can be -1 or +1. The aliases + `FFTW_FORWARD' and `FFTW_BACKWARD' are provided, where + `FFTW_FORWARD' stands for -1. + + * `flags' is a boolean OR (`|') of zero or more of the following: + - `FFTW_MEASURE': this flag tells FFTW to find the optimal plan + by actually _computing_ several FFTs and measuring their + execution time. + + - `FFTW_ESTIMATE': do not run any FFT and provide a "reasonable" + plan (for a RISC processor with many registers). If neither + `FFTW_ESTIMATE' nor `FFTW_MEASURE' is provided, the default is + `FFTW_ESTIMATE'. + + - `FFTW_OUT_OF_PLACE': produce a plan assuming that the input + and output arrays will be distinct (this is the default). + + - `FFTW_IN_PLACE': produce a plan assuming that you want to + perform the transform in-place. (Unlike the one-dimensional + transform, this "really" (1) performs the transform + in-place.) Note that, if you want to perform in-place + transforms, you _must_ use a plan created with this option. + + The default mode of operation is `FFTW_OUT_OF_PLACE'. + + - `FFTW_USE_WISDOM': use any `wisdom' that is available to help + in the creation of the plan. (*Note Words of Wisdom::.) This + can greatly speed the creation of plans, especially with the + `FFTW_MEASURE' option. `FFTW_ESTIMATE' plans can also take + advantage of `wisdom' to produce a more optimal plan (based + on past measurements) than the estimation heuristic would + normally generate. When the `FFTW_MEASURE' option is used, + new `wisdom' will also be generated if the current transform + size is not completely understood by existing `wisdom'. Note + that the same `wisdom' is shared between one-dimensional and + multi-dimensional transforms. + + + * `in', `out', `istride', `ostride' (only for the + `_create_plan_specific' variants): see corresponding arguments in + the description of `fftwnd'. (*Note Computing the + Multi-dimensional Transform: fftwnd.) + + + ---------- Footnotes ---------- + + (1) `fftwnd' actually may use some temporary storage (hidden in the +plan), but this storage space is only the size of the largest dimension +of the array, rather than being as big as the entire array. (Unless +you use `fftwnd' to perform one-dimensional transforms, in which case +the temporary storage required for in-place transforms _is_ as big as +the entire array.) + + +File: fftw.info, Node: fftwnd, Next: fftwnd_destroy_plan, Prev: fftwnd_create_plan, Up: Multi-dimensional Transforms Reference + +3.3.2 Computing the Multi-dimensional Transform +----------------------------------------------- + + #include + + void fftwnd(fftwnd_plan plan, int howmany, + fftw_complex *in, int istride, int idist, + fftw_complex *out, int ostride, int odist); + + void fftwnd_one(fftwnd_plan p, fftw_complex *in, + fftw_complex *out); + + The function `fftwnd' computes one or more multi-dimensional Fourier +Transforms, using a plan created by `fftwnd_create_plan' (*note Plan +Creation for Multi-dimensional Transforms: fftwnd_create_plan.). (Note +that the plan determines the rank and dimensions of the array to be +transformed.) The function `fftwnd_one' provides a simplified +interface for the common case of single input array of stride 1. + +Arguments +......... + + * `plan' is the plan created by `fftwnd_create_plan'. (*note Plan + Creation for Multi-dimensional Transforms: fftwnd_create_plan.). + In the case of two and three-dimensional transforms, it could also + have been created by `fftw2d_create_plan' or `fftw3d_create_plan', + respectively. + + * `howmany' is the number of multi-dimensional transforms `fftwnd' + will compute. + + * `in', `istride' and `idist' describe the input array(s). There + are `howmany' multi-dimensional input arrays; the first one is + pointed to by `in', the second one is pointed to by `in + idist', + and so on, up to `in + (howmany - 1) * idist'. Each + multi-dimensional input array consists of complex numbers (*note + Data Types::), stored in row-major format (*note Multi-dimensional + Array Format::), which are not necessarily contiguous in memory. + Specifically, `in[0]' is the first element of the first array, + `in[istride]' is the second element of the first array, and so on. + In general, the `i'-th element of the `j'-th input array will be + in position `in[i * istride + j * idist]'. Note that, here, `i' + refers to an index into the row-major format for the + multi-dimensional array, rather than an index in any particular + dimension. + + - _In-place transforms_: For plans created with the + `FFTW_IN_PLACE' option, the transform is computed + in-place--the output is returned in the `in' array, using the + same strides, etcetera, as were used in the input. + + * `out', `ostride' and `odist' describe the output array(s). The + format is the same as for the input array. + + - _In-place transforms_: These parameters are ignored for plans + created with the `FFTW_IN_PLACE' option. + + + The function `fftwnd_one' transforms a single, contiguous input +array to a contiguous output array. By definition, the call + fftwnd_one(plan, in, out) + is equivalent to + fftwnd(plan, 1, in, 1, 1, out, 1, 1) + + +File: fftw.info, Node: fftwnd_destroy_plan, Next: What FFTWND Really Computes, Prev: fftwnd, Up: Multi-dimensional Transforms Reference + +3.3.3 Destroying a Multi-dimensional Plan +----------------------------------------- + + #include + + void fftwnd_destroy_plan(fftwnd_plan plan); + + The function `fftwnd_destroy_plan' frees the plan `plan' and +releases all the memory associated with it. After destruction, a plan +is no longer valid. + + +File: fftw.info, Node: What FFTWND Really Computes, Prev: fftwnd_destroy_plan, Up: Multi-dimensional Transforms Reference + +3.3.4 What FFTWND Really Computes +--------------------------------- + +The conventions that we follow for the multi-dimensional transform are +analogous to those for the one-dimensional transform. In particular, the +forward transform has a negative sign in the exponent and neither the +forward nor the backward transforms will perform any normalization. +Computing the backward transform of the forward transform will multiply +the array by the product of its dimensions. The output is in-order, and +the zeroth element of the output is the amplitude of the zero frequency +component. + + The TeX version of this manual contains the exact definition of the +n-dimensional transform FFTW uses. It is not possible to display the +definition on a ASCII terminal properly. + + +File: fftw.info, Node: Real One-dimensional Transforms Reference, Next: Real Multi-dimensional Transforms Reference, Prev: Multi-dimensional Transforms Reference, Up: FFTW Reference + +3.4 Real One-dimensional Transforms Reference +============================================= + +The one-dimensional real routines are generally prefixed with `rfftw_'. +(1) Programs using RFFTW should be linked with `-lrfftw -lfftw -lm' on +Unix systems, or with the RFFTW, the FFTW, and the standard math +libraries in general. + +* Menu: + +* rfftw_create_plan:: Plan Creation +* rfftw:: Plan Execution +* rfftw_destroy_plan:: Plan Destruction +* What RFFTW Really Computes:: + + ---------- Footnotes ---------- + + (1) The etymologically-correct spelling would be `frftw_', but it is +hard to remember. + + +File: fftw.info, Node: rfftw_create_plan, Next: rfftw, Prev: Real One-dimensional Transforms Reference, Up: Real One-dimensional Transforms Reference + +3.4.1 Plan Creation for Real One-dimensional Transforms +------------------------------------------------------- + + #include + + rfftw_plan rfftw_create_plan(int n, fftw_direction dir, int flags); + + rfftw_plan rfftw_create_plan_specific(int n, fftw_direction dir, + int flags, fftw_real *in, int istride, + fftw_real *out, int ostride); + + The function `rfftw_create_plan' creates a plan, which is a data +structure containing all the information that `rfftw' needs in order to +compute the 1D real Fourier transform. You can create as many plans as +you need, but only one plan for a given array size is required (a plan +can be reused many times). + + `rfftw_create_plan' returns a valid plan, or `NULL' if, for some +reason, the plan can't be created. In the default installation, this +cannot happen, but it is possible to configure RFFTW in such a way that +some input sizes are forbidden, and RFFTW cannot create a plan. + + The `rfftw_create_plan_specific' variant takes as additional +arguments specific input/output arrays and their strides. For the last +four arguments, you should pass the arrays and strides that you will +eventually be passing to `rfftw'. The resulting plans will be +optimized for those arrays and strides, although they may be used on +other arrays as well. Note: the contents of the in and out arrays are +_destroyed_ by the specific planner (the initial contents are ignored, +so the arrays need not have been initialized). *Note Discussion on +Specific Plans::, for a discussion on specific plans. + +Arguments +......... + + * `n' is the size of the transform. It can be any positive integer. + + - RFFTW is best at handling sizes of the form 2^a 3^b 5^c 7^d + 11^e 13^f, where e+f is either 0 or 1, and the other + exponents are arbitrary. Other sizes are computed by means + of a slow, general-purpose routine (reducing to O(n^2) + performance for prime sizes). (It is possible to customize + RFFTW for different array sizes. *Note Installation and + Customization::, for more information.) Transforms whose + sizes are powers of 2 are especially fast. + + * `dir' is the direction of the desired transform, either + `FFTW_REAL_TO_COMPLEX' or `FFTW_COMPLEX_TO_REAL', corresponding to + `FFTW_FORWARD' or `FFTW_BACKWARD', respectively. + + * `flags' is a boolean OR (`|') of zero or more of the following: + - `FFTW_MEASURE': this flag tells RFFTW to find the optimal + plan by actually _computing_ several FFTs and measuring their + execution time. Depending on the installation, this can take + some time. + + - `FFTW_ESTIMATE': do not run any FFT and provide a "reasonable" + plan (for a RISC processor with many registers). If neither + `FFTW_ESTIMATE' nor `FFTW_MEASURE' is provided, the default is + `FFTW_ESTIMATE'. + + - `FFTW_OUT_OF_PLACE': produce a plan assuming that the input + and output arrays will be distinct (this is the default). + + - `FFTW_IN_PLACE': produce a plan assuming that you want the + output in the input array. The algorithm used is not + necessarily in place: RFFTW is able to compute true in-place + transforms only for small values of `n'. If RFFTW is not + able to compute the transform in-place, it will allocate a + temporary array (unless you provide one yourself), compute + the transform out of place, and copy the result back. + _Warning: This option changes the meaning of some parameters + of `rfftw'_ (*note Computing the Real One-dimensional + Transform: rfftw.). + + The default mode of operation is `FFTW_OUT_OF_PLACE'. + + - `FFTW_USE_WISDOM': use any `wisdom' that is available to help + in the creation of the plan. (*Note Words of Wisdom::.) This + can greatly speed the creation of plans, especially with the + `FFTW_MEASURE' option. `FFTW_ESTIMATE' plans can also take + advantage of `wisdom' to produce a more optimal plan (based + on past measurements) than the estimation heuristic would + normally generate. When the `FFTW_MEASURE' option is used, + new `wisdom' will also be generated if the current transform + size is not completely understood by existing `wisdom'. + + + * `in', `out', `istride', `ostride' (only for + `rfftw_create_plan_specific'): see corresponding arguments in the + description of `rfftw'. (*Note Computing the Real One-dimensional + Transform: rfftw.) In particular, the `out' and `ostride' + parameters have the same special meaning for `FFTW_IN_PLACE' + transforms as they have for `rfftw'. + + + +File: fftw.info, Node: rfftw, Next: rfftw_destroy_plan, Prev: rfftw_create_plan, Up: Real One-dimensional Transforms Reference + +3.4.2 Computing the Real One-dimensional Transform +-------------------------------------------------- + + #include + + void rfftw(rfftw_plan plan, int howmany, + fftw_real *in, int istride, int idist, + fftw_real *out, int ostride, int odist); + + void rfftw_one(rfftw_plan plan, fftw_real *in, fftw_real *out); + + The function `rfftw' computes the Real One-dimensional Fourier +Transform, using a plan created by `rfftw_create_plan' (*note Plan +Creation for Real One-dimensional Transforms: rfftw_create_plan.). The +function `rfftw_one' provides a simplified interface for the common +case of single input array of stride 1. + + _Important:_ When invoked for an out-of-place, +`FFTW_COMPLEX_TO_REAL' transform, the input array is overwritten with +scratch values by these routines. The input array is not modified for +`FFTW_REAL_TO_COMPLEX' transforms. + +Arguments +......... + + * `plan' is the plan created by `rfftw_create_plan' (*note Plan + Creation for Real One-dimensional Transforms: rfftw_create_plan.). + + * `howmany' is the number of transforms `rfftw' will compute. It is + faster to tell RFFTW to compute many transforms, instead of simply + calling `rfftw' many times. + + * `in', `istride' and `idist' describe the input array(s). There + are two cases. If the `plan' defines a `FFTW_REAL_TO_COMPLEX' + transform, `in' is a real array. Otherwise, for + `FFTW_COMPLEX_TO_REAL' transforms, `in' is a halfcomplex array + _whose contents will be destroyed_. + + * `out', `ostride' and `odist' describe the output array(s), and + have the same meaning as the corresponding parameters for the + input array. + + - _In-place transforms_: If the `plan' specifies an in-place + transform, `ostride' and `odist' are always ignored. If + `out' is `NULL', `out' is ignored, too. Otherwise, `out' is + interpreted as a pointer to an array of `n' complex numbers, + that FFTW will use as temporary space to perform the in-place + computation. `out' is used as scratch space and its contents + destroyed. In this case, `out' must be an ordinary array + whose elements are contiguous in memory (no striding). + + + The function `rfftw_one' transforms a single, contiguous input array +to a contiguous output array. By definition, the call + rfftw_one(plan, in, out) + is equivalent to + rfftw(plan, 1, in, 1, 1, out, 1, 1) + + +File: fftw.info, Node: rfftw_destroy_plan, Next: What RFFTW Really Computes, Prev: rfftw, Up: Real One-dimensional Transforms Reference + +3.4.3 Destroying a Real One-dimensional Plan +-------------------------------------------- + + #include + + void rfftw_destroy_plan(rfftw_plan plan); + + The function `rfftw_destroy_plan' frees the plan `plan' and releases +all the memory associated with it. After destruction, a plan is no +longer valid. + + +File: fftw.info, Node: What RFFTW Really Computes, Prev: rfftw_destroy_plan, Up: Real One-dimensional Transforms Reference + +3.4.4 What RFFTW Really Computes +-------------------------------- + +In this section, we define precisely what RFFTW computes. + + The real to complex (`FFTW_REAL_TO_COMPLEX') transform of a real +array X of size n computes an hermitian array Y, where Y[i] = sum for j = 0 to (n - 1) of X[j] * exp(-2 pi i j sqrt(-1)/n) + (That Y is a hermitian array is not intended to be obvious, although +the proof is easy.) The hermitian array Y is stored in halfcomplex +order (*note Data Types::). Currently, RFFTW provides no way to +compute a real to complex transform with a positive sign in the +exponent. + + The complex to real (`FFTW_COMPLEX_TO_REAL') transform of a hermitian +array X of size n computes a real array Y, where Y[i] = sum for j = 0 to (n - 1) of X[j] * exp(2 pi i j sqrt(-1)/n) + (That Y is a real array is not intended to be obvious, although the +proof is easy.) The hermitian input array X is stored in halfcomplex +order (*note Data Types::). Currently, RFFTW provides no way to +compute a complex to real transform with a negative sign in the +exponent. + + Like FFTW, RFFTW computes an unnormalized transform. In other words, +applying the real to complex (forward) and then the complex to real +(backward) transform will multiply the input by n. + + +File: fftw.info, Node: Real Multi-dimensional Transforms Reference, Next: Wisdom Reference, Prev: Real One-dimensional Transforms Reference, Up: FFTW Reference + +3.5 Real Multi-dimensional Transforms Reference +=============================================== + +The multi-dimensional real routines are generally prefixed with +`rfftwnd_'. Programs using RFFTWND should be linked with `-lrfftw +-lfftw -lm' on Unix systems, or with the FFTW, RFFTW, and standard math +libraries in general. + +* Menu: + +* rfftwnd_create_plan:: Plan Creation +* rfftwnd:: Plan Execution +* Array Dimensions for Real Multi-dimensional Transforms:: +* Strides in In-place RFFTWND:: +* rfftwnd_destroy_plan:: Plan Destruction +* What RFFTWND Really Computes:: + + +File: fftw.info, Node: rfftwnd_create_plan, Next: rfftwnd, Prev: Real Multi-dimensional Transforms Reference, Up: Real Multi-dimensional Transforms Reference + +3.5.1 Plan Creation for Real Multi-dimensional Transforms +--------------------------------------------------------- + + #include + + rfftwnd_plan rfftwnd_create_plan(int rank, const int *n, + fftw_direction dir, int flags); + + rfftwnd_plan rfftw2d_create_plan(int nx, int ny, + fftw_direction dir, int flags); + + rfftwnd_plan rfftw3d_create_plan(int nx, int ny, int nz, + fftw_direction dir, int flags); + + The function `rfftwnd_create_plan' creates a plan, which is a data +structure containing all the information that `rfftwnd' needs in order +to compute a multi-dimensional real Fourier transform. You can create +as many plans as you need, but only one plan for a given array size is +required (a plan can be reused many times). The functions +`rfftw2d_create_plan' and `rfftw3d_create_plan' are optional, +alternative interfaces to `rfftwnd_create_plan' for two and three +dimensions, respectively. + + `rfftwnd_create_plan' returns a valid plan, or `NULL' if, for some +reason, the plan can't be created. This can happen if the arguments +are invalid in some way (e.g. if `rank' < 0). + +Arguments +......... + + * `rank' is the dimensionality of the arrays to be transformed. It + can be any non-negative integer. + + * `n' is a pointer to an array of `rank' integers, giving the size + of each dimension of the arrays to be transformed. Note that these + are always the dimensions of the _real_ arrays; the complex arrays + have different dimensions (*note Array Dimensions for Real + Multi-dimensional Transforms::). These sizes, which must be + positive integers, correspond to the dimensions of row-major + arrays--i.e. `n[0]' is the size of the dimension whose indices + vary most slowly, and so on. (*Note Multi-dimensional Array + Format::, for more information.) + - *Note Plan Creation for Real One-dimensional Transforms: + rfftw_create_plan, for more information regarding optimal + array sizes. + + * `nx' and `ny' in `rfftw2d_create_plan' are positive integers + specifying the dimensions of the rank 2 array to be transformed. + i.e. they specify that the transform will operate on `nx x ny' + arrays in row-major order, where `nx' is the number of rows and + `ny' is the number of columns. + + * `nx', `ny' and `nz' in `rfftw3d_create_plan' are positive integers + specifying the dimensions of the rank 3 array to be transformed. + i.e. they specify that the transform will operate on `nx x ny x + nz' arrays in row-major order. + + * `dir' is the direction of the desired transform, either + `FFTW_REAL_TO_COMPLEX' or `FFTW_COMPLEX_TO_REAL', corresponding to + `FFTW_FORWARD' or `FFTW_BACKWARD', respectively. + + * `flags' is a boolean OR (`|') of zero or more of the following: + - `FFTW_MEASURE': this flag tells FFTW to find the optimal plan + by actually _computing_ several FFTs and measuring their + execution time. + + - `FFTW_ESTIMATE': do not run any FFT and provide a "reasonable" + plan (for a RISC processor with many registers). If neither + `FFTW_ESTIMATE' nor `FFTW_MEASURE' is provided, the default is + `FFTW_ESTIMATE'. + + - `FFTW_OUT_OF_PLACE': produce a plan assuming that the input + and output arrays will be distinct (this is the default). + + - `FFTW_IN_PLACE': produce a plan assuming that you want to + perform the transform in-place. (Unlike the one-dimensional + transform, this "really" performs the transform in-place.) + Note that, if you want to perform in-place transforms, you + _must_ use a plan created with this option. The use of this + option has important implications for the size of the + input/output array (*note Computing the Real + Multi-dimensional Transform: rfftwnd.). + + The default mode of operation is `FFTW_OUT_OF_PLACE'. + + - `FFTW_USE_WISDOM': use any `wisdom' that is available to help + in the creation of the plan. (*Note Words of Wisdom::.) This + can greatly speed the creation of plans, especially with the + `FFTW_MEASURE' option. `FFTW_ESTIMATE' plans can also take + advantage of `wisdom' to produce a more optimal plan (based + on past measurements) than the estimation heuristic would + normally generate. When the `FFTW_MEASURE' option is used, + new `wisdom' will also be generated if the current transform + size is not completely understood by existing `wisdom'. Note + that the same `wisdom' is shared between one-dimensional and + multi-dimensional transforms. + + + + +File: fftw.info, Node: rfftwnd, Next: Array Dimensions for Real Multi-dimensional Transforms, Prev: rfftwnd_create_plan, Up: Real Multi-dimensional Transforms Reference + +3.5.2 Computing the Real Multi-dimensional Transform +---------------------------------------------------- + + #include + + void rfftwnd_real_to_complex(rfftwnd_plan plan, int howmany, + fftw_real *in, int istride, int idist, + fftw_complex *out, int ostride, int odist); + void rfftwnd_complex_to_real(rfftwnd_plan plan, int howmany, + fftw_complex *in, int istride, int idist, + fftw_real *out, int ostride, int odist); + + void rfftwnd_one_real_to_complex(rfftwnd_plan p, fftw_real *in, + fftw_complex *out); + void rfftwnd_one_complex_to_real(rfftwnd_plan p, fftw_complex *in, + fftw_real *out); + + These functions compute the real multi-dimensional Fourier Transform, +using a plan created by `rfftwnd_create_plan' (*note Plan Creation for +Real Multi-dimensional Transforms: rfftwnd_create_plan.). (Note that +the plan determines the rank and dimensions of the array to be +transformed.) The ``rfftwnd_one_'' functions provide a simplified +interface for the common case of single input array of stride 1. +Unlike other transform routines in FFTW, we here use separate functions +for the two directions of the transform in order to correctly express +the datatypes of the parameters. + + _Important:_ When invoked for an out-of-place, +`FFTW_COMPLEX_TO_REAL' transform with `rank > 1', the input array is +overwritten with scratch values by these routines. The input array is +not modified for `FFTW_REAL_TO_COMPLEX' transforms or for +`FFTW_COMPLEX_TO_REAL' with `rank == 1'. + +Arguments +......... + + * `plan' is the plan created by `rfftwnd_create_plan'. (*note Plan + Creation for Real Multi-dimensional Transforms: + rfftwnd_create_plan.). In the case of two and three-dimensional + transforms, it could also have been created by + `rfftw2d_create_plan' or `rfftw3d_create_plan', respectively. + + `FFTW_REAL_TO_COMPLEX' plans must be used with the + ``real_to_complex'' functions, and `FFTW_COMPLEX_TO_REAL' plans + must be used with the ``complex_to_real'' functions. It is an + error to mismatch the plan direction and the transform function. + + * `howmany' is the number of transforms to be computed. + + * `in', `istride' and `idist' describe the input array(s). There + are `howmany' input arrays; the first one is pointed to by `in', + the second one is pointed to by `in + idist', and so on, up to `in + + (howmany - 1) * idist'. Each input array is stored in row-major + format (*note Multi-dimensional Array Format::), and is not + necessarily contiguous in memory. Specifically, `in[0]' is the + first element of the first array, `in[istride]' is the second + element of the first array, and so on. In general, the `i'-th + element of the `j'-th input array will be in position `in[i * + istride + j * idist]'. Note that, here, `i' refers to an index into + the row-major format for the multi-dimensional array, rather than + an index in any particular dimension. + + The dimensions of the arrays are different for real and complex + data, and are discussed in more detail below (*note Array + Dimensions for Real Multi-dimensional Transforms::). + + - _In-place transforms_: For plans created with the + `FFTW_IN_PLACE' option, the transform is computed + in-place--the output is returned in the `in' array. The + meaning of the `stride' and `dist' parameters in this case is + subtle and is discussed below (*note Strides in In-place + RFFTWND::). + + * `out', `ostride' and `odist' describe the output array(s). The + format is the same as that for the input array. See below for a + discussion of the dimensions of the output array for real and + complex data. + + - _In-place transforms_: These parameters are ignored for plans + created with the `FFTW_IN_PLACE' option. + + + The function `rfftwnd_one' transforms a single, contiguous input +array to a contiguous output array. By definition, the call + rfftwnd_one_...(plan, in, out) + is equivalent to + rfftwnd_...(plan, 1, in, 1, 1, out, 1, 1) + + +File: fftw.info, Node: Array Dimensions for Real Multi-dimensional Transforms, Next: Strides in In-place RFFTWND, Prev: rfftwnd, Up: Real Multi-dimensional Transforms Reference + +3.5.3 Array Dimensions for Real Multi-dimensional Transforms +------------------------------------------------------------ + +The output of a multi-dimensional transform of real data contains +symmetries that, in principle, make half of the outputs redundant +(*note What RFFTWND Really Computes::). In practice, it is not +possible to entirely realize these savings in an efficient and +understandable format. Instead, the output of the rfftwnd transforms is +_slightly_ over half of the output of the corresponding complex +transform. We do not "pack" the data in any way, but store it as an +ordinary array of `fftw_complex' values. In fact, this data is simply +a subsection of what would be the array in the corresponding complex +transform. + + Specifically, for a real transform of dimensions n1 x n2 x ... x nd, +the complex data is an n1 x n2 x ... x (nd/2+1) array of `fftw_complex' +values in row-major order (with the division rounded down). That is, +we only store the lower half (plus one element) of the last dimension +of the data from the ordinary complex transform. (We could have +instead taken half of any other dimension, but implementation turns out +to be simpler if the last, contiguous, dimension is used.) + + Since the complex data is slightly larger than the real data, some +complications arise for in-place transforms. In this case, the final +dimension of the real data must be padded with extra values to +accommodate the size of the complex data--two extra if the last +dimension is even and one if it is odd. That is, the last dimension of +the real data must physically contain 2 * (nd/2+1) `fftw_real' values +(exactly enough to hold the complex data). This physical array size +does not, however, change the _logical_ array size--only nd values are +actually stored in the last dimension, and nd is the last dimension +passed to `rfftwnd_create_plan'. + + +File: fftw.info, Node: Strides in In-place RFFTWND, Next: rfftwnd_destroy_plan, Prev: Array Dimensions for Real Multi-dimensional Transforms, Up: Real Multi-dimensional Transforms Reference + +3.5.4 Strides in In-place RFFTWND +--------------------------------- + +The fact that the input and output datatypes are different for rfftwnd +complicates the meaning of the `stride' and `dist' parameters of +in-place transforms--are they in units of `fftw_real' or `fftw_complex' +elements? When reading the input, they are interpreted in units of the +datatype of the input data. When writing the output, the `istride' and +`idist' are translated to the output datatype's "units" in one of two +ways, corresponding to the two most common situations in which `stride' +and `dist' parameters are useful. Below, we refer to these +"translated" parameters as `ostride_t' and `odist_t'. (Note that these +are computed internally by rfftwnd; the actual `ostride' and `odist' +parameters are ignored for in-place transforms.) + + First, there is the case where you are transforming a number of +contiguous arrays located one after another in memory. In this +situation, `istride' is `1' and `idist' is the product of the physical +dimensions of the array. `ostride_t' and `odist_t' are then chosen so +that the output arrays are contiguous and lie on top of the input +arrays. `ostride_t' is therefore `1'. For a real-to-complex +transform, `odist_t' is `idist/2'; for a complex-to-real transform, +`odist_t' is `idist*2'. + + The second case is when you have an array in which each element has +`nc' components (e.g. a structure with `nc' numeric fields), and you +want to transform all of the components at once. Here, `istride' is +`nc' and `idist' is `1'. For this case, it is natural to want the +output to also have `nc' consecutive components, now of the output data +type; this is exactly what rfftwnd does. Specifically, it uses an +`ostride_t' equal to `istride', and an `odist_t' of `1'. (Astute +readers will realize that some extra buffer space is required in order +to perform such a transform; this is handled automatically by rfftwnd.) + + The general rule is as follows. `ostride_t' equals `istride'. If +`idist' is `1' and `idist' is less than `istride', then `odist_t' is +`1'. Otherwise, for a real-to-complex transform `odist_t' is `idist/2' +and for a complex-to-real transform `odist_t' is `idist*2'. + + +File: fftw.info, Node: rfftwnd_destroy_plan, Next: What RFFTWND Really Computes, Prev: Strides in In-place RFFTWND, Up: Real Multi-dimensional Transforms Reference + +3.5.5 Destroying a Multi-dimensional Plan +----------------------------------------- + + #include + + void rfftwnd_destroy_plan(rfftwnd_plan plan); + + The function `rfftwnd_destroy_plan' frees the plan `plan' and +releases all the memory associated with it. After destruction, a plan +is no longer valid. + + +File: fftw.info, Node: What RFFTWND Really Computes, Prev: rfftwnd_destroy_plan, Up: Real Multi-dimensional Transforms Reference + +3.5.6 What RFFTWND Really Computes +---------------------------------- + +The conventions that we follow for the real multi-dimensional transform +are analogous to those for the complex multi-dimensional transform. In +particular, the forward transform has a negative sign in the exponent +and neither the forward nor the backward transforms will perform any +normalization. Computing the backward transform of the forward +transform will multiply the array by the product of its dimensions (that +is, the logical dimensions of the real data). The forward transform is +real-to-complex and the backward transform is complex-to-real. + + The TeX version of this manual contains the exact definition of the +n-dimensional transform RFFTWND uses. It is not possible to display +the definition on a ASCII terminal properly. + + +File: fftw.info, Node: Wisdom Reference, Next: Memory Allocator Reference, Prev: Real Multi-dimensional Transforms Reference, Up: FFTW Reference + +3.6 Wisdom Reference +==================== + +* Menu: + +* fftw_export_wisdom:: +* fftw_import_wisdom:: +* fftw_forget_wisdom:: + + +File: fftw.info, Node: fftw_export_wisdom, Next: fftw_import_wisdom, Prev: Wisdom Reference, Up: Wisdom Reference + +3.6.1 Exporting Wisdom +---------------------- + + #include + + void fftw_export_wisdom(void (*emitter)(char c, void *), void *data); + void fftw_export_wisdom_to_file(FILE *output_file); + char *fftw_export_wisdom_to_string(void); + + These functions allow you to export all currently accumulated +`wisdom' in a form from which it can be later imported and restored, +even during a separate run of the program. (*Note Words of Wisdom::.) +The current store of `wisdom' is not affected by calling any of these +routines. + + `fftw_export_wisdom' exports the `wisdom' to any output medium, as +specified by the callback function `emitter'. `emitter' is a +`putc'-like function that writes the character `c' to some output; its +second parameter is the `data' pointer passed to `fftw_export_wisdom'. +For convenience, the following two "wrapper" routines are provided: + + `fftw_export_wisdom_to_file' writes the `wisdom' to the current +position in `output_file', which should be open with write permission. +Upon exit, the file remains open and is positioned at the end of the +`wisdom' data. + + `fftw_export_wisdom_to_string' returns a pointer to a +`NULL'-terminated string holding the `wisdom' data. This string is +dynamically allocated, and it is the responsibility of the caller to +deallocate it with `fftw_free' when it is no longer needed. + + All of these routines export the wisdom in the same format, which we +will not document here except to say that it is LISP-like ASCII text +that is insensitive to white space. + + +File: fftw.info, Node: fftw_import_wisdom, Next: fftw_forget_wisdom, Prev: fftw_export_wisdom, Up: Wisdom Reference + +3.6.2 Importing Wisdom +---------------------- + + #include + + fftw_status fftw_import_wisdom(int (*get_input)(void *), void *data); + fftw_status fftw_import_wisdom_from_file(FILE *input_file); + fftw_status fftw_import_wisdom_from_string(const char *input_string); + + These functions import `wisdom' into a program from data stored by +the `fftw_export_wisdom' functions above. (*Note Words of Wisdom::.) +The imported `wisdom' supplements rather than replaces any `wisdom' +already accumulated by the running program (except when there is +conflicting `wisdom', in which case the existing wisdom is replaced). + + `fftw_import_wisdom' imports `wisdom' from any input medium, as +specified by the callback function `get_input'. `get_input' is a +`getc'-like function that returns the next character in the input; its +parameter is the `data' pointer passed to `fftw_import_wisdom'. If the +end of the input data is reached (which should never happen for valid +data), it may return either `NULL' (ASCII 0) or `EOF' (as defined in +`'). For convenience, the following two "wrapper" routines +are provided: + + `fftw_import_wisdom_from_file' reads `wisdom' from the current +position in `input_file', which should be open with read permission. +Upon exit, the file remains open and is positioned at the end of the +`wisdom' data. + + `fftw_import_wisdom_from_string' reads `wisdom' from the +`NULL'-terminated string `input_string'. + + The return value of these routines is `FFTW_SUCCESS' if the wisdom +was read successfully, and `FFTW_FAILURE' otherwise. Note that, in all +of these functions, any data in the input stream past the end of the +`wisdom' data is simply ignored (it is not even read if the `wisdom' +data is well-formed). + + +File: fftw.info, Node: fftw_forget_wisdom, Prev: fftw_import_wisdom, Up: Wisdom Reference + +3.6.3 Forgetting Wisdom +----------------------- + + #include + + void fftw_forget_wisdom(void); + + Calling `fftw_forget_wisdom' causes all accumulated `wisdom' to be +discarded and its associated memory to be freed. (New `wisdom' can +still be gathered subsequently, however.) + + +File: fftw.info, Node: Memory Allocator Reference, Next: Thread safety, Prev: Wisdom Reference, Up: FFTW Reference + +3.7 Memory Allocator Reference +============================== + + #include + + void *(*fftw_malloc_hook) (size_t n); + void (*fftw_free_hook) (void *p); + + Whenever it has to allocate and release memory, FFTW ordinarily calls +`malloc' and `free'. If `malloc' fails, FFTW prints an error message +and exits. This behavior may be undesirable in some applications. +Also, special memory-handling functions may be necessary in certain +environments. Consequently, FFTW provides means by which you can install +your own memory allocator and take whatever error-correcting action you +find appropriate. The variables `fftw_malloc_hook' and +`fftw_free_hook' are pointers to functions, and they are normally +`NULL'. If you set those variables to point to other functions, then +FFTW will use your routines instead of `malloc' and `free'. +`fftw_malloc_hook' must point to a `malloc'-like function, and +`fftw_free_hook' must point to a `free'-like function. + + +File: fftw.info, Node: Thread safety, Prev: Memory Allocator Reference, Up: FFTW Reference + +3.8 Thread safety +================= + +Users writing multi-threaded programs must concern themselves with the +"thread safety" of the libraries they use--that is, whether it is safe +to call routines in parallel from multiple threads. FFTW can be used +in such an environment, but some care must be taken because certain +parts of FFTW use private global variables to share data between calls. +In particular, the plan-creation functions share trigonometric tables +and accumulated `wisdom'. (Users should note that these comments only +apply to programs using shared-memory threads. Parallelism using MPI +or forked processes involves a separate address-space and global +variables for each process, and is not susceptible to problems of this +sort.) + + The central restriction of FFTW is that it is not safe to create +multiple plans in parallel. You must either create all of your plans +from a single thread, or instead use a semaphore, mutex, or other +mechanism to ensure that different threads don't attempt to create plans +at the same time. The same restriction also holds for destruction of +plans and importing/forgetting `wisdom'. Once created, a plan may +safely be used in any thread. + + The actual transform routines in FFTW (`fftw_one', etcetera) are +re-entrant and thread-safe, so it is fine to call them simultaneously +from multiple threads. Another question arises, however--is it safe to +use the _same plan_ for multiple transforms in parallel? (It would be +unsafe if, for example, the plan were modified in some way by the +transform.) We address this question by defining an additional planner +flag, `FFTW_THREADSAFE'. When included in the flags for any of the +plan-creation routines, `FFTW_THREADSAFE' guarantees that the resulting +plan will be read-only and safe to use in parallel by multiple threads. + + +File: fftw.info, Node: Parallel FFTW, Next: Calling FFTW from Fortran, Prev: FFTW Reference, Up: Top + +4 Parallel FFTW +*************** + +In this chapter we discuss the use of FFTW in a parallel environment, +documenting the different parallel libraries that we have provided. +(Users calling FFTW from a multi-threaded program should also consult +*Note Thread safety::.) The FFTW package currently contains three +parallel transform implementations that leverage the uniprocessor FFTW +code: + + * The first set of routines utilizes shared-memory threads for + parallel one- and multi-dimensional transforms of both real and + complex data. Any program using FFTW can be trivially modified to + use the multi-threaded routines. This code can use any common + threads implementation, including POSIX threads. (POSIX threads + are available on most Unix variants, including Linux.) These + routines are located in the `threads' directory, and are + documented in *Note Multi-threaded FFTW::. + + * The `mpi' directory contains multi-dimensional transforms of real + and complex data for parallel machines supporting MPI. It also + includes parallel one-dimensional transforms for complex data. + The main feature of this code is that it supports + distributed-memory transforms, so it runs on everything from + workstation clusters to massively-parallel supercomputers. More + information on MPI can be found at the MPI home page + (http://www.mcs.anl.gov/mpi). The FFTW MPI routines are + documented in *Note MPI FFTW::. + + * We also have an experimental parallel implementation written in + Cilk, a C-like parallel language developed at MIT and currently + available for several SMP platforms. For more information on Cilk + see the Cilk home page (http://supertech.lcs.mit.edu/cilk). The + FFTW Cilk code can be found in the `cilk' directory, with + parallelized one- and multi-dimensional transforms of complex + data. The Cilk FFTW routines are documented in `cilk/README'. + + +* Menu: + +* Multi-threaded FFTW:: +* MPI FFTW:: + + +File: fftw.info, Node: Multi-threaded FFTW, Next: MPI FFTW, Prev: Parallel FFTW, Up: Parallel FFTW + +4.1 Multi-threaded FFTW +======================= + +In this section we document the parallel FFTW routines for shared-memory +threads on SMP hardware. These routines, which support parallel one- +and multi-dimensional transforms of both real and complex data, are the +easiest way to take advantage of multiple processors with FFTW. They +work just like the corresponding uniprocessor transform routines, except +that they take the number of parallel threads to use as an extra +parameter. Any program that uses the uniprocessor FFTW can be trivially +modified to use the multi-threaded FFTW. + +* Menu: + +* Installation and Supported Hardware/Software:: +* Usage of Multi-threaded FFTW:: +* How Many Threads to Use?:: +* Using Multi-threaded FFTW in a Multi-threaded Program:: +* Tips for Optimal Threading:: + + +File: fftw.info, Node: Installation and Supported Hardware/Software, Next: Usage of Multi-threaded FFTW, Prev: Multi-threaded FFTW, Up: Multi-threaded FFTW + +4.1.1 Installation and Supported Hardware/Software +-------------------------------------------------- + +All of the FFTW threads code is located in the `threads' subdirectory +of the FFTW package. On Unix systems, the FFTW threads libraries and +header files can be automatically configured, compiled, and installed +along with the uniprocessor FFTW libraries simply by including +`--enable-threads' in the flags to the `configure' script (*note +Installation on Unix::). (Note also that the threads routines, when +enabled, are automatically tested by the ``make check'' self-tests.) + + The threads routines require your operating system to have some sort +of shared-memory threads support. Specifically, the FFTW threads +package works with POSIX threads (available on most Unix variants, +including Linux), Solaris threads, BeOS (http://www.be.com) threads +(tested on BeOS DR8.2), Mach C threads (reported to work by users), and +Win32 threads (reported to work by users). (There is also untested +code to use MacOS MP threads.) If you have a shared-memory machine +that uses a different threads API, it should be a simple matter of +programming to include support for it; see the file +`fftw_threads-int.h' for more detail. + + SMP hardware is not required, although of course you need multiple +processors to get any benefit from the multithreaded transforms. + + +File: fftw.info, Node: Usage of Multi-threaded FFTW, Next: How Many Threads to Use?, Prev: Installation and Supported Hardware/Software, Up: Multi-threaded FFTW + +4.1.2 Usage of Multi-threaded FFTW +---------------------------------- + +Here, it is assumed that the reader is already familiar with the usage +of the uniprocessor FFTW routines, described elsewhere in this manual. +We only describe what one has to change in order to use the +multi-threaded routines. + + First, instead of including `' or `', you should +include the files `' or `', +respectively. + + Second, before calling any FFTW routines, you should call the +function: + + int fftw_threads_init(void); + + This function, which should only be called once (probably in your +`main()' function), performs any one-time initialization required to +use threads on your system. It returns zero if successful, and a +non-zero value if there was an error (in which case, something is +seriously wrong and you should probably exit the program). + + Third, when you want to actually compute the transform, you should +use one of the following transform routines instead of the ordinary FFTW +functions: + + fftw_threads(nthreads, plan, howmany, in, istride, + idist, out, ostride, odist); + + fftw_threads_one(nthreads, plan, in, out); + + fftwnd_threads(nthreads, plan, howmany, in, istride, + idist, out, ostride, odist); + + fftwnd_threads_one(nthreads, plan, in, out); + + rfftw_threads(nthreads, plan, howmany, in, istride, + idist, out, ostride, odist); + + rfftw_threads_one(nthreads, plan, in, out); + + rfftwnd_threads_real_to_complex(nthreads, plan, howmany, in, + istride, idist, out, ostride, odist); + + rfftwnd_threads_one_real_to_complex(nthreads, plan, in, out); + + rfftwnd_threads_complex_to_real(nthreads, plan, howmany, in, + istride, idist, out, ostride, odist); + + rfftwnd_threads_one_real_to_complex(nthreads, plan, in, out); + + rfftwnd_threads_one_complex_to_real(nthreads, plan, in, out); + + + All of these routines take exactly the same arguments and have +exactly the same effects as their uniprocessor counterparts (i.e. +without the ``_threads'') _except_ that they take one extra parameter, +`nthreads' (of type `int'), before the normal parameters.(1) The +`nthreads' parameter specifies the number of threads of execution to +use when performing the transform (actually, the maximum number of +threads). + + For example, to parallelize a single one-dimensional transform of +complex data, instead of calling the uniprocessor `fftw_one(plan, in, +out)', you would call `fftw_threads_one(nthreads, plan, in, out)'. +Passing an `nthreads' of `1' means to use only one thread (the main +thread), and is equivalent to calling the uniprocessor routine. +Passing an `nthreads' of `2' means that the transform is potentially +parallelized over two threads (and two processors, if you have them), +and so on. + + These are the only changes you need to make to your source code. +Calls to all other FFTW routines (plan creation, destruction, wisdom, +etcetera) are not parallelized and remain the same. (The same plans and +wisdom are used by both uniprocessor and multi-threaded transforms.) +Your arrays are allocated and formatted in the same way, and so on. + + Programs using the parallel complex transforms should be linked with +`-lfftw_threads -lfftw -lm' on Unix. Programs using the parallel real +transforms should be linked with `-lrfftw_threads -lfftw_threads +-lrfftw -lfftw -lm'. You will also need to link with whatever library +is responsible for threads on your system (e.g. `-lpthread' on Linux). + + ---------- Footnotes ---------- + + (1) There is one exception: when performing one-dimensional in-place +transforms, the `out' parameter is always ignored by the multi-threaded +routines, instead of being used as a workspace if it is non-`NULL' as +in the uniprocessor routines. The multi-threaded routines always +allocate their own workspace (the size of which depends upon the number +of threads). + + +File: fftw.info, Node: How Many Threads to Use?, Next: Using Multi-threaded FFTW in a Multi-threaded Program, Prev: Usage of Multi-threaded FFTW, Up: Multi-threaded FFTW + +4.1.3 How Many Threads to Use? +------------------------------ + +There is a fair amount of overhead involved in spawning and +synchronizing threads, so the optimal number of threads to use depends +upon the size of the transform as well as on the number of processors +you have. + + As a general rule, you don't want to use more threads than you have +processors. (Using more threads will work, but there will be extra +overhead with no benefit.) In fact, if the problem size is too small, +you may want to use fewer threads than you have processors. + + You will have to experiment with your system to see what level of +parallelization is best for your problem size. Useful tools to help you +do this are the test programs that are automatically compiled along with +the threads libraries, `fftw_threads_test' and `rfftw_threads_test' (in +the `threads' subdirectory). These take the same arguments as the +other FFTW test programs (see `tests/README'), except that they also +take the number of threads to use as a first argument, and report the +parallel speedup in speed tests. For example, + + fftw_threads_test 2 -s 128x128 + + will benchmark complex 128x128 transforms using two threads and +report the speedup relative to the uniprocessor transform. + + For instance, on a 4-processor 200MHz Pentium Pro system running +Linux 2.2.0, we found that the "crossover" point at which 2 threads +became beneficial for complex transforms was about 4k points, while 4 +threads became beneficial at 8k points. + + +File: fftw.info, Node: Using Multi-threaded FFTW in a Multi-threaded Program, Next: Tips for Optimal Threading, Prev: How Many Threads to Use?, Up: Multi-threaded FFTW + +4.1.4 Using Multi-threaded FFTW in a Multi-threaded Program +----------------------------------------------------------- + +It is perfectly possible to use the multi-threaded FFTW routines from a +multi-threaded program (e.g. have multiple threads computing +multi-threaded transforms simultaneously). If you have the processors, +more power to you! However, the same restrictions apply as for the +uniprocessor FFTW routines (*note Thread safety::). In particular, you +should recall that you may not create or destroy plans in parallel. + + +File: fftw.info, Node: Tips for Optimal Threading, Prev: Using Multi-threaded FFTW in a Multi-threaded Program, Up: Multi-threaded FFTW + +4.1.5 Tips for Optimal Threading +-------------------------------- + +Not all transforms are equally well-parallelized by the multi-threaded +FFTW routines. (This is merely a consequence of laziness on the part of +the implementors, and is not inherent to the algorithms employed.) +Mainly, the limitations are in the parallel one-dimensional transforms. +The things to avoid if you want optimal parallelization are as follows: + +4.1.6 Parallelization deficiencies in one-dimensional transforms +---------------------------------------------------------------- + + * Large prime factors can sometimes parallelize poorly. Of course, + you should avoid these anyway if you want high performance. + + * Single in-place transforms don't parallelize completely. (Multiple + in-place transforms, i.e. `howmany > 1', are fine.) Again, you + should avoid these in any case if you want high performance, as + they require transforming to a scratch array and copying back. + + * Single real-complex (`rfftw') transforms don't parallelize + completely. This is unfortunate, but parallelizing this correctly + would have involved a lot of extra code (and a much larger + library). You still get some benefit from additional processors, + but if you have a very large number of processors you will + probably be better off using the parallel complex (`fftw') + transforms. Note that multi-dimensional real transforms or + multiple one-dimensional real transforms are fine. + + + +File: fftw.info, Node: MPI FFTW, Prev: Multi-threaded FFTW, Up: Parallel FFTW + +4.2 MPI FFTW +============ + +This section describes the MPI FFTW routines for distributed-memory (and +shared-memory) machines supporting MPI (Message Passing Interface). The +MPI routines are significantly different from the ordinary FFTW because +the transform data here are _distributed_ over multiple processes, so +that each process gets only a portion of the array. Currently, +multi-dimensional transforms of both real and complex data, as well as +one-dimensional transforms of complex data, are supported. + +* Menu: + +* MPI FFTW Installation:: +* Usage of MPI FFTW for Complex Multi-dimensional Transforms:: +* MPI Data Layout:: +* Usage of MPI FFTW for Real Multi-dimensional Transforms:: +* Usage of MPI FFTW for Complex One-dimensional Transforms:: +* MPI Tips:: + + +File: fftw.info, Node: MPI FFTW Installation, Next: Usage of MPI FFTW for Complex Multi-dimensional Transforms, Prev: MPI FFTW, Up: MPI FFTW + +4.2.1 MPI FFTW Installation +--------------------------- + +The FFTW MPI library code is all located in the `mpi' subdirectoy of +the FFTW package (along with source code for test programs). On Unix +systems, the FFTW MPI libraries and header files can be automatically +configured, compiled, and installed along with the uniprocessor FFTW +libraries simply by including `--enable-mpi' in the flags to the +`configure' script (*note Installation on Unix::). + + The only requirement of the FFTW MPI code is that you have the +standard MPI 1.1 (or later) libraries and header files installed on +your system. A free implementation of MPI is available from the MPICH +home page (http://www-unix.mcs.anl.gov/mpi/mpich/). + + Previous versions of the FFTW MPI routines have had an unfortunate +tendency to expose bugs in MPI implementations. The current version has +been largely rewritten, and hopefully avoids some of the problems. If +you run into difficulties, try passing the optional workspace to +`(r)fftwnd_mpi' (see below), as this allows us to use the standard (and +hopefully well-tested) `MPI_Alltoall' primitive for communications. +Please let us know () how things work out. + + Several test programs are included in the `mpi' directory. The ones +most useful to you are probably the `fftw_mpi_test' and +`rfftw_mpi_test' programs, which are run just like an ordinary MPI +program and accept the same parameters as the other FFTW test programs +(c.f. `tests/README'). For example, `mpirun ...params... +fftw_mpi_test -r 0' will run non-terminating complex-transform +correctness tests of random dimensions. They can also do performance +benchmarks. + + +File: fftw.info, Node: Usage of MPI FFTW for Complex Multi-dimensional Transforms, Next: MPI Data Layout, Prev: MPI FFTW Installation, Up: MPI FFTW + +4.2.2 Usage of MPI FFTW for Complex Multi-dimensional Transforms +---------------------------------------------------------------- + +Usage of the MPI FFTW routines is similar to that of the uniprocessor +FFTW. We assume that the reader already understands the usage of the +uniprocessor FFTW routines, described elsewhere in this manual. Some +familiarity with MPI is also helpful. + + A typical program performing a complex two-dimensional MPI transform +might look something like: + + #include + + int main(int argc, char **argv) + { + const int NX = ..., NY = ...; + fftwnd_mpi_plan plan; + fftw_complex *data; + + MPI_Init(&argc,&argv); + + plan = fftw2d_mpi_create_plan(MPI_COMM_WORLD, + NX, NY, + FFTW_FORWARD, FFTW_ESTIMATE); + + ...allocate and initialize data... + + fftwnd_mpi(p, 1, data, NULL, FFTW_NORMAL_ORDER); + + ... + + fftwnd_mpi_destroy_plan(plan); + MPI_Finalize(); + } + + The calls to `MPI_Init' and `MPI_Finalize' are required in all MPI +programs; see the MPI home page (http://www.mcs.anl.gov/mpi/) for more +information. Note that all of your processes run the program in +parallel, as a group; there is no explicit launching of +threads/processes in an MPI program. + + As in the ordinary FFTW, the first thing we do is to create a plan +(of type `fftwnd_mpi_plan'), using: + + fftwnd_mpi_plan fftw2d_mpi_create_plan(MPI_Comm comm, + int nx, int ny, + fftw_direction dir, int flags); + + Except for the first argument, the parameters are identical to those +of `fftw2d_create_plan'. (There are also analogous +`fftwnd_mpi_create_plan' and `fftw3d_mpi_create_plan' functions. +Transforms of any rank greater than one are supported.) The first +argument is an MPI "communicator", which specifies the group of +processes that are to be involved in the transform; the standard +constant `MPI_COMM_WORLD' indicates all available processes. + + Next, one has to allocate and initialize the data. This is somewhat +tricky, because the transform data is distributed across the processes +involved in the transform. It is discussed in detail by the next +section (*note MPI Data Layout::). + + The actual computation of the transform is performed by the function +`fftwnd_mpi', which differs somewhat from its uniprocessor equivalent +and is described by: + + void fftwnd_mpi(fftwnd_mpi_plan p, + int n_fields, + fftw_complex *local_data, fftw_complex *work, + fftwnd_mpi_output_order output_order); + + There are several things to notice here: + + * First of all, all `fftw_mpi' transforms are in-place: the output is + in the `local_data' parameter, and there is no need to specify + `FFTW_IN_PLACE' in the plan flags. + + * The MPI transforms also only support a limited subset of the + `howmany'/`stride'/`dist' functionality of the uniprocessor + routines: the `n_fields' parameter is equivalent to + `howmany=n_fields', `stride=n_fields', and `dist=1'. + (Conceptually, the `n_fields' parameter allows you to transform an + array of contiguous vectors, each with length `n_fields'.) + `n_fields' is `1' if you are only transforming a single, ordinary + array. + + * The `work' parameter is an optional workspace. If it is not + `NULL', it should be exactly the same size as the `local_data' + array. If it is provided, FFTW is able to use the built-in + `MPI_Alltoall' primitive for (often) greater efficiency at the expense + of extra storage space. + + * Finally, the last parameter specifies whether the output data has + the same ordering as the input data (`FFTW_NORMAL_ORDER'), or if + it is transposed (`FFTW_TRANSPOSED_ORDER'). Leaving the data + transposed results in significant performance improvements due to + a saved communication step (needed to un-transpose the data). + Specifically, the first two dimensions of the array are + transposed, as is described in more detail by the next section. + + + The output of `fftwnd_mpi' is identical to that of the corresponding +uniprocessor transform. In particular, you should recall our +conventions for normalization and the sign of the transform exponent. + + The same plan can be used to compute many transforms of the same +size. After you are done with it, you should deallocate it by calling +`fftwnd_mpi_destroy_plan'. + + Important: The FFTW MPI routines must be called in the same order by +all processes involved in the transform. You should assume that they +all are blocking, as if each contained a call to `MPI_Barrier'. + + Programs using the FFTW MPI routines should be linked with +`-lfftw_mpi -lfftw -lm' on Unix, in addition to whatever libraries are +required for MPI. + + +File: fftw.info, Node: MPI Data Layout, Next: Usage of MPI FFTW for Real Multi-dimensional Transforms, Prev: Usage of MPI FFTW for Complex Multi-dimensional Transforms, Up: MPI FFTW + +4.2.3 MPI Data Layout +--------------------- + +The transform data used by the MPI FFTW routines is "distributed": a +distinct portion of it resides with each process involved in the +transform. This allows the transform to be parallelized, for example, +over a cluster of workstations, each with its own separate memory, so +that you can take advantage of the total memory of all the processors +you are parallelizing over. + + In particular, the array is divided according to the rows (first +dimension) of the data: each process gets a subset of the rows of the data. +(This is sometimes called a "slab decomposition.") One consequence of +this is that you can't take advantage of more processors than you have +rows (e.g. `64x64x64' matrix can at most use 64 processors). This +isn't usually much of a limitation, however, as each processor needs a +fair amount of data in order for the parallel-computation benefits to +outweight the communications costs. + + Below, the first dimension of the data will be referred to as ``x'' +and the second dimension as ``y''. + + FFTW supplies a routine to tell you exactly how much data resides on +the current process: + + void fftwnd_mpi_local_sizes(fftwnd_mpi_plan p, + int *local_nx, + int *local_x_start, + int *local_ny_after_transpose, + int *local_y_start_after_transpose, + int *total_local_size); + + Given a plan `p', the other parameters of this routine are set to +values describing the required data layout, described below. + + `total_local_size' is the number of `fftw_complex' elements that you +must allocate for your local data (and workspace, if you choose). +(This value should, of course, be multiplied by `n_fields' if that +parameter to `fftwnd_mpi' is not `1'.) + + The data on the current process has `local_nx' rows, starting at row +`local_x_start'. If `fftwnd_mpi' is called with +`FFTW_TRANSPOSED_ORDER' output, then `y' will be the first dimension of +the output, and the local `y' extent will be given by +`local_ny_after_transpose' and `local_y_start_after_transpose'. +Otherwise, the output has the same dimensions and layout as the input. + + For instance, suppose you want to transform three-dimensional data of +size `nx x ny x nz'. Then, the current process will store a subset of +this data, of size `local_nx x ny x nz', where the `x' indices +correspond to the range `local_x_start' to `local_x_start+local_nx-1' +in the "real" (i.e. logical) array. If `fftwnd_mpi' is called with +`FFTW_TRANSPOSED_ORDER' output, then the result will be a `ny x nx x +nz' array, of which a `local_ny_after_transpose x nx x nz' subset is +stored on the current process (corresponding to `y' values starting at +`local_y_start_after_transpose'). + + The following is an example of allocating such a three-dimensional +array array (`local_data') before the transform and initializing it to +some function `f(x,y,z)': + + fftwnd_mpi_local_sizes(plan, &local_nx, &local_x_start, + &local_ny_after_transpose, + &local_y_start_after_transpose, + &total_local_size); + + local_data = (fftw_complex*) malloc(sizeof(fftw_complex) * + total_local_size); + + for (x = 0; x < local_nx; ++x) + for (y = 0; y < ny; ++y) + for (z = 0; z < nz; ++z) + local_data[(x*ny + y)*nz + z] + = f(x + local_x_start, y, z); + + Some important things to remember: + + * Although the local data is of dimensions `local_nx x ny x nz' in + the above example, do _not_ allocate the array to be of size + `local_nx*ny*nz'. Use `total_local_size' instead. + + * The amount of data on each process will not necessarily be the + same; in fact, `local_nx' may even be zero for some processes. + (For example, suppose you are doing a `6x6' transform on four + processors. There is no way to effectively use the fourth + processor in a slab decomposition, so we leave it empty. Proof + left as an exercise for the reader.) + + * All arrays are, of course, in row-major order (*note + Multi-dimensional Array Format::). + + * If you want to compute the inverse transform of the output of + `fftwnd_mpi', the dimensions of the inverse transform are given by + the dimensions of the output of the forward transform. For + example, if you are using `FFTW_TRANSPOSED_ORDER' output in the + above example, then the inverse plan should be created with + dimensions `ny x nx x nz'. + + * The data layout only depends upon the dimensions of the array, not + on the plan, so you are guaranteed that different plans for the + same size (or inverse plans) will use the same (consistent) data + layouts. + + + +File: fftw.info, Node: Usage of MPI FFTW for Real Multi-dimensional Transforms, Next: Usage of MPI FFTW for Complex One-dimensional Transforms, Prev: MPI Data Layout, Up: MPI FFTW + +4.2.4 Usage of MPI FFTW for Real Multi-dimensional Transforms +------------------------------------------------------------- + +MPI transforms specialized for real data are also available, similiar to +the uniprocessor `rfftwnd' transforms. Just as in the uniprocessor +case, the real-data MPI functions gain roughly a factor of two in speed +(and save a factor of two in space) at the expense of more complicated +data formats in the calling program. Before reading this section, you +should definitely understand how to call the uniprocessor `rfftwnd' +functions and also the complex MPI FFTW functions. + + The following is an example of a program using `rfftwnd_mpi'. It +computes the size `nx x ny x nz' transform of a real function +`f(x,y,z)', multiplies the imaginary part by `2' for fun, then computes +the inverse transform. (We'll also use `FFTW_TRANSPOSED_ORDER' output +for the transform, and additionally supply the optional workspace +parameter to `rfftwnd_mpi', just to add a little spice.) + + #include + + int main(int argc, char **argv) + { + const int nx = ..., ny = ..., nz = ...; + int local_nx, local_x_start, local_ny_after_transpose, + local_y_start_after_transpose, total_local_size; + int x, y, z; + rfftwnd_mpi_plan plan, iplan; + fftw_real *data, *work; + fftw_complex *cdata; + + MPI_Init(&argc,&argv); + + /* create the forward and backward plans: */ + plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, + nx, ny, nz, + FFTW_REAL_TO_COMPLEX, + FFTW_ESTIMATE); + iplan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, + /* dim.'s of REAL data --> */ nx, ny, nz, + FFTW_COMPLEX_TO_REAL, + FFTW_ESTIMATE); + + rfftwnd_mpi_local_sizes(plan, &local_nx, &local_x_start, + &local_ny_after_transpose, + &local_y_start_after_transpose, + &total_local_size); + + data = (fftw_real*) malloc(sizeof(fftw_real) * total_local_size); + + /* workspace is the same size as the data: */ + work = (fftw_real*) malloc(sizeof(fftw_real) * total_local_size); + + /* initialize data to f(x,y,z): */ + for (x = 0; x < local_nx; ++x) + for (y = 0; y < ny; ++y) + for (z = 0; z < nz; ++z) + data[(x*ny + y) * (2*(nz/2+1)) + z] + = f(x + local_x_start, y, z); + + /* Now, compute the forward transform: */ + rfftwnd_mpi(plan, 1, data, work, FFTW_TRANSPOSED_ORDER); + + /* the data is now complex, so typecast a pointer: */ + cdata = (fftw_complex*) data; + + /* multiply imaginary part by 2, for fun: + (note that the data is transposed) */ + for (y = 0; y < local_ny_after_transpose; ++y) + for (x = 0; x < nx; ++x) + for (z = 0; z < (nz/2+1); ++z) + cdata[(y*nx + x) * (nz/2+1) + z].im + *= 2.0; + + /* Finally, compute the inverse transform; the result + is transposed back to the original data layout: */ + rfftwnd_mpi(iplan, 1, data, work, FFTW_TRANSPOSED_ORDER); + + free(data); + free(work); + rfftwnd_mpi_destroy_plan(plan); + rfftwnd_mpi_destroy_plan(iplan); + MPI_Finalize(); + } + + There's a lot of stuff in this example, but it's all just what you +would have guessed, right? We replaced all the `fftwnd_mpi*' functions +by `rfftwnd_mpi*', but otherwise the parameters were pretty much the +same. The data layout distributed among the processes just like for +the complex transforms (*note MPI Data Layout::), but in addition the +final dimension is padded just like it is for the uniprocessor in-place +real transforms (*note Array Dimensions for Real Multi-dimensional +Transforms::). In particular, the `z' dimension of the real input data +is padded to a size `2*(nz/2+1)', and after the transform it contains +`nz/2+1' complex values. + + Some other important things to know about the real MPI transforms: + + * As for the uniprocessor `rfftwnd_create_plan', the dimensions + passed for the `FFTW_COMPLEX_TO_REAL' plan are those of the _real_ + data. In particular, even when `FFTW_TRANSPOSED_ORDER' is used as + in this case, the dimensions are those of the (untransposed) real + output, not the (transposed) complex input. (For the complex MPI + transforms, on the other hand, the dimensions are always those of + the input array.) + + * The output ordering of the transform (`FFTW_TRANSPOSED_ORDER' or + `FFTW_TRANSPOSED_ORDER') _must_ be the same for both forward and + backward transforms. (This is not required in the complex case.) + + * `total_local_size' is the required size in `fftw_real' values, not + `fftw_complex' values as it is for the complex transforms. + + * `local_ny_after_transpose' and `local_y_start_after_transpose' + describe the portion of the array after the transform; that is, + they are indices in the complex array for an + `FFTW_REAL_TO_COMPLEX' transform and in the real array for an + `FFTW_COMPLEX_TO_REAL' transform. + + * `rfftwnd_mpi' always expects `fftw_real*' array arguments, but of + course these pointers can refer to either real or complex arrays, + depending upon which side of the transform you are on. Just as for + in-place uniprocessor real transforms (and also in the example + above), this is most easily handled by typecasting to a complex + pointer when handling the complex data. + + * As with the complex transforms, there are also + `rfftwnd_create_plan' and `rfftw2d_create_plan' functions, and any + rank greater than one is supported. + + + Programs using the MPI FFTW real transforms should link with +`-lrfftw_mpi -lfftw_mpi -lrfftw -lfftw -lm' on Unix. + + +File: fftw.info, Node: Usage of MPI FFTW for Complex One-dimensional Transforms, Next: MPI Tips, Prev: Usage of MPI FFTW for Real Multi-dimensional Transforms, Up: MPI FFTW + +4.2.5 Usage of MPI FFTW for Complex One-dimensional Transforms +-------------------------------------------------------------- + +The MPI FFTW also includes routines for parallel one-dimensional +transforms of complex data (only). Although the speedup is generally +worse than it is for the multi-dimensional routines,(1) these +distributed-memory one-dimensional transforms are especially useful for +performing one-dimensional transforms that don't fit into the memory of +a single machine. + + The usage of these routines is straightforward, and is similar to +that of the multi-dimensional MPI transform functions. You first +include the header `' and then create a plan by calling: + + fftw_mpi_plan fftw_mpi_create_plan(MPI_Comm comm, int n, + fftw_direction dir, int flags); + + The last three arguments are the same as for `fftw_create_plan' +(except that all MPI transforms are automatically `FFTW_IN_PLACE'). +The first argument specifies the group of processes you are using, and +is usually `MPI_COMM_WORLD' (all processes). A plan can be used for +many transforms of the same size, and is destroyed when you are done +with it by calling `fftw_mpi_destroy_plan(plan)'. + + If you don't care about the ordering of the input or output data of +the transform, you can include `FFTW_SCRAMBLED_INPUT' and/or +`FFTW_SCRAMBLED_OUTPUT' in the `flags'. These save some communications +at the expense of having the input and/or output reordered in an +undocumented way. For example, if you are performing an FFT-based +convolution, you might use `FFTW_SCRAMBLED_OUTPUT' for the forward +transform and `FFTW_SCRAMBLED_INPUT' for the inverse transform. + + The transform itself is computed by: + + void fftw_mpi(fftw_mpi_plan p, int n_fields, + fftw_complex *local_data, fftw_complex *work); + + `n_fields', as in `fftwnd_mpi', is equivalent to `howmany=n_fields', +`stride=n_fields', and `dist=1', and should be `1' when you are +computing the transform of a single array. `local_data' contains the +portion of the array local to the current process, described below. +`work' is either `NULL' or an array exactly the same size as +`local_data'; in the latter case, FFTW can use the `MPI_Alltoall' +communications primitive which is (usually) faster at the expense of +extra storage. Upon return, `local_data' contains the portion of the +output local to the current process (see below). + + To find out what portion of the array is stored local to the current +process, you call the following routine: + + void fftw_mpi_local_sizes(fftw_mpi_plan p, + int *local_n, int *local_start, + int *local_n_after_transform, + int *local_start_after_transform, + int *total_local_size); + + `total_local_size' is the number of `fftw_complex' elements you +should actually allocate for `local_data' (and `work'). `local_n' and +`local_start' indicate that the current process stores `local_n' +elements corresponding to the indices `local_start' to +`local_start+local_n-1' in the "real" array. _After the transform, the +process may store a different portion of the array._ The portion of +the data stored on the process after the transform is given by +`local_n_after_transform' and `local_start_after_transform'. This data +is exactly the same as a contiguous segment of the corresponding +uniprocessor transform output (i.e. an in-order sequence of sequential +frequency bins). + + Note that, if you compute both a forward and a backward transform of +the same size, the local sizes are guaranteed to be consistent. That +is, the local size after the forward transform will be the same as the +local size before the backward transform, and vice versa. + + Programs using the FFTW MPI routines should be linked with +`-lfftw_mpi -lfftw -lm' on Unix, in addition to whatever libraries are +required for MPI. + + ---------- Footnotes ---------- + + (1) The 1D transforms require much more communication. All the +communication in our FFT routines takes the form of an all-to-all +communication: the multi-dimensional transforms require two all-to-all +communications (or one, if you use `FFTW_TRANSPOSED_ORDER'), while the +one-dimensional transforms require _three_ (or two, if you use +scrambled input or output). + + +File: fftw.info, Node: MPI Tips, Prev: Usage of MPI FFTW for Complex One-dimensional Transforms, Up: MPI FFTW + +4.2.6 MPI Tips +-------------- + +There are several things you should consider in order to get the best +performance out of the MPI FFTW routines. + + First, if possible, the first and second dimensions of your data +should be divisible by the number of processes you are using. (If only +one can be divisible, then you should choose the first dimension.) +This allows the computational load to be spread evenly among the +processes, and also reduces the communications complexity and overhead. +In the one-dimensional transform case, the size of the transform +should ideally be divisible by the _square_ of the number of processors. + + Second, you should consider using the `FFTW_TRANSPOSED_ORDER' output +format if it is not too burdensome. The speed gains from +communications savings are usually substantial. + + Third, you should consider allocating a workspace for +`(r)fftw(nd)_mpi', as this can often (but not always) improve +performance (at the cost of extra storage). + + Fourth, you should experiment with the best number of processors to +use for your problem. (There comes a point of diminishing returns, +when the communications costs outweigh the computational benefits.(1)) +The `fftw_mpi_test' program can output helpful performance benchmarks. It +accepts the same parameters as the uniprocessor test programs (c.f. +`tests/README') and is run like an ordinary MPI program. For example, +`mpirun -np 4 fftw_mpi_test -s 128x128x128' will benchmark a +`128x128x128' transform on four processors, reporting timings and +parallel speedups for all variants of `fftwnd_mpi' (transposed, with +workspace, etcetera). (Note also that there is the `rfftw_mpi_test' +program for the real transforms.) + + ---------- Footnotes ---------- + + (1) An FFT is particularly hard on communications systems, as it +requires an "all-to-all" communication, which is more or less the worst +possible case. + + +File: fftw.info, Node: Calling FFTW from Fortran, Next: Installation and Customization, Prev: Parallel FFTW, Up: Top + +5 Calling FFTW from Fortran +*************************** + +The standard FFTW libraries include special wrapper functions that allow +Fortran programs to call FFTW subroutines. This chapter describes how +those functions may be employed to use FFTW from Fortran. We assume +here that the reader is already familiar with the usage of FFTW in C, as +described elsewhere in this manual. + + In general, it is not possible to call C functions directly from +Fortran, due to Fortran's inability to pass arguments by value and also +because Fortran compilers typically expect identifiers to be mangled somehow +for linking. However, if C functions are written in a special way, +they _are_ callable from Fortran, and we have employed this technique +to create Fortran-callable "wrapper" functions around the main FFTW +routines. These wrapper functions are included in the FFTW libraries +by default, unless a Fortran compiler isn't found on your system or +`--disable-fortran' is included in the `configure' flags. + + As a result, calling FFTW from Fortran requires little more than +appending ``_f77'' to the function names and then linking normally with +the FFTW libraries. There are a few wrinkles, however, as we shall +discuss below. + +* Menu: + +* Wrapper Routines:: +* FFTW Constants in Fortran:: +* Fortran Examples:: + + +File: fftw.info, Node: Wrapper Routines, Next: FFTW Constants in Fortran, Prev: Calling FFTW from Fortran, Up: Calling FFTW from Fortran + +5.1 Wrapper Routines +==================== + +All of the uniprocessor and multi-threaded transform routines have +Fortran-callable wrappers, except for the wisdom import/export functions +(since it is not possible to exchange string and file arguments portably +with Fortran) and the specific planner routines (*note Discussion on +Specific Plans::). The name of the wrapper routine is the same as that +of the corresponding C routine, but with `fftw/fftwnd/rfftw/rfftwnd' +replaced by `fftw_f77/fftwnd_f77/rfftw_f77/rfftwnd_f77'. For example, +in Fortran, instead of calling `fftw_one' you would call +`fftw_f77_one'.(1) For the most part, all of the arguments to the +functions are the same, with the following exceptions: + + * `plan' variables (what would be of type `fftw_plan', + `rfftwnd_plan', etcetera, in C), must be declared as a type that is + the same size as a pointer (address) on your machine. (Fortran + has no generic pointer type.) The Fortran `integer' type is + usually the same size as a pointer, but you need to be wary + (especially on 64-bit machines). (You could also use `integer*4' + on a 32-bit machine and `integer*8' on a 64-bit machine.) Ugh. + (`g77' has a special type, `integer(kind=7)', that is defined to + be the same size as a pointer.) + + * Any function that returns a value (e.g. `fftw_create_plan') is + converted into a subroutine. The return value is converted into an + additional (first) parameter of the wrapper subroutine. (The + reason for this is that some Fortran implementations seem to have + trouble with C function return values.) + + * When performing one-dimensional `FFTW_IN_PLACE' transforms, you + don't have the option of passing `NULL' for the `out' argument + (since there is no way to pass `NULL' from Fortran). Therefore, + when performing such transforms, you _must_ allocate and pass a + contiguous scratch array of the same size as the transform. Note + that for in-place multi-dimensional (`(r)fftwnd') transforms, the + `out' argument is ignored, so you can pass anything for that + parameter. + + * The wrapper routines expect multi-dimensional arrays to be in + column-major order, which is the ordinary format of Fortran arrays. + They do this transparently and costlessly simply by reversing the + order of the dimensions passed to FFTW, but this has one important + consequence for multi-dimensional real-complex transforms, + discussed below. + + + In general, you should take care to use Fortran data types that +correspond to (i.e. are the same size as) the C types used by FFTW. If +your C and Fortran compilers are made by the same vendor, the +correspondence is usually straightforward (i.e. `integer' corresponds +to `int', `real' corresponds to `float', etcetera). Such simple +correspondences are assumed in the examples below. The examples also +assume that FFTW was compiled in double precision (the default). + + ---------- Footnotes ---------- + + (1) Technically, Fortran 77 identifiers are not allowed to have more +than 6 characters, nor may they contain underscores. Any compiler that +enforces this limitation doesn't deserve to link to FFTW. + + +File: fftw.info, Node: FFTW Constants in Fortran, Next: Fortran Examples, Prev: Wrapper Routines, Up: Calling FFTW from Fortran + +5.2 FFTW Constants in Fortran +============================= + +When creating plans in FFTW, a number of constants are used to specify +options, such as `FFTW_FORWARD' or `FFTW_USE_WISDOM'. The same +constants must be used with the wrapper routines, but of course the C +header files where the constants are defined can't be incorporated +directly into Fortran code. + + Instead, we have placed Fortran equivalents of the FFTW constant +definitions in the file `fortran/fftw_f77.i' of the FFTW package. If +your Fortran compiler supports a preprocessor, you can use that to +incorporate this file into your code whenever you need to call FFTW. +Otherwise, you will have to paste the constant definitions in directly. +They are: + + integer FFTW_FORWARD,FFTW_BACKWARD + parameter (FFTW_FORWARD=-1,FFTW_BACKWARD=1) + + integer FFTW_REAL_TO_COMPLEX,FFTW_COMPLEX_TO_REAL + parameter (FFTW_REAL_TO_COMPLEX=-1,FFTW_COMPLEX_TO_REAL=1) + + integer FFTW_ESTIMATE,FFTW_MEASURE + parameter (FFTW_ESTIMATE=0,FFTW_MEASURE=1) + + integer FFTW_OUT_OF_PLACE,FFTW_IN_PLACE,FFTW_USE_WISDOM + parameter (FFTW_OUT_OF_PLACE=0) + parameter (FFTW_IN_PLACE=8,FFTW_USE_WISDOM=16) + + integer FFTW_THREADSAFE + parameter (FFTW_THREADSAFE=128) + + In C, you combine different flags (like `FFTW_USE_WISDOM' and +`FFTW_MEASURE') using the ``|'' operator; in Fortran you should just +use ``+''. + + +File: fftw.info, Node: Fortran Examples, Prev: FFTW Constants in Fortran, Up: Calling FFTW from Fortran + +5.3 Fortran Examples +==================== + +In C you might have something like the following to transform a +one-dimensional complex array: + + fftw_complex in[N], *out[N]; + fftw_plan plan; + + plan = fftw_create_plan(N,FFTW_FORWARD,FFTW_ESTIMATE); + fftw_one(plan,in,out); + fftw_destroy_plan(plan); + + In Fortran, you use the following to accomplish the same thing: + + double complex in, out + dimension in(N), out(N) + integer plan + + call fftw_f77_create_plan(plan,N,FFTW_FORWARD,FFTW_ESTIMATE) + call fftw_f77_one(plan,in,out) + call fftw_f77_destroy_plan(plan) + + Notice how all routines are called as Fortran subroutines, and the +plan is returned via the first argument to `fftw_f77_create_plan'. +_Important:_ these examples assume that `integer' is the same size as a +pointer, and may need modification on a 64-bit machine. *Note Wrapper +Routines::, above. To do the same thing, but using 8 threads in +parallel (*note Multi-threaded FFTW::), you would simply replace the +call to `fftw_f77_one' with: + + call fftw_f77_threads_one(8,plan,in,out) + + To transform a three-dimensional array in-place with C, you might do: + + fftw_complex arr[L][M][N]; + fftwnd_plan plan; + int n[3] = {L,M,N}; + + plan = fftwnd_create_plan(3,n,FFTW_FORWARD, + FFTW_ESTIMATE | FFTW_IN_PLACE); + fftwnd_one(plan, arr, 0); + fftwnd_destroy_plan(plan); + + In Fortran, you would use this instead: + + double complex arr + dimension arr(L,M,N) + integer n + dimension n(3) + integer plan + + n(1) = L + n(2) = M + n(3) = N + call fftwnd_f77_create_plan(plan,3,n,FFTW_FORWARD, + + FFTW_ESTIMATE + FFTW_IN_PLACE) + call fftwnd_f77_one(plan, arr, 0) + call fftwnd_f77_destroy_plan(plan) + + Instead of calling `fftwnd_f77_create_plan(plan,3,n,...)', we could +also have called `fftw3d_f77_create_plan(plan,L,M,N,...)'. + + Note that we pass the array dimensions in the "natural" order; also +note that the last argument to `fftwnd_f77' is ignored since the +transform is `FFTW_IN_PLACE'. + + To transform a one-dimensional real array in Fortran, you might do: + + double precision in, out + dimension in(N), out(N) + integer plan + + call rfftw_f77_create_plan(plan,N,FFTW_REAL_TO_COMPLEX, + + FFTW_ESTIMATE) + call rfftw_f77_one(plan,in,out) + call rfftw_f77_destroy_plan(plan) + + To transform a two-dimensional real array, out of place, you might +use the following: + + double precision in + double complex out + dimension in(M,N), out(M/2 + 1, N) + integer plan + + call rfftw2d_f77_create_plan(plan,M,N,FFTW_REAL_TO_COMPLEX, + + FFTW_ESTIMATE) + call rfftwnd_f77_one_real_to_complex(plan, in, out) + call rfftwnd_f77_destroy_plan(plan) + + Important: Notice that it is the _first_ dimension of the complex +output array that is cut in half in Fortran, rather than the last +dimension as in C. This is a consequence of the wrapper routines +reversing the order of the array dimensions passed to FFTW so that the +Fortran program can use its ordinary column-major order. + + +File: fftw.info, Node: Installation and Customization, Next: Acknowledgments, Prev: Calling FFTW from Fortran, Up: Top + +6 Installation and Customization +******************************** + +This chapter describes the installation and customization of FFTW, the +latest version of which may be downloaded from the FFTW home page +(http://www.fftw.org). + + As distributed, FFTW makes very few assumptions about your system. +All you need is an ANSI C compiler (`gcc' is fine, although +vendor-provided compilers often produce faster code). However, +installation of FFTW is somewhat simpler if you have a Unix or a GNU +system, such as Linux. In this chapter, we first describe the +installation of FFTW on Unix and non-Unix systems. We then describe how +you can customize FFTW to achieve better performance. Specifically, you +can I) enable `gcc'/x86-specific hacks that improve performance on +Pentia and PentiumPro's; II) adapt FFTW to use the high-resolution clock +of your machine, if any; III) produce code (_codelets_) to support fast +transforms of sizes that are not supported efficiently by the standard +FFTW distribution. + +* Menu: + +* Installation on Unix:: +* Installation on non-Unix Systems:: +* Installing FFTW in both single and double precision:: +* gcc and Pentium hacks:: +* Customizing the timer:: +* Generating your own code:: + + +File: fftw.info, Node: Installation on Unix, Next: Installation on non-Unix Systems, Prev: Installation and Customization, Up: Installation and Customization + +6.1 Installation on Unix +======================== + +FFTW comes with a `configure' program in the GNU style. Installation +can be as simple as: + + ./configure + make + make install + + This will build the uniprocessor complex and real transform libraries +along with the test programs. We strongly recommend that you use GNU +`make' if it is available; on some systems it is called `gmake'. The +"`make install'" command installs the fftw and rfftw libraries in +standard places, and typically requires root privileges (unless you +specify a different install directory with the `--prefix' flag to +`configure'). You can also type "`make check'" to put the FFTW test +programs through their paces. If you have problems during +configuration or compilation, you may want to run "`make distclean'" +before trying again; this ensures that you don't have any stale files +left over from previous compilation attempts. + + The `configure' script knows good `CFLAGS' (C compiler flags) for a +few systems. If your system is not known, the `configure' script will +print out a warning. (1) In this case, you can compile FFTW with the +command + make CFLAGS="" + If you do find an optimal set of `CFLAGS' for your system, please +let us know what they are (along with the output of `config.guess') so +that we can include them in future releases. + + The `configure' program supports all the standard flags defined by +the GNU Coding Standards; see the `INSTALL' file in FFTW or the GNU web +page (http://www.gnu.org/prep/standards_toc.html). Note especially +`--help' to list all flags and `--enable-shared' to create shared, +rather than static, libraries. `configure' also accepts a few +FFTW-specific flags, particularly: + + * `--enable-float' Produces a single-precision version of FFTW + (`float') instead of the default double-precision (`double'). + *Note Installing FFTW in both single and double precision::. + + * `--enable-type-prefix' Adds a `d' or `s' prefix to all installed + libraries and header files to indicate the floating-point + precision. *Note Installing FFTW in both single and double + precision::. (`--enable-type-prefix=' lets you add an + arbitrary prefix.) By default, no prefix is used. + + * `--enable-threads' Enables compilation and installation of the FFTW + threads library (*note Multi-threaded FFTW::), which provides a + simple interface to parallel transforms for SMP systems. (By + default, the threads routines are not compiled.) + + * `--enable-mpi' Enables compilation and installation of the FFTW MPI + library (*note MPI FFTW::), which provides parallel transforms for + distributed-memory systems with MPI. (By default, the MPI + routines are not compiled.) + + * `--disable-fortran' Disables inclusion of Fortran-callable wrapper + routines (*note Calling FFTW from Fortran::) in the standard FFTW + libraries. These wrapper routines increase the library size by + only a negligible amount, so they are included by default as long + as the `configure' script finds a Fortran compiler on your system. + + * `--with-gcc' Enables the use of `gcc'. By default, FFTW uses the + vendor-supplied `cc' compiler if present. Unfortunately, `gcc' + produces slower code than `cc' on many systems. + + * `--enable-i386-hacks' *Note gcc and Pentium hacks::, below. + + * `--enable-pentium-timer' *Note gcc and Pentium hacks::, below. + + + To force `configure' to use a particular C compiler (instead of the default, +usually `cc'), set the environment variable `CC' to the name of the +desired compiler before running `configure'; you may also need to set +the flags via the variable `CFLAGS'. + + ---------- Footnotes ---------- + + (1) Each version of `cc' seems to have its own magic incantation to +get the fastest code most of the time--you'd think that people would +have agreed upon some convention, e.g. "`-Omax'", by now. + + +File: fftw.info, Node: Installation on non-Unix Systems, Next: Installing FFTW in both single and double precision, Prev: Installation on Unix, Up: Installation and Customization + +6.2 Installation on non-Unix Systems +==================================== + +It is quite straightforward to install FFTW even on non-Unix systems +lacking the niceties of the `configure' script. The FFTW Home Page may +include some FFTW packages preconfigured for particular +systems/compilers, and also contains installation notes sent in by users. +All you really need to do, though, is to compile all of the `.c' files +in the appropriate directories of the FFTW package. (You needn't worry +about the many extraneous files lying around.) + + For the complex transforms, compile all of the `.c' files in the +`fftw' directory and link them into a library. Similarly, for the real +transforms, compile all of the `.c' files in the `rfftw' directory into +a library. Note that these sources `#include' various files in the +`fftw' and `rfftw' directories, so you may need to set up the +`#include' paths for your compiler appropriately. Be sure to enable +the highest-possible level of optimization in your compiler. + + By default, FFTW is compiled for double-precision transforms. To +work in single precision rather than double precision, `#define' the +symbol `FFTW_ENABLE_FLOAT' in `fftw.h' (in the `fftw' directory) and +(re)compile FFTW. + + These libraries should be linked with any program that uses the +corresponding transforms. The required header files, `fftw.h' and +`rfftw.h', are located in the `fftw' and `rfftw' directories +respectively; you may want to put them with the libraries, or wherever +header files normally go on your system. + + FFTW includes test programs, `fftw_test' and `rfftw_test', in the +`tests' directory. These are compiled and linked like any program +using FFTW, except that they use additional header files located in the +`fftw' and `rfftw' directories, so you will need to set your compiler +`#include' paths appropriately. `fftw_test' is compiled from +`fftw_test.c' and `test_main.c', while `rfftw_test' is compiled from +`rfftw_test.c' and `test_main.c'. When you run these programs, you +will be prompted interactively for various possible tests to perform; +see also `tests/README' for more information. + + +File: fftw.info, Node: Installing FFTW in both single and double precision, Next: gcc and Pentium hacks, Prev: Installation on non-Unix Systems, Up: Installation and Customization + +6.3 Installing FFTW in both single and double precision +======================================================= + +It is often useful to install both single- and double-precision versions +of the FFTW libraries on the same machine, and we provide a convenient +mechanism for achieving this on Unix systems. + + When the `--enable-type-prefix' option of configure is used, the +FFTW libraries and header files are installed with a prefix of `d' or +`s', depending upon whether you compiled in double or single precision. +Then, instead of linking your program with `-lrfftw -lfftw', for +example, you would link with `-ldrfftw -ldfftw' to use the +double-precision version or with `-lsrfftw -lsfftw' to use the +single-precision version. Also, you would `#include' `' or +`' instead of `', and so on. + + _The names of FFTW functions, data types, and constants remain +unchanged!_ You still call, for instance, `fftw_one' and not +`dfftw_one'. Only the names of header files and libraries are +modified. One consequence of this is that _you cannot use both the +single- and double-precision FFTW libraries in the same program, +simultaneously,_ as the function names would conflict. + + So, to install both the single- and double-precision libraries on the +same machine, you would do: + + ./configure --enable-type-prefix [ other options ] + make + make install + make clean + ./configure --enable-float --enable-type-prefix [ other options ] + make + make install + + +File: fftw.info, Node: gcc and Pentium hacks, Next: Customizing the timer, Prev: Installing FFTW in both single and double precision, Up: Installation and Customization + +6.4 `gcc' and Pentium hacks +=========================== + +The `configure' option `--enable-i386-hacks' enables specific +optimizations for the Pentium and later x86 CPUs under gcc, which can +significantly improve performance of double-precision transforms. +Specifically, we have tested these hacks on Linux with `gcc' 2.[789] +and versions of `egcs' since 1.0.3. These optimizations affect only +the performance and not the correctness of FFTW (i.e. it is always safe +to try them out). + + These hacks provide a workaround to the incorrect alignment of local +`double' variables in `gcc'. The compiler aligns these variables to +multiples of 4 bytes, but execution is much faster (on Pentium and +PentiumPro) if `double's are aligned to a multiple of 8 bytes. By +carefully counting the number of variables allocated by the compiler in +performance-critical regions of the code, we have been able to +introduce dummy allocations (using `alloca') that align the stack +properly. The hack depends crucially on the compiler flags that are +used. For example, it won't work without `-fomit-frame-pointer'. + + In principle, these hacks are no longer required under `gcc' +versions 2.95 and later, which automatically align the stack correctly +(see `-mpreferred-stack-boundary' in the `gcc' manual). However, we +have encountered a bug +(http://egcs.cygnus.com/ml/gcc-bugs/1999-11/msg00259.html) in the stack +alignment of versions 2.95.[012] that causes FFTW's stack to be +misaligned under some circumstances. The `configure' script +automatically detects this bug and disables `gcc''s stack alignment in +favor of our own hacks when `--enable-i386-hacks' is used. + + The `fftw_test' program outputs speed measurements that you can use +to see if these hacks are beneficial. + + The `configure' option `--enable-pentium-timer' enables the use of +the Pentium and PentiumPro cycle counter for timing purposes. In order +to get correct results, you must define `FFTW_CYCLES_PER_SEC' in +`fftw/config.h' to be the clock speed of your processor; the resulting +FFTW library will be nonportable. The use of this option is +deprecated. On serious operating systems (such as Linux), FFTW uses +`gettimeofday()', which has enough resolution and is portable. (Note +that Win32 has its own high-resolution timing routines as well. FFTW +contains unsupported code to use these routines.) + + +File: fftw.info, Node: Customizing the timer, Next: Generating your own code, Prev: gcc and Pentium hacks, Up: Installation and Customization + +6.5 Customizing the timer +========================= + +FFTW needs a reasonably-precise clock in order to find the optimal way +to compute a transform. On Unix systems, `configure' looks for +`gettimeofday' and other system-specific timers. If it does not find +any high resolution clock, it defaults to using the `clock()' function, +which is very portable, but forces FFTW to run for a long time in order +to get reliable measurements. + + If your machine supports a high-resolution clock not recognized by +FFTW, it is therefore advisable to use it. You must edit +`fftw/fftw-int.h'. There are a few macros you must redefine. The code +is documented and should be self-explanatory. (By the way, `fftw-int' +stands for `fftw-internal', but for some inexplicable reason people are +still using primitive systems with 8.3 filenames.) + + Even if you don't install high-resolution timing code, we still +recommend that you look at the `FFTW_TIME_MIN' constant in `fftw/fftw-int.h'. +This constant holds the minimum time interval (in seconds) required to +get accurate timing measurements, and should be (at least) several +hundred times the resolution of your clock. The default constants are +on the conservative side, and may cause FFTW to take longer than +necessary when you create a plan. Set `FFTW_TIME_MIN' to whatever is +appropriate on your system (be sure to set the _right_ +`FFTW_TIME_MIN'...there are several definitions in `fftw-int.h', +corresponding to different platforms and timers). + + As an aid in checking the resolution of your clock, you can use the +`tests/fftw_test' program with the `-t' option (c.f. `tests/README'). +Remember, the mere fact that your clock reports times in, say, +picoseconds, does not mean that it is actually _accurate_ to that +resolution. + + +File: fftw.info, Node: Generating your own code, Prev: Customizing the timer, Up: Installation and Customization + +6.6 Generating your own code +============================ + +If you know that you will only use transforms of a certain size (say, +powers of 2) and want to reduce the size of the library, you can +reconfigure FFTW to support only those sizes you are interested in. You +may even generate code to enable efficient transforms of a size not +supported by the default distribution. The default distribution +supports transforms of any size, but not all sizes are equally fast. +The default installation of FFTW is best at handling sizes of the form +2^a 3^b 5^c 7^d 11^e 13^f, where e+f is either 0 or 1, and the other +exponents are arbitrary. Other sizes are computed by means of a slow, +general-purpose routine. However, if you have an application that +requires fast transforms of size, say, `17', there is a way to generate +specialized code to handle that. + + The directory `gensrc' contains all the programs and scripts that +were used to generate FFTW. In particular, the program +`gensrc/genfft.ml' was used to generate the code that FFTW uses to +compute the transforms. We do not expect casual users to use it. +`genfft' is a rather sophisticated program that generates directed +acyclic graphs of FFT algorithms and performs algebraic simplifications +on them. `genfft' is written in Objective Caml, a dialect of ML. +Objective Caml is described at `http://pauillac.inria.fr/ocaml/' and +can be downloaded from from `ftp://ftp.inria.fr/lang/caml-light'. + + If you have Objective Caml installed, you can type `sh bootstrap.sh' +in the top-level directory to re-generate the files. If you change the +`gensrc/config' file, you can optimize FFTW for sizes that are not +currently supported efficiently (say, 17 or 19). + + We do not provide more details about the code-generation process, +since we do not expect that users will need to generate their own code. +However, feel free to contact us at if you are +interested in the subject. + + You might find it interesting to learn Caml and/or some modern +programming techniques that we used in the generator (including monadic +programming), especially if you heard the rumor that Java and +object-oriented programming are the latest advancement in the field. +The internal operation of the codelet generator is described in the +paper, "A Fast Fourier Transform Compiler," by M. Frigo, which is +available from the FFTW home page (http://www.fftw.org) and will appear +in the `Proceedings of the 1999 ACM SIGPLAN Conference on Programming +Language Design and Implementation (PLDI)'. + + +File: fftw.info, Node: Acknowledgments, Next: License and Copyright, Prev: Installation and Customization, Up: Top + +7 Acknowledgments +***************** + +Matteo Frigo was supported in part by the Defense Advanced Research +Projects Agency (DARPA) under Grants N00014-94-1-0985 and +F30602-97-1-0270, and by a Digital Equipment Corporation Fellowship. +Steven G. Johnson was supported in part by a DoD NDSEG Fellowship, an +MIT Karl Taylor Compton Fellowship, and by the Materials Research +Science and Engineering Center program of the National Science +Foundation under award DMR-9400334. + + Both authors were also supported in part by their respective +girlfriends, by the letters "Q" and "R", and by the number 12. + + We are grateful to SUN Microsystems Inc. for its donation of a +cluster of 9 8-processor Ultra HPC 5000 SMPs (24 Gflops peak). These +machines served as the primary platform for the development of earlier +versions of FFTW. + + We thank Intel Corporation for donating a four-processor Pentium Pro +machine. We thank the Linux community for giving us a decent OS to run +on that machine. + + The `genfft' program was written using Objective Caml, a dialect of +ML. Objective Caml is a small and elegant language developed by Xavier +Leroy. The implementation is available from `ftp.inria.fr' in the +directory `lang/caml-light'. We used versions 1.07 and 2.00 of the +software. In previous releases of FFTW, `genfft' was written in Caml +Light, by the same authors. An even earlier implementation of `genfft' +was written in Scheme, but Caml is definitely better for this kind of +application. + + FFTW uses many tools from the GNU project, including `automake', +`texinfo', and `libtool'. + + Prof. Charles E. Leiserson of MIT provided continuous support and +encouragement. This program would not exist without him. Charles also +proposed the name "codelets" for the basic FFT blocks. + + Prof. John D. Joannopoulos of MIT demonstrated continuing tolerance +of Steven's "extra-curricular" computer-science activities. Steven's +chances at a physics degree would not exist without him. + + Andrew Sterian contributed the Windows timing code. + + Didier Miras reported a bug in the test procedure used in FFTW 1.2. +We now use a completely different test algorithm by Funda Ergun that +does not require a separate FFT program to compare against. + + Wolfgang Reimer contributed the Pentium cycle counter and a few fixes +that help portability. + + Ming-Chang Liu uncovered a well-hidden bug in the complex transforms +of FFTW 2.0 and supplied a patch to correct it. + + The FFTW FAQ was written in `bfnn' (Bizarre Format With No Name) and +formatted using the tools developed by Ian Jackson for the Linux FAQ. + + _We are especially thankful to all of our users for their continuing +support, feedback, and interest during our development of FFTW._ + + +File: fftw.info, Node: License and Copyright, Next: Concept Index, Prev: Acknowledgments, Up: Top + +8 License and Copyright +*********************** + +FFTW is copyright (C) 1997-1999 Massachusetts Institute of Technology. + + FFTW is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + + This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + + You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software Foundation, +Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. You can +also find the GPL on the GNU web site +(http://www.gnu.org/copyleft/gpl.html). + + In addition, we kindly ask you to acknowledge FFTW and its authors in +any program or publication in which you use FFTW. (You are not +_required_ to do so; it is up to your common sense to decide whether +you want to comply with this request or not.) + + Non-free versions of FFTW are available under terms different than +the General Public License. (e.g. they do not require you to accompany +any object code using FFTW with the corresponding source code.) For +these alternate terms you must purchase a license from MIT's Technology +Licensing Office. Users interested in such a license should contact us +() for more information. + + +File: fftw.info, Node: Concept Index, Next: Library Index, Prev: License and Copyright, Up: Top + +9 Concept Index +*************** + +[index] +* Menu: + +* algorithm: Introduction. (line 90) +* benchfft: Introduction. (line 29) +* benchmark <1>: gcc and Pentium hacks. + (line 35) +* benchmark <2>: MPI Tips. (line 28) +* benchmark <3>: How Many Threads to Use?. + (line 28) +* benchmark: Introduction. (line 29) +* blocking: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 111) +* C multi-dimensional arrays: Static Arrays in C. (line 6) +* Caml <1>: Acknowledgments. (line 33) +* Caml: Generating your own code. + (line 6) +* Cilk <1>: Parallel FFTW. (line 32) +* Cilk: Introduction. (line 71) +* clock: Customizing the timer. + (line 11) +* code generator <1>: Generating your own code. + (line 6) +* code generator: Introduction. (line 61) +* column-major <1>: Fortran Examples. (line 98) +* column-major <2>: Wrapper Routines. (line 42) +* column-major: Column-major Format. (line 6) +* compiler <1>: gcc and Pentium hacks. + (line 15) +* compiler <2>: Installation on non-Unix Systems. + (line 9) +* compiler <3>: Installation on Unix. + (line 76) +* compiler <4>: Installation and Customization. + (line 12) +* compiler <5>: Calling FFTW from Fortran. + (line 14) +* compiler: Introduction. (line 80) +* compiler flags: Installation on Unix. + (line 25) +* complex multi-dimensional transform <1>: Multi-dimensional Transforms Reference. + (line 6) +* complex multi-dimensional transform: Complex Multi-dimensional Transforms Tutorial. + (line 6) +* complex number: Data Types. (line 6) +* complex one-dimensional transform: Complex One-dimensional Transforms Tutorial. + (line 6) +* complex to real transform <1>: Real One-dimensional Transforms Reference. + (line 9) +* complex to real transform: Real One-dimensional Transforms Tutorial. + (line 6) +* complex transform: Complex One-dimensional Transforms Tutorial. + (line 6) +* configure <1>: Installing FFTW in both single and double precision. + (line 10) +* configure <2>: Installation on Unix. + (line 7) +* configure <3>: MPI FFTW Installation. + (line 11) +* configure <4>: Installation and Supported Hardware/Software. + (line 12) +* configure: Data Types. (line 54) +* convolution: Real Multi-dimensional Transforms Tutorial. + (line 96) +* cyclic convolution: Real Multi-dimensional Transforms Tutorial. + (line 96) +* Discrete Fourier Transform <1>: What RFFTWND Really Computes. + (line 6) +* Discrete Fourier Transform <2>: What RFFTW Really Computes. + (line 6) +* Discrete Fourier Transform <3>: What FFTWND Really Computes. + (line 6) +* Discrete Fourier Transform: What FFTW Really Computes. + (line 6) +* distributed array format <1>: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 50) +* distributed array format <2>: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 96) +* distributed array format: MPI Data Layout. (line 6) +* distributed memory <1>: MPI Data Layout. (line 6) +* distributed memory <2>: MPI FFTW. (line 10) +* distributed memory: Parallel FFTW. (line 22) +* Ecclesiastes: Caveats in Using Wisdom. + (line 7) +* executor: Introduction. (line 39) +* FFTW: Introduction. (line 16) +* FFTWND: Multi-dimensional Transforms Reference. + (line 8) +* flags <1>: FFTW Constants in Fortran. + (line 35) +* flags <2>: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 29) +* flags <3>: rfftwnd_create_plan. (line 64) +* flags <4>: rfftw_create_plan. (line 53) +* flags <5>: fftwnd_create_plan. (line 88) +* flags <6>: fftw_create_plan. (line 55) +* flags <7>: Complex Multi-dimensional Transforms Tutorial. + (line 24) +* flags: Complex One-dimensional Transforms Tutorial. + (line 35) +* floating-point precision <1>: Installing FFTW in both single and double precision. + (line 6) +* floating-point precision <2>: Installation on non-Unix Systems. + (line 22) +* floating-point precision <3>: Installation on Unix. + (line 41) +* floating-point precision <4>: Wrapper Routines. (line 50) +* floating-point precision: Data Types. (line 50) +* Fortran-callable wrappers <1>: Installation on Unix. + (line 61) +* Fortran-callable wrappers <2>: Calling FFTW from Fortran. + (line 6) +* Fortran-callable wrappers: Column-major Format. (line 18) +* frequency <1>: What FFTW Really Computes. + (line 17) +* frequency <2>: Complex Multi-dimensional Transforms Tutorial. + (line 54) +* frequency: Complex One-dimensional Transforms Tutorial. + (line 64) +* gettimeofday: Customizing the timer. + (line 11) +* girlfriends: Acknowledgments. (line 15) +* halfcomplex array <1>: Data Types. (line 6) +* halfcomplex array: Real One-dimensional Transforms Tutorial. + (line 11) +* hermitian array <1>: What RFFTWND Really Computes. + (line 15) +* hermitian array: Data Types. (line 38) +* in-place transform <1>: Wrapper Routines. (line 33) +* in-place transform <2>: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 75) +* in-place transform <3>: Tips for Optimal Threading. + (line 18) +* in-place transform <4>: Array Dimensions for Real Multi-dimensional Transforms. + (line 25) +* in-place transform <5>: rfftwnd_create_plan. (line 77) +* in-place transform <6>: fftwnd. (line 49) +* in-place transform <7>: fftw. (line 44) +* in-place transform <8>: fftw_create_plan. (line 69) +* in-place transform: Complex Multi-dimensional Transforms Tutorial. + (line 28) +* installation: Installation and Customization. + (line 21) +* linking on Unix <1>: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 78) +* linking on Unix <2>: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 134) +* linking on Unix <3>: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 117) +* linking on Unix <4>: Usage of Multi-threaded FFTW. + (line 85) +* linking on Unix <5>: Real One-dimensional Transforms Tutorial. + (line 99) +* linking on Unix: Complex One-dimensional Transforms Tutorial. + (line 72) +* LISP <1>: Acknowledgments. (line 33) +* LISP: Importing and Exporting Wisdom. + (line 36) +* load-balancing: MPI Tips. (line 9) +* malloc <1>: Memory Allocator Reference. + (line 10) +* malloc: Dynamic Arrays in C. (line 15) +* ML: Generating your own code. + (line 6) +* monadic programming: Generating your own code. + (line 39) +* MPI <1>: Installation on Unix. + (line 56) +* MPI <2>: MPI FFTW. (line 6) +* MPI <3>: Parallel FFTW. (line 22) +* MPI: Introduction. (line 67) +* MPI_Alltoall <1>: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 48) +* MPI_Alltoall <2>: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 91) +* MPI_Alltoall: MPI FFTW Installation. + (line 23) +* MPI_Barrier: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 111) +* MPI_COMM_WORLD <1>: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 23) +* MPI_COMM_WORLD: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 57) +* MPI_Finalize: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 38) +* MPI_Init: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 38) +* multi-dimensional transform <1>: Real Multi-dimensional Transforms Reference. + (line 6) +* multi-dimensional transform <2>: Multi-dimensional Transforms Reference. + (line 6) +* multi-dimensional transform: Complex Multi-dimensional Transforms Tutorial. + (line 6) +* n_fields <1>: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 40) +* n_fields: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 79) +* nerd-readable text: Importing and Exporting Wisdom. + (line 36) +* normalization <1>: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 103) +* normalization <2>: What RFFTW Really Computes. + (line 24) +* normalization <3>: What FFTW Really Computes. + (line 13) +* normalization <4>: Real Multi-dimensional Transforms Tutorial. + (line 89) +* normalization <5>: Real One-dimensional Transforms Tutorial. + (line 68) +* normalization <6>: Complex Multi-dimensional Transforms Tutorial. + (line 58) +* normalization: Complex One-dimensional Transforms Tutorial. + (line 69) +* number of threads <1>: How Many Threads to Use?. + (line 6) +* number of threads: Usage of Multi-threaded FFTW. + (line 64) +* out-of-place transform: Complex Multi-dimensional Transforms Tutorial. + (line 28) +* padding <1>: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 94) +* padding <2>: Array Dimensions for Real Multi-dimensional Transforms. + (line 25) +* padding: Real Multi-dimensional Transforms Tutorial. + (line 68) +* parallel transform <1>: Parallel FFTW. (line 6) +* parallel transform: Introduction. (line 67) +* Pentium hack: gcc and Pentium hacks. + (line 6) +* plan <1>: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 44) +* plan <2>: Complex One-dimensional Transforms Tutorial. + (line 21) +* plan: Introduction. (line 36) +* planner: Introduction. (line 37) +* power spectrum: Real One-dimensional Transforms Tutorial. + (line 77) +* rank: Complex Multi-dimensional Transforms Tutorial. + (line 7) +* real multi-dimensional transform <1>: Real Multi-dimensional Transforms Reference. + (line 6) +* real multi-dimensional transform: Real Multi-dimensional Transforms Tutorial. + (line 6) +* real number: Data Types. (line 6) +* real transform <1>: Real One-dimensional Transforms Reference. + (line 9) +* real transform: Real One-dimensional Transforms Tutorial. + (line 6) +* RFFTW <1>: Real One-dimensional Transforms Reference. + (line 9) +* RFFTW: Real One-dimensional Transforms Tutorial. + (line 6) +* RFFTWND: Real Multi-dimensional Transforms Reference. + (line 9) +* rfftwnd array format <1>: Fortran Examples. (line 98) +* rfftwnd array format <2>: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 96) +* rfftwnd array format <3>: Strides in In-place RFFTWND. + (line 6) +* rfftwnd array format <4>: Array Dimensions for Real Multi-dimensional Transforms. + (line 6) +* rfftwnd array format: Real Multi-dimensional Transforms Tutorial. + (line 51) +* row-major <1>: MPI Data Layout. (line 91) +* row-major <2>: fftwnd_create_plan. (line 65) +* row-major <3>: Row-major Format. (line 6) +* row-major: Real Multi-dimensional Transforms Tutorial. + (line 145) +* saving plans to disk: Words of Wisdom. (line 6) +* slab decomposition: MPI Data Layout. (line 14) +* specific planner: Discussion on Specific Plans. + (line 6) +* stride <1>: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 79) +* stride <2>: Strides in In-place RFFTWND. + (line 6) +* stride <3>: rfftwnd. (line 52) +* stride <4>: rfftw. (line 18) +* stride <5>: fftwnd. (line 20) +* stride <6>: fftw. (line 19) +* stride: Row-major Format. (line 25) +* thread safety <1>: Using Multi-threaded FFTW in a Multi-threaded Program. + (line 6) +* thread safety: Thread safety. (line 6) +* threads <1>: Installation on Unix. + (line 51) +* threads <2>: Multi-threaded FFTW. (line 6) +* threads <3>: Parallel FFTW. (line 13) +* threads <4>: Thread safety. (line 6) +* threads: Introduction. (line 67) +* timer, customization of: Customizing the timer. + (line 6) +* Tutorial: Tutorial. (line 6) +* wisdom <1>: Wisdom Reference. (line 12) +* wisdom <2>: rfftwnd_create_plan. (line 88) +* wisdom <3>: fftwnd_create_plan. (line 109) +* wisdom <4>: fftw_create_plan. (line 95) +* wisdom: Words of Wisdom. (line 6) +* wisdom, import and export: Importing and Exporting Wisdom. + (line 6) +* wisdom, problems with: Caveats in Using Wisdom. + (line 6) + + +File: fftw.info, Node: Library Index, Prev: Concept Index, Up: Top + +10 Library Index +**************** + +[index] +* Menu: + +* fftw: fftw. (line 14) +* fftw2d_create_plan <1>: fftwnd_create_plan. (line 33) +* fftw2d_create_plan: Complex Multi-dimensional Transforms Tutorial. + (line 40) +* fftw2d_create_plan_specific: fftwnd_create_plan. (line 33) +* fftw2d_mpi_create_plan: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 50) +* fftw3d_create_plan <1>: fftwnd_create_plan. (line 33) +* fftw3d_create_plan: Complex Multi-dimensional Transforms Tutorial. + (line 40) +* fftw3d_create_plan_specific: fftwnd_create_plan. (line 33) +* fftw3d_f77_create_plan: Fortran Examples. (line 64) +* fftw3d_mpi_create_plan: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 54) +* FFTW_BACKWARD: Complex One-dimensional Transforms Tutorial. + (line 31) +* FFTW_COMPLEX: Data Types. (line 59) +* fftw_complex <1>: Data Types. (line 23) +* fftw_complex: Complex One-dimensional Transforms Tutorial. + (line 61) +* FFTW_COMPLEX_TO_REAL <1>: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 102) +* FFTW_COMPLEX_TO_REAL <2>: rfftw_create_plan. (line 51) +* FFTW_COMPLEX_TO_REAL: Real One-dimensional Transforms Tutorial. + (line 34) +* fftw_create_plan <1>: fftw_create_plan. (line 15) +* fftw_create_plan: Complex One-dimensional Transforms Tutorial. + (line 26) +* fftw_create_plan_specific: fftw_create_plan. (line 15) +* fftw_destroy_plan <1>: fftw_destroy_plan. (line 9) +* fftw_destroy_plan: Complex One-dimensional Transforms Tutorial. + (line 48) +* fftw_direction <1>: rfftwnd_create_plan. (line 16) +* fftw_direction <2>: fftwnd_create_plan. (line 33) +* fftw_direction <3>: fftw_create_plan. (line 15) +* fftw_direction <4>: Real One-dimensional Transforms Tutorial. + (line 30) +* fftw_direction <5>: Complex Multi-dimensional Transforms Tutorial. + (line 16) +* fftw_direction: Complex One-dimensional Transforms Tutorial. + (line 26) +* FFTW_ENABLE_FLOAT: Data Types. (line 54) +* FFTW_ESTIMATE: Complex One-dimensional Transforms Tutorial. + (line 40) +* fftw_export_wisdom <1>: fftw_export_wisdom. (line 11) +* fftw_export_wisdom: Words of Wisdom. (line 30) +* fftw_export_wisdom_to_file <1>: fftw_export_wisdom. (line 11) +* fftw_export_wisdom_to_file: Importing and Exporting Wisdom. + (line 8) +* fftw_export_wisdom_to_string <1>: fftw_export_wisdom. (line 11) +* fftw_export_wisdom_to_string: Importing and Exporting Wisdom. + (line 20) +* fftw_f77_create_plan: Fortran Examples. (line 25) +* fftw_f77_destroy_plan: Fortran Examples. (line 25) +* fftw_f77_one <1>: Fortran Examples. (line 25) +* fftw_f77_one: Wrapper Routines. (line 14) +* fftw_f77_threads_one: Fortran Examples. (line 35) +* fftw_forget_wisdom: fftw_forget_wisdom. (line 9) +* FFTW_FORWARD: Complex One-dimensional Transforms Tutorial. + (line 31) +* fftw_free_hook: Memory Allocator Reference. + (line 10) +* fftw_import_wisdom <1>: fftw_import_wisdom. (line 11) +* fftw_import_wisdom: Words of Wisdom. (line 39) +* fftw_import_wisdom_from_file <1>: fftw_import_wisdom. (line 11) +* fftw_import_wisdom_from_file: Importing and Exporting Wisdom. + (line 8) +* fftw_import_wisdom_from_string <1>: fftw_import_wisdom. (line 11) +* fftw_import_wisdom_from_string: Importing and Exporting Wisdom. + (line 20) +* FFTW_IN_PLACE: Complex Multi-dimensional Transforms Tutorial. + (line 24) +* fftw_malloc: Memory Allocator Reference. + (line 10) +* fftw_malloc_hook: Memory Allocator Reference. + (line 10) +* FFTW_MEASURE: Complex One-dimensional Transforms Tutorial. + (line 36) +* fftw_mpi: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 39) +* fftw_mpi_create_plan: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 19) +* fftw_mpi_destroy_plan: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 25) +* fftw_mpi_local_sizes: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 58) +* fftw_mpi_plan: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 19) +* fftw_mpi_test <1>: MPI Tips. (line 28) +* fftw_mpi_test: MPI FFTW Installation. + (line 26) +* FFTW_NORMAL_ORDER: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 97) +* fftw_one <1>: fftw. (line 14) +* fftw_one: Complex One-dimensional Transforms Tutorial. + (line 54) +* FFTW_OUT_OF_PLACE: fftw_create_plan. (line 67) +* fftw_plan <1>: fftw_create_plan. (line 15) +* fftw_plan: Complex One-dimensional Transforms Tutorial. + (line 26) +* FFTW_REAL: Data Types. (line 59) +* fftw_real: Data Types. (line 23) +* FFTW_REAL_TO_COMPLEX <1>: rfftw_create_plan. (line 51) +* FFTW_REAL_TO_COMPLEX: Real One-dimensional Transforms Tutorial. + (line 34) +* FFTW_SCRAMBLED_INPUT: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 29) +* FFTW_SCRAMBLED_OUTPUT: Usage of MPI FFTW for Complex One-dimensional Transforms. + (line 29) +* fftw_test <1>: gcc and Pentium hacks. + (line 35) +* fftw_test: Installation on non-Unix Systems. + (line 33) +* fftw_threads: Usage of Multi-threaded FFTW. + (line 32) +* fftw_threads_init: Usage of Multi-threaded FFTW. + (line 19) +* fftw_threads_one: Usage of Multi-threaded FFTW. + (line 34) +* fftw_threads_test: How Many Threads to Use?. + (line 20) +* FFTW_THREADSAFE: Thread safety. (line 32) +* FFTW_TIME_MIN: Customizing the timer. + (line 21) +* FFTW_TRANSPOSED_ORDER <1>: MPI Tips. (line 17) +* FFTW_TRANSPOSED_ORDER <2>: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 102) +* FFTW_TRANSPOSED_ORDER <3>: MPI Data Layout. (line 55) +* FFTW_TRANSPOSED_ORDER: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 97) +* FFTW_USE_WISDOM: Words of Wisdom. (line 26) +* fftwnd <1>: fftwnd. (line 14) +* fftwnd: Complex Multi-dimensional Transforms Tutorial. + (line 24) +* fftwnd_create_plan <1>: fftwnd_create_plan. (line 33) +* fftwnd_create_plan: Complex Multi-dimensional Transforms Tutorial. + (line 16) +* fftwnd_create_plan_specific: fftwnd_create_plan. (line 33) +* fftwnd_destroy_plan <1>: fftwnd_destroy_plan. (line 9) +* fftwnd_destroy_plan: Complex Multi-dimensional Transforms Tutorial. + (line 43) +* fftwnd_f77_create_plan: Fortran Examples. (line 62) +* fftwnd_f77_destroy_plan: Fortran Examples. (line 62) +* fftwnd_f77_one: Fortran Examples. (line 62) +* fftwnd_mpi: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 72) +* fftwnd_mpi_create_plan: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 54) +* fftwnd_mpi_destroy_plan: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 109) +* fftwnd_mpi_local_sizes: MPI Data Layout. (line 34) +* fftwnd_mpi_plan: Usage of MPI FFTW for Complex Multi-dimensional Transforms. + (line 50) +* fftwnd_one <1>: fftwnd. (line 14) +* fftwnd_one: Complex Multi-dimensional Transforms Tutorial. + (line 49) +* fftwnd_plan <1>: fftwnd_create_plan. (line 33) +* fftwnd_plan: Complex Multi-dimensional Transforms Tutorial. + (line 16) +* fftwnd_threads: Usage of Multi-threaded FFTW. + (line 37) +* fftwnd_threads_one: Usage of Multi-threaded FFTW. + (line 39) +* genfft <1>: Acknowledgments. (line 33) +* genfft: Generating your own code. + (line 27) +* rfftw: rfftw. (line 13) +* rfftw2d_create_plan <1>: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 130) +* rfftw2d_create_plan <2>: rfftwnd_create_plan. (line 16) +* rfftw2d_create_plan: Real Multi-dimensional Transforms Tutorial. + (line 27) +* rfftw2d_f77_create_plan: Fortran Examples. (line 93) +* rfftw3d_create_plan <1>: rfftwnd_create_plan. (line 16) +* rfftw3d_create_plan: Real Multi-dimensional Transforms Tutorial. + (line 27) +* rfftw3d_mpi_create_plan: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 40) +* rfftw_create_plan <1>: rfftw_create_plan. (line 13) +* rfftw_create_plan: Real One-dimensional Transforms Tutorial. + (line 30) +* rfftw_create_plan_specific: rfftw_create_plan. (line 13) +* rfftw_destroy_plan <1>: rfftw_destroy_plan. (line 9) +* rfftw_destroy_plan: Real One-dimensional Transforms Tutorial. + (line 39) +* rfftw_f77_create_plan: Fortran Examples. (line 80) +* rfftw_f77_destroy_plan: Fortran Examples. (line 80) +* rfftw_f77_one: Fortran Examples. (line 80) +* rfftw_mpi_test <1>: MPI Tips. (line 35) +* rfftw_mpi_test: MPI FFTW Installation. + (line 26) +* rfftw_one <1>: rfftw. (line 13) +* rfftw_one: Real One-dimensional Transforms Tutorial. + (line 45) +* rfftw_plan <1>: rfftw_create_plan. (line 13) +* rfftw_plan: Real One-dimensional Transforms Tutorial. + (line 30) +* rfftw_test: Installation on non-Unix Systems. + (line 33) +* rfftw_threads: Usage of Multi-threaded FFTW. + (line 42) +* rfftw_threads_one: Usage of Multi-threaded FFTW. + (line 44) +* rfftw_threads_test: How Many Threads to Use?. + (line 20) +* rfftwnd_complex_to_real: rfftwnd. (line 19) +* rfftwnd_create_plan <1>: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 130) +* rfftwnd_create_plan <2>: rfftwnd_create_plan. (line 16) +* rfftwnd_create_plan: Real Multi-dimensional Transforms Tutorial. + (line 21) +* rfftwnd_destroy_plan: rfftwnd_destroy_plan. + (line 9) +* rfftwnd_f77_destroy_plan: Fortran Examples. (line 93) +* rfftwnd_f77_one_real_to_complex: Fortran Examples. (line 93) +* rfftwnd_mpi: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 64) +* rfftwnd_mpi_destroy_plan: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 83) +* rfftwnd_mpi_local_sizes: Usage of MPI FFTW for Real Multi-dimensional Transforms. + (line 49) +* rfftwnd_one_complex_to_real <1>: rfftwnd. (line 19) +* rfftwnd_one_complex_to_real: Real Multi-dimensional Transforms Tutorial. + (line 41) +* rfftwnd_one_real_to_complex <1>: rfftwnd. (line 19) +* rfftwnd_one_real_to_complex: Real Multi-dimensional Transforms Tutorial. + (line 41) +* rfftwnd_plan <1>: rfftwnd_create_plan. (line 16) +* rfftwnd_plan: Real Multi-dimensional Transforms Tutorial. + (line 21) +* rfftwnd_real_to_complex: rfftwnd. (line 19) +* rfftwnd_threads_complex_to_real: Usage of Multi-threaded FFTW. + (line 52) +* rfftwnd_threads_one_complex_to_real: Usage of Multi-threaded FFTW. + (line 56) +* rfftwnd_threads_one_real_to_complex: Usage of Multi-threaded FFTW. + (line 49) +* rfftwnd_threads_real_to_complex: Usage of Multi-threaded FFTW. + (line 47) + +  Tag Table: -(Indirect) -Node: Top871 -Node: Introduction4285 -Node: Tutorial10632 -Node: Complex One-dimensional Transforms Tutorial12861 -Node: Complex Multi-dimensional Transforms Tutorial16345 -Node: Real One-dimensional Transforms Tutorial20047 -Node: Real Multi-dimensional Transforms Tutorial25049 -Node: Multi-dimensional Array Format33192 -Node: Row-major Format33886 -Node: Column-major Format35224 -Node: Static Arrays in C36201 -Node: Dynamic Arrays in C36690 -Node: Dynamic Arrays in C-The Wrong Way38118 -Node: Words of Wisdom39864 -Node: Caveats in Using Wisdom44513 -Node: Importing and Exporting Wisdom46020 -Node: FFTW Reference49529 -Node: Data Types50878 -Node: One-dimensional Transforms Reference53627 -Node: fftw_create_plan54301 -Node: Discussion on Specific Plans60245 -Node: fftw61589 -Node: fftw_destroy_plan64240 -Node: What FFTW Really Computes64673 -Node: Multi-dimensional Transforms Reference66042 -Node: fftwnd_create_plan66701 -Node: fftwnd73511 -Node: fftwnd_destroy_plan76476 -Node: What FFTWND Really Computes76927 -Node: Real One-dimensional Transforms Reference77809 -Node: rfftw_create_plan78628 -Node: rfftw83538 -Node: rfftw_destroy_plan86137 -Node: What RFFTW Really Computes86592 -Node: Real Multi-dimensional Transforms Reference87982 -Node: rfftwnd_create_plan88743 -Node: rfftwnd93693 -Node: Array Dimensions for Real Multi-dimensional Transforms98154 -Node: Strides in In-place RFFTWND100202 -Node: rfftwnd_destroy_plan102594 -Node: What RFFTWND Really Computes103077 -Node: Wisdom Reference104015 -Node: fftw_export_wisdom104281 -Node: fftw_import_wisdom105928 -Node: fftw_forget_wisdom107795 -Node: Memory Allocator Reference108174 -Node: Thread safety109257 -Node: Parallel FFTW111170 -Node: Multi-threaded FFTW113264 -Node: Installation and Supported Hardware/Software114162 -Node: Usage of Multi-threaded FFTW115672 -Node: How Many Threads to Use?119885 -Node: Using Multi-threaded FFTW in a Multi-threaded Program121551 -Node: Tips for Optimal Threading122253 -Node: MPI FFTW123868 -Node: MPI FFTW Installation124710 -Node: Usage of MPI FFTW for Complex Multi-dimensional Transforms126508 -Node: MPI Data Layout131630 -Node: Usage of MPI FFTW for Real Multi-dimensional Transforms136825 -Node: Usage of MPI FFTW for Complex One-dimensional Transforms143243 -Node: MPI Tips147774 -Node: Calling FFTW from Fortran149766 -Node: Wrapper Routines151194 -Node: FFTW Constants in Fortran154536 -Node: Fortran Examples156134 -Node: Installation and Customization159823 -Node: Installation on Unix161160 -Node: Installation on non-Unix Systems165273 -Node: Installing FFTW in both single and double precision167593 -Node: gcc and Pentium hacks169277 -Node: Customizing the timer171808 -Node: Generating your own code173722 -Node: Acknowledgments176371 -Node: License and Copyright179226 -Node: Concept Index180863 -Node: Library Index194232 +Node: Top967 +Node: Introduction4377 +Node: Tutorial10733 +Node: Complex One-dimensional Transforms Tutorial12963 +Node: Complex Multi-dimensional Transforms Tutorial16465 +Node: Real One-dimensional Transforms Tutorial20186 +Ref: Real One-dimensional Transforms Tutorial-Footnote-125033 +Node: Real Multi-dimensional Transforms Tutorial25209 +Node: Multi-dimensional Array Format33354 +Node: Row-major Format34053 +Node: Column-major Format35402 +Node: Static Arrays in C36389 +Node: Dynamic Arrays in C36887 +Node: Dynamic Arrays in C-The Wrong Way38320 +Node: Words of Wisdom40070 +Node: Caveats in Using Wisdom44731 +Node: Importing and Exporting Wisdom46251 +Node: FFTW Reference49742 +Node: Data Types51087 +Node: One-dimensional Transforms Reference53837 +Node: fftw_create_plan54516 +Ref: fftw_create_plan-Footnote-160341 +Node: Discussion on Specific Plans60466 +Node: fftw61820 +Node: fftw_destroy_plan64470 +Node: What FFTW Really Computes64910 +Node: Multi-dimensional Transforms Reference66284 +Node: fftwnd_create_plan66950 +Ref: fftwnd_create_plan-Footnote-173372 +Node: fftwnd73744 +Node: fftwnd_destroy_plan76710 +Node: What FFTWND Really Computes77168 +Node: Real One-dimensional Transforms Reference78059 +Ref: Real One-dimensional Transforms Reference-Footnote-178794 +Node: rfftw_create_plan78885 +Node: rfftw83801 +Node: rfftw_destroy_plan86405 +Node: What RFFTW Really Computes86867 +Node: Real Multi-dimensional Transforms Reference88260 +Node: rfftwnd_create_plan89028 +Node: rfftwnd93976 +Node: Array Dimensions for Real Multi-dimensional Transforms98437 +Node: Strides in In-place RFFTWND100493 +Node: rfftwnd_destroy_plan102895 +Node: What RFFTWND Really Computes103385 +Node: Wisdom Reference104332 +Node: fftw_export_wisdom104606 +Node: fftw_import_wisdom106260 +Node: fftw_forget_wisdom108134 +Node: Memory Allocator Reference108520 +Node: Thread safety109606 +Node: Parallel FFTW111524 +Node: Multi-threaded FFTW113625 +Node: Installation and Supported Hardware/Software114528 +Node: Usage of Multi-threaded FFTW116048 +Ref: Usage of Multi-threaded FFTW-Footnote-1119916 +Node: How Many Threads to Use?120282 +Node: Using Multi-threaded FFTW in a Multi-threaded Program121959 +Node: Tips for Optimal Threading122669 +Node: MPI FFTW124304 +Node: MPI FFTW Installation125151 +Node: Usage of MPI FFTW for Complex Multi-dimensional Transforms126959 +Node: MPI Data Layout132067 +Node: Usage of MPI FFTW for Real Multi-dimensional Transforms137263 +Node: Usage of MPI FFTW for Complex One-dimensional Transforms143645 +Ref: Usage of MPI FFTW for Complex One-dimensional Transforms-Footnote-1147833 +Node: MPI Tips148200 +Ref: MPI Tips-Footnote-1150048 +Node: Calling FFTW from Fortran150204 +Node: Wrapper Routines151634 +Ref: Wrapper Routines-Footnote-1154778 +Node: FFTW Constants in Fortran154981 +Node: Fortran Examples156564 +Node: Installation and Customization160246 +Node: Installation on Unix161586 +Ref: Installation on Unix-Footnote-1165507 +Node: Installation on non-Unix Systems165706 +Node: Installing FFTW in both single and double precision168030 +Node: gcc and Pentium hacks169719 +Node: Customizing the timer172257 +Node: Generating your own code174178 +Node: Acknowledgments176834 +Node: License and Copyright179694 +Node: Concept Index181332 +Node: Library Index204270  End Tag Table --- fftw-2.1.3.orig/FAQ/fftw-faq.bfnn +++ fftw-2.1.3/FAQ/fftw-faq.bfnn @@ -19,6 +19,7 @@ \endcopy \copyto INFO +INFO-DIR-SECTION Development START-INFO-DIR-ENTRY * FFTW FAQ: (fftw-faq). FFTW Frequently Asked Questions with Answers. END-INFO-DIR-ENTRY debian/patches/04_configure.dpatch0000664000000000000000000002053011665017205014273 0ustar #! /bin/sh /usr/share/dpatch/dpatch-run ## 04_configure by ## ## All lines beginning with `## DP:' are a description of the patch. @DPATCH@ --- fftw-2.1.3.orig/configure.in +++ fftw-2.1.3/configure.in @@ -1,6 +1,7 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT(fftw/planner.c) AM_INIT_AUTOMAKE(fftw, 2.1.3) +AM_MAINTAINER_MODE dnl This is the version info according to the libtool versioning system. dnl It does *not* correspond to the release number. @@ -23,7 +24,7 @@ AC_ARG_ENABLE(float, [ --enable-float compile fftw for single precision], enable_float=$enableval, enable_float=no) if test "$enable_float" = "yes"; then - AC_DEFINE(FFTW_ENABLE_FLOAT) + AC_DEFINE(FFTW_ENABLE_FLOAT,1,[Compile fftw for single precision]) fi FFTW_PREFIX="" @@ -42,38 +43,39 @@ FFTW_PREFIX="$ok" AC_MSG_RESULT($FFTW_PREFIX) fi -ACX_SUBST_XXX(FFTW_PREFIX) +dnl ACX_SUBST_XXX(FFTW_PREFIX) AC_SUBST(FFTW_PREFIX) FFTW_PREFIX1="xyz" if test -n "$FFTW_PREFIX"; then FFTW_PREFIX1="$FFTW_PREFIX" fi -ACX_SUBST_XXX(FFTW_PREFIX1) +dnl ACX_SUBST_XXX(FFTW_PREFIX1) +AC_SUBST(FFTW_PREFIX1) AC_ARG_ENABLE(i386-hacks, [ --enable-i386-hacks enable gcc/x86 specific performance hacks], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_ENABLE_I386_HACKS) + AC_DEFINE(FFTW_ENABLE_I386_HACKS,1,[enable gcc/x86 specific performance hacks]) fi AC_ARG_ENABLE(pentium-timer, [ --enable-pentium-timer enable high resolution Pentium timer], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_ENABLE_PENTIUM_TIMER) + AC_DEFINE(FFTW_ENABLE_PENTIUM_TIMER,1,[enable high resolution Pentium timer]) fi AC_ARG_ENABLE(debug, [ --enable-debug compile fftw with extra runtime checks for debugging], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_DEBUG) + AC_DEFINE(FFTW_DEBUG,1,[Define to compile fftw with extra runtime checks for debugging]) fi AC_ARG_ENABLE(debug-alignment, [ --enable-debug-alignment enable runtime checks for alignment on x86], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_DEBUG_ALIGNMENT) + AC_DEFINE(FFTW_DEBUG_ALIGNMENT,1,[Define to enable runtime checks for alignement onx86]) fi AC_ARG_ENABLE(vec-recurse, [ --enable-vec-recurse enable experimental performance hack], ok=$enableval, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(FFTW_ENABLE_VECTOR_RECURSE) + AC_DEFINE(FFTW_ENABLE_VECTOR_RECURSE,1,[Define to enable experimental performance hack]) fi dnl ----------------------------------------------------------------------- @@ -87,7 +89,7 @@ ACX_PROG_CC_EGCS AC_PROG_INSTALL AC_PROG_MAKE_SET -AC_PROG_RANLIB +# AC_PROG_RANLIB AC_PROG_LN_S AM_PROG_LIBTOOL AC_CHECK_PROG(PERL, perl, perl, echo perl) @@ -110,7 +112,7 @@ dnl ----------------------------------------------------------------------- AC_SUBST(SHARED_VERSION_INFO) -AC_DEFINE_UNQUOTED(FFTW_VERSION, "$VERSION") +AC_DEFINE_UNQUOTED(FFTW_VERSION, "$VERSION", [Fftw Version]) # Get the version number that will be appended to shared libraries: SHARED_VERSION=`echo $SHARED_VERSION_INFO | awk -F':' '{ print $1 "." $3 "." $2 }'` @@ -118,7 +120,7 @@ ACX_PROG_CC_MAXOPT -ACX_GCC_ALIGNS_STACK(AC_DEFINE(FFTW_GCC_ALIGNS_STACK), [ +ACX_GCC_ALIGNS_STACK(AC_DEFINE(FFTW_GCC_ALIGNS_STACK,1,[Gcc align stack]), [ if test "$enable_i386_hacks" = yes; then if test "${acx_gcc_stack_align_bug-no}" = yes; then # we are using a gcc with a stack alignment bug, and we should @@ -171,7 +173,7 @@ AC_TRY_LINK([#include ], if (!isnan(3.14159)) isnan(2.7183);, ok=yes, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(HAVE_ISNAN) + AC_DEFINE(HAVE_ISNAN,1,[Have isnan]) fi AC_MSG_RESULT(${ok}) @@ -184,7 +186,7 @@ #endif ], [hrtime_t foobar;], ok=yes, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(HAVE_HRTIME_T) + AC_DEFINE(HAVE_HRTIME_T,1,[Have hrtime_t type]) fi AC_MSG_RESULT(${ok}) @@ -193,7 +195,7 @@ AC_ARG_ENABLE(unsafe-mulmod, [ --enable-unsafe-mulmod risk overflow for large prime sizes], enable_unsafe_mulmod=$enableval, enable_unsafe_mulmod=no) if test "$enable_unsafe_mulmod" = "yes"; then - AC_DEFINE(FFTW_ENABLE_UNSAFE_MULMOD) + AC_DEFINE(FFTW_ENABLE_UNSAFE_MULMOD,1,[Risk overflow for large prime sizes]) fi @@ -218,7 +220,7 @@ if test -z "$THREADLIBS"; then AC_CHECK_LIB(pthreads, pthread_attr_init, [THREADLIBS="-lpthreads" - AC_DEFINE(FFTW_USING_POSIX_THREADS)]) + AC_DEFINE(FFTW_USING_POSIX_THREADS,1,[Using Posix threads])]) fi # Normally (e.g. on Linux), POSIX threads are in -lpthread. # We can't just use AC_CHECK_LIB, though, because DEC lossage @@ -233,7 +235,7 @@ LIBS="$save_LIBS" if test "$ok" = "yes"; then THREADLIBS="-lpthread" - AC_DEFINE(FFTW_USING_POSIX_THREADS) + AC_DEFINE(FFTW_USING_POSIX_THREADS,1,[Using Posix threads]) fi AC_MSG_RESULT(${ok}) fi @@ -242,12 +244,12 @@ if test -z "$THREADLIBS"; then AC_CHECK_LIB(pthread, pthread_join, [THREADLIBS="-lpthread" - AC_DEFINE(FFTW_USING_POSIX_THREADS)]) + AC_DEFINE(FFTW_USING_POSIX_THREADS,1,[Using Posix threads])]) fi if test -z "$THREADLIBS"; then AC_CHECK_FUNC(pthread_create, [THREADLIBS=" " - AC_DEFINE(FFTW_USING_POSIX_THREADS)]) + AC_DEFINE(FFTW_USING_POSIX_THREADS,1,[Using Posix threads])]) fi if test -n "$THREADLIBS"; then # detect AIX lossage: threads are created detached by default @@ -257,7 +259,7 @@ [int attr=PTHREAD_CREATE_UNDETACHED;], ok=yes, ok=no) if test "$ok" = "yes"; then - AC_DEFINE(HAVE_PTHREAD_CREATE_UNDETACHED) + AC_DEFINE(HAVE_PTHREAD_CREATE_UNDETACHED,1,[Have pthread create undetached]) fi AC_MSG_RESULT(${ok}) @@ -271,7 +273,7 @@ # do this as well, especially if cc_r is not available.) AC_MSG_CHECKING([if special flags are required for pthreads]) ok=no - AC_REQUIRE([AC_CANONICAL_HOST]) + AC_CANONICAL_HOST case "${host_cpu}-${host_os}" in *-aix*) CFLAGS="-D_THREAD_SAFE $CFLAGS" ok="-D_THREAD_SAFE";; @@ -284,32 +286,32 @@ if test -z "$THREADLIBS"; then AC_CHECK_LIB(thread, thr_create, [THREADLIBS="-lthread" - AC_DEFINE(FFTW_USING_SOLARIS_THREADS)]) + AC_DEFINE(FFTW_USING_SOLARIS_THREADS,1,[Using Solaris threads])]) fi # Mach C threads: if test -z "$THREADLIBS"; then AC_CHECK_FUNC(cthread_fork, [THREADLIBS=" " - AC_DEFINE(FFTW_USING_MACH_THREADS)]) + AC_DEFINE(FFTW_USING_MACH_THREADS,1,[Using Mach threads])]) AC_CHECK_HEADERS(mach/cthreads.h cthreads.h cthread.h) fi if test -z "$THREADLIBS"; then AC_CHECK_LIB(cthread, cthread_fork, [THREADLIBS="-lcthread" - AC_DEFINE(FFTW_USING_MACH_THREADS)]) + AC_DEFINE(FFTW_USING_MACH_THREADS,1,[Using Mach threads])]) AC_CHECK_HEADERS(mach/cthreads.h cthreads.h cthread.h) fi if test -z "$THREADLIBS"; then AC_CHECK_LIB(cthreads, cthread_fork, [THREADLIBS="-lcthreads" - AC_DEFINE(FFTW_USING_MACH_THREADS)]) + AC_DEFINE(FFTW_USING_MACH_THREADS,1,[Using Mach threads])]) AC_CHECK_HEADERS(mach/cthreads.h cthreads.h cthread.h) fi # BeOS threads: if test -z "$THREADLIBS"; then AC_CHECK_FUNC(spawn_thread, [THREADLIBS=" " - AC_DEFINE(FFTW_USING_BEOS_THREADS)]) + AC_DEFINE(FFTW_USING_BEOS_THREADS,1,[Using BeOS threads])]) fi if test -z "$THREADLIBS"; then AC_MSG_ERROR(couldn't find threads library for --enable-threads) @@ -335,9 +337,9 @@ if test "$enable_mpi" = "yes"; then save_CC="$CC" CC="$MPICC" - if test -z "$MPILIBS"; then - AC_CHECK_FUNC(MPI_Init, [MPILIBS=" "]) - fi + #if test -z "$MPILIBS"; then + # AC_CHECK_FUNC(MPI_Init, [MPILIBS=" "]) + #fi if test -z "$MPILIBS"; then AC_CHECK_LIB(mpi, MPI_Init, [MPILIBS="-lmpi"]) fi @@ -352,7 +354,7 @@ AC_MSG_CHECKING([for MPI_Comm_f2c]) ok=yes AC_TRY_LINK([#include - ], MPI_Comm_f2c(0);, AC_DEFINE(HAVE_MPI_COMM_F2C), ok=no) + ], MPI_Comm_f2c(0);, AC_DEFINE(HAVE_MPI_COMM_F2C,1,[Have MPI_Comm_f2c]), ok=no) AC_MSG_RESULT($ok) fi CC="$save_CC" debian/patches/00_fix_aclocal.dpatch0000664000000000000000000000355711665017205014564 0ustar #! /bin/sh /usr/share/dpatch/dpatch-run ## 00_fix_aclocal by ## ## All lines beginning with `## DP:' are a description of the patch. ## DP: moving /u/s/kino/help to /u/s/doc/kino/help @DPATCH@ --- fftw-2.1.3.orig/acinclude.m4 +++ fftw-2.1.3/acinclude.m4 @@ -272,19 +272,19 @@ mangle_try=unknown case $ac_f77_mangle_type in lowercase) - AC_DEFINE(FFTW_FORTRANIZE_LOWERCASE) + AC_DEFINE(FFTW_FORTRANIZE_LOWERCASE,1,[Fortranize lowercase]) mangle_try=foo_bar_ ;; lowercase-underscore) - AC_DEFINE(FFTW_FORTRANIZE_LOWERCASE_UNDERSCORE) + AC_DEFINE(FFTW_FORTRANIZE_LOWERCASE_UNDERSCORE,1,[Fortranize lowercase underscore]) mangle_try=foo_bar__ ;; uppercase) - AC_DEFINE(FFTW_FORTRANIZE_UPPERCASE) + AC_DEFINE(FFTW_FORTRANIZE_UPPERCASE,1,[Fortranize uppercase]) mangle_try=FOO_BAR_ ;; uppercase-underscore) - AC_DEFINE(FFTW_FORTRANIZE_UPPERCASE_UNDERSCORE) + AC_DEFINE(FFTW_FORTRANIZE_UPPERCASE_UNDERSCORE,1,[Fortranize uppercase underscore]) mangle_try=FOO_BAR__ ;; esac @@ -297,7 +297,7 @@ LIBS="mangle-func.o $FLIBS $LIBS" AC_TRY_LINK(,$mangle_try();, [ac_f77_mangle_underscore=yes; - AC_DEFINE(FFTW_FORTRANIZE_EXTRA_UNDERSCORE)], + AC_DEFINE(FFTW_FORTRANIZE_EXTRA_UNDERSCORE,1,[Fortranize extra underscore])], [ac_f77_mangle_underscore=no]) LIBS="$ac_save_LIBS" AC_LANG_RESTORE @@ -312,7 +312,7 @@ define(ACX_SUBST_XXX, [ifdef([ACX_SUBST_XXX_$1], , [define([ACX_SUBST_XXX_$1], )dnl -AC_DIVERT_PUSH(AC_DIVERSION_SED)dnl +dnl AC_DIVERT_PUSH(AC_DIVERSION_SED)dnl s=XXX_$1_XXX=[$]$1=g -AC_DIVERT_POP()dnl +dnl AC_DIVERT_POP()dnl ])]) debian/patches/01_fix_makefiles.dpatch0000664000000000000000000004253411665017205015125 0ustar #! /bin/sh /usr/share/dpatch/dpatch-run ## 01_fix_makefiles by ## ## All lines beginning with `## DP:' are a description of the patch. ## DP: moving /u/s/kino/help to /u/s/doc/kino/help @DPATCH@ --- fftw-2.1.3.orig/threads/Makefile.am +++ fftw-2.1.3/threads/Makefile.am @@ -2,15 +2,15 @@ # the --enable-threads option. This requires us to use the combination # of foo_* and EXTRA_* variables below. -lib_LTLIBRARIES = @FFTW_THREADS_LIBLIST@ -include_HEADERS = @FFTW_THREADS_INCLUDELIST@ -noinst_PROGRAMS = @FFTW_THREADS_PROGLIST@ - -EXTRA_LTLIBRARIES = libXXX_FFTW_PREFIX_XXXfftw_threads.la \ - libXXX_FFTW_PREFIX_XXXrfftw_threads.la -EXTRA_HEADERS = XXX_FFTW_PREFIX_XXXfftw_threads.h \ - XXX_FFTW_PREFIX_XXXrfftw_threads.h -EXTRA_PROGRAMS = fftw_threads_test rfftw_threads_test +#lib_LTLIBRARIES = @FFTW_THREADS_LIBLIST@ +#include_HEADERS = @FFTW_THREADS_INCLUDELIST@ +#noinst_PROGRAMS = @FFTW_THREADS_PROGLIST@ + +lib_LTLIBRARIES = lib@FFTW_PREFIX@fftw_threads.la \ + lib@FFTW_PREFIX@rfftw_threads.la +include_HEADERS = @FFTW_PREFIX@fftw_threads.h \ + @FFTW_PREFIX@rfftw_threads.h +noinst_PROGRAMS = fftw_threads_test rfftw_threads_test FFTWDIR=../fftw RFFTWDIR=../rfftw @@ -18,50 +18,54 @@ -I$(srcdir)/$(FFTWDIR) -I$(srcdir)/$(RFFTWDIR) -I$(srcdir) -libXXX_FFTW_PREFIX_XXXfftw_threads_la_SOURCES = \ +lib@FFTW_PREFIX@fftw_threads_la_SOURCES = \ executor_threads.c fftw_threads.c \ fftwnd_threads.c \ fftw_f77_threads.c \ - XXX_FFTW_PREFIX_XXXfftw_threads.h \ + @FFTW_PREFIX@fftw_threads.h \ fftw_threads-int.h -libXXX_FFTW_PREFIX_XXXfftw_threads_la_LDFLAGS = \ - -version-info @SHARED_VERSION_INFO@ \ - -rpath $(libdir) - -XXX_FFTW_PREFIX1_XXXfftw_threads.h: fftw_threads.h - rm -f XXX_FFTW_PREFIX_XXXfftw_threads.h - sed 's/ XXX_FFTW_PREFIX_XXXfftw_threads.h +lib@FFTW_PREFIX@fftw_threads_la_LDFLAGS = \ + -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@fftw_threads_la_LIBADD = \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la -lpthread + +@FFTW_PREFIX1@fftw_threads.h: fftw_threads.h + rm -f @FFTW_PREFIX@fftw_threads.h + sed 's/ @FFTW_PREFIX@fftw_threads.h fftw_threads_test_SOURCES = fftw_threads_test.c fftw_threads_test_LDADD = ../tests/test_main.o \ - libXXX_FFTW_PREFIX_XXXfftw_threads.la \ - $(FFTWDIR)/libXXX_FFTW_PREFIX_XXXfftw.la \ + lib@FFTW_PREFIX@fftw_threads.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la \ @THREADLIBS@ -libXXX_FFTW_PREFIX_XXXrfftw_threads_la_SOURCES = \ +lib@FFTW_PREFIX@rfftw_threads_la_SOURCES = \ rexec_threads.c rexec2_threads.c \ rfftwnd_threads.c \ rfftw_f77_threads.c \ - XXX_FFTW_PREFIX_XXXfftw_threads.h \ + @FFTW_PREFIX@fftw_threads.h \ fftw_threads-int.h \ - XXX_FFTW_PREFIX_XXXrfftw_threads.h -libXXX_FFTW_PREFIX_XXXrfftw_threads_la_LDFLAGS = \ - -version-info @SHARED_VERSION_INFO@ \ - -rpath $(libdir) - -XXX_FFTW_PREFIX1_XXXrfftw_threads.h: rfftw_threads.h - rm -f XXX_FFTW_PREFIX_XXXrfftw_threads.h - sed 's/ XXX_FFTW_PREFIX_XXXrfftw_threads.h + @FFTW_PREFIX@rfftw_threads.h +lib@FFTW_PREFIX@rfftw_threads_la_LDFLAGS = \ + -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@rfftw_threads_la_LIBADD = \ + lib@FFTW_PREFIX@fftw_threads.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la \ + $(RFFTWDIR)/lib@FFTW_PREFIX@rfftw.la -lpthread + +@FFTW_PREFIX1@rfftw_threads.h: rfftw_threads.h + rm -f @FFTW_PREFIX@rfftw_threads.h + sed 's/ @FFTW_PREFIX@rfftw_threads.h -CLEANFILES = XXX_FFTW_PREFIX1_XXXfftw_threads.h \ - XXX_FFTW_PREFIX1_XXXrfftw_threads.h +CLEANFILES = @FFTW_PREFIX1@fftw_threads.h \ + @FFTW_PREFIX1@rfftw_threads.h rfftw_threads_test_SOURCES = rfftw_threads_test.c rfftw_threads_test_LDADD = ../tests/test_main.o \ - libXXX_FFTW_PREFIX_XXXrfftw_threads.la \ - libXXX_FFTW_PREFIX_XXXfftw_threads.la \ - $(RFFTWDIR)/libXXX_FFTW_PREFIX_XXXrfftw.la \ - $(FFTWDIR)/libXXX_FFTW_PREFIX_XXXfftw.la \ + lib@FFTW_PREFIX@rfftw_threads.la \ + lib@FFTW_PREFIX@fftw_threads.la \ + $(RFFTWDIR)/lib@FFTW_PREFIX@rfftw.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la \ @THREADLIBS@ # for some reason, automake tries to use autoheader in order to --- fftw-2.1.3.orig/rfftw/Makefile.am +++ fftw-2.1.3/rfftw/Makefile.am @@ -1,8 +1,8 @@ # This file was automatically generated # DO NOT EDIT! # -lib_LTLIBRARIES = libXXX_FFTW_PREFIX_XXXrfftw.la -include_HEADERS = XXX_FFTW_PREFIX_XXXrfftw.h +lib_LTLIBRARIES = lib@FFTW_PREFIX@rfftw.la +include_HEADERS = @FFTW_PREFIX@rfftw.h INCLUDES = -I$(srcdir)/../fftw -I$(srcdir) NOTW_CODELETS= frc_1.c frc_2.c frc_3.c frc_4.c frc_5.c frc_6.c frc_7.c frc_8.c frc_9.c frc_10.c frc_11.c frc_12.c frc_13.c frc_14.c frc_15.c frc_16.c frc_32.c frc_64.c frc_128.c @@ -14,18 +14,19 @@ OTHERSRC = rconfig.c rplanner.c rexec.c rexec2.c rfftwnd.c rgeneric.c \ rfftwf77.c -libXXX_FFTW_PREFIX_XXXrfftw_la_SOURCES = $(CODELETS) $(OTHERSRC) \ +lib@FFTW_PREFIX@rfftw_la_SOURCES = $(CODELETS) $(OTHERSRC) \ rfftw.h -libXXX_FFTW_PREFIX_XXXrfftw_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@rfftw_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +#lib@FFTW_PREFIX@rfftw_la_LIBADD = $(srcdir)/fftw/lib@FFTW_PREFIX@fftw.la MAINTAINERCLEANFILES = $(CODELETS) rconfig.c DISTCLEANFILES = srfftw.h drfftw.h -XXX_FFTW_PREFIX1_XXXrfftw.h: rfftw.h - rm -f XXX_FFTW_PREFIX_XXXrfftw.h - sed 's/ XXX_FFTW_PREFIX_XXXrfftw.h +@FFTW_PREFIX1@rfftw.h: rfftw.h + rm -f @FFTW_PREFIX@rfftw.h + sed 's/ @FFTW_PREFIX@rfftw.h -CLEANFILES = XXX_FFTW_PREFIX1_XXXrfftw.h +CLEANFILES = @FFTW_PREFIX1@rfftw.h # for some reason, automake tries to use autoheader in order to # generate config.h.in, and fails because config.h.in is GNU-lly --- fftw-2.1.3.orig/fftw/Makefile.am +++ fftw-2.1.3/fftw/Makefile.am @@ -1,8 +1,8 @@ # This file was automatically generated # DO NOT EDIT! # -lib_LTLIBRARIES = libXXX_FFTW_PREFIX_XXXfftw.la -include_HEADERS = XXX_FFTW_PREFIX_XXXfftw.h +lib_LTLIBRARIES = lib@FFTW_PREFIX@fftw.la +include_HEADERS = @FFTW_PREFIX@fftw.h INCLUDES = -I$(srcdir)/../fftw -I$(srcdir) NOTW_CODELETS= fn_1.c fn_2.c fn_3.c fn_4.c fn_5.c fn_6.c fn_7.c fn_8.c fn_9.c fn_10.c fn_11.c fn_12.c fn_13.c fn_14.c fn_15.c fn_16.c fn_32.c fn_64.c @@ -15,18 +15,18 @@ generic.c fftwnd.c malloc.c wisdom.c wisdomio.c putils.c rader.c \ fftwf77.c f77_func.h -libXXX_FFTW_PREFIX_XXXfftw_la_SOURCES = $(CODELETS) $(OTHERSRC) \ +lib@FFTW_PREFIX@fftw_la_SOURCES = $(CODELETS) $(OTHERSRC) \ fftw.h fftw-int.h -libXXX_FFTW_PREFIX_XXXfftw_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@fftw_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ MAINTAINERCLEANFILES = $(CODELETS) config.c DISTCLEANFILES = fftw.h sfftw.h dfftw.h -XXX_FFTW_PREFIX1_XXXfftw.h: fftw.h - rm -f XXX_FFTW_PREFIX_XXXfftw.h - sed 's/ XXX_FFTW_PREFIX_XXXfftw.h +@FFTW_PREFIX1@fftw.h: fftw.h + rm -f @FFTW_PREFIX@fftw.h + sed 's/ @FFTW_PREFIX@fftw.h -CLEANFILES = XXX_FFTW_PREFIX1_XXXfftw.h +CLEANFILES = @FFTW_PREFIX1@fftw.h # for some reason, automake tries to use autoheader in order to # generate config.h.in, and fails because config.h.in is GNU-lly --- fftw-2.1.3.orig/tests/Makefile.am +++ fftw-2.1.3/tests/Makefile.am @@ -6,11 +6,11 @@ INCLUDES = -I$(srcdir)/$(FFTWDIR) -I$(srcdir)/$(RFFTWDIR) fftw_test_SOURCES = fftw_test.c test_main.c -fftw_test_LDADD = $(FFTWDIR)/libXXX_FFTW_PREFIX_XXXfftw.la +fftw_test_LDADD = $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la rfftw_test_SOURCES = rfftw_test.c test_main.c -rfftw_test_LDADD = $(RFFTWDIR)/libXXX_FFTW_PREFIX_XXXrfftw.la \ - $(FFTWDIR)/libXXX_FFTW_PREFIX_XXXfftw.la +rfftw_test_LDADD = $(RFFTWDIR)/lib@FFTW_PREFIX@rfftw.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la check: fftw-tests rfftw-tests --- fftw-2.1.3.orig/mpi/Makefile.am +++ fftw-2.1.3/mpi/Makefile.am @@ -2,68 +2,74 @@ # the --enable-mpi option. This requires us to use the combination # of foo_* and EXTRA_* variables below. -lib_LTLIBRARIES = @FFTW_MPI_LIBLIST@ -include_HEADERS = @FFTW_MPI_INCLUDELIST@ -noinst_PROGRAMS = @FFTW_MPI_PROGLIST@ - -EXTRA_LTLIBRARIES = libXXX_FFTW_PREFIX_XXXfftw_mpi.la \ - libXXX_FFTW_PREFIX_XXXrfftw_mpi.la -EXTRA_HEADERS = XXX_FFTW_PREFIX_XXXfftw_mpi.h XXX_FFTW_PREFIX_XXXrfftw_mpi.h -EXTRA_PROGRAMS = test_sched test_transpose_mpi fftw_mpi_test rfftw_mpi_test +#lib_LTLIBRARIES = @FFTW_MPI_LIBLIST@ +#include_HEADERS = @FFTW_MPI_INCLUDELIST@ +#noinst_PROGRAMS = @FFTW_MPI_PROGLIST@ + +lib_LTLIBRARIES = lib@FFTW_PREFIX@fftw_mpi.la \ + lib@FFTW_PREFIX@rfftw_mpi.la +include_HEADERS = @FFTW_PREFIX@fftw_mpi.h @FFTW_PREFIX@rfftw_mpi.h +noinst_PROGRAMS = test_sched test_transpose_mpi fftw_mpi_test rfftw_mpi_test EXTRA_DIST = README.f77 -CC=@MPICC@ +#CC=@MPICC@ FFTWDIR=../fftw RFFTWDIR=../rfftw -INCLUDES = -I$(srcdir)/../tests \ +INCLUDES = -I$(srcdir)/../tests -I/usr/include/mpi \ -I$(srcdir)/$(FFTWDIR) -I$(srcdir)/$(RFFTWDIR) -I$(srcdir) -libXXX_FFTW_PREFIX_XXXfftw_mpi_la_SOURCES = \ +lib@FFTW_PREFIX@fftw_mpi_la_SOURCES = \ sched.c sched.h TOMS_transpose.c TOMS_transpose.h \ transpose_mpi.c fftwnd_mpi.c fftw_mpi.c \ - XXX_FFTW_PREFIX_XXXfftw_mpi.h \ + @FFTW_PREFIX@fftw_mpi.h \ fftw_f77_mpi.c fftw_f77_mpi.h -libXXX_FFTW_PREFIX_XXXfftw_mpi_la_LDFLAGS = \ - -version-info @SHARED_VERSION_INFO@ \ - -rpath $(libdir) - -XXX_FFTW_PREFIX1_XXXfftw_mpi.h: fftw_mpi.h - rm -f XXX_FFTW_PREFIX_XXXfftw_mpi.h - sed 's/ XXX_FFTW_PREFIX_XXXfftw_mpi.h +lib@FFTW_PREFIX@fftw_mpi_la_LDFLAGS = \ + -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@fftw_mpi_la_LIBADD = \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la \ + @MPILIBS@ -libXXX_FFTW_PREFIX_XXXrfftw_mpi_la_SOURCES = \ - rfftwnd_mpi.c XXX_FFTW_PREFIX_XXXrfftw_mpi.h \ + +@FFTW_PREFIX1@fftw_mpi.h: fftw_mpi.h + rm -f @FFTW_PREFIX@fftw_mpi.h + sed 's/ @FFTW_PREFIX@fftw_mpi.h + +lib@FFTW_PREFIX@rfftw_mpi_la_SOURCES = \ + rfftwnd_mpi.c @FFTW_PREFIX@rfftw_mpi.h \ rfftw_f77_mpi.c -libXXX_FFTW_PREFIX_XXXrfftw_mpi_la_LDFLAGS = \ - -version-info @SHARED_VERSION_INFO@ \ - -rpath $(libdir) - -XXX_FFTW_PREFIX1_XXXrfftw_mpi.h: rfftw_mpi.h - rm -f XXX_FFTW_PREFIX_XXXrfftw_mpi.h - sed 's/ XXX_FFTW_PREFIX_XXXrfftw_mpi.h +lib@FFTW_PREFIX@rfftw_mpi_la_LDFLAGS = \ + -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@rfftw_mpi_la_LIBADD = \ + lib@FFTW_PREFIX@fftw_mpi.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la $(RFFTWDIR)/lib@FFTW_PREFIX@rfftw.la \ + @MPILIBS@ + +@FFTW_PREFIX1@rfftw_mpi.h: rfftw_mpi.h + rm -f @FFTW_PREFIX@rfftw_mpi.h + sed 's/ @FFTW_PREFIX@rfftw_mpi.h -CLEANFILES = XXX_FFTW_PREFIX1_XXXfftw_mpi.h XXX_FFTW_PREFIX1_XXXrfftw_mpi.h +CLEANFILES = @FFTW_PREFIX1@fftw_mpi.h @FFTW_PREFIX1@rfftw_mpi.h test_sched_SOURCES = test_sched.c -test_sched_LDADD = libXXX_FFTW_PREFIX_XXXfftw_mpi.la \ - $(FFTWDIR)/libXXX_FFTW_PREFIX_XXXfftw.la @MPILIBS@ +test_sched_LDADD = lib@FFTW_PREFIX@fftw_mpi.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la @MPILIBS@ test_transpose_mpi_SOURCES = test_transpose_mpi.c -test_transpose_mpi_LDADD = libXXX_FFTW_PREFIX_XXXfftw_mpi.la \ - $(FFTWDIR)/libXXX_FFTW_PREFIX_XXXfftw.la @MPILIBS@ +test_transpose_mpi_LDADD = lib@FFTW_PREFIX@fftw_mpi.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la @MPILIBS@ fftw_mpi_test_SOURCES = fftw_mpi_test.c fftw_mpi_test_LDADD = ../tests/test_main.o \ - libXXX_FFTW_PREFIX_XXXfftw_mpi.la \ - $(FFTWDIR)/libXXX_FFTW_PREFIX_XXXfftw.la @MPILIBS@ + lib@FFTW_PREFIX@fftw_mpi.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la @MPILIBS@ rfftw_mpi_test_SOURCES = rfftw_mpi_test.c rfftw_mpi_test_LDADD = ../tests/test_main.o \ - libXXX_FFTW_PREFIX_XXXrfftw_mpi.la \ - libXXX_FFTW_PREFIX_XXXfftw_mpi.la \ - $(RFFTWDIR)/libXXX_FFTW_PREFIX_XXXrfftw.la \ - $(FFTWDIR)/libXXX_FFTW_PREFIX_XXXfftw.la \ + lib@FFTW_PREFIX@rfftw_mpi.la \ + lib@FFTW_PREFIX@fftw_mpi.la \ + $(RFFTWDIR)/lib@FFTW_PREFIX@rfftw.la \ + $(FFTWDIR)/lib@FFTW_PREFIX@fftw.la \ @MPILIBS@ # for some reason, automake tries to use autoheader in order to --- fftw-2.1.3.orig/gensrc/Makefile.fftw.am +++ fftw-2.1.3/gensrc/Makefile.fftw.am @@ -1,8 +1,8 @@ # This file was automatically generated # DO NOT EDIT! # -lib_LTLIBRARIES = libXXX_FFTW_PREFIX_XXXfftw.la -include_HEADERS = XXX_FFTW_PREFIX_XXXfftw.h +lib_LTLIBRARIES = lib@FFTW_PREFIX@fftw.la +include_HEADERS = @FFTW_PREFIX@fftw.h INCLUDES = -I$(srcdir)/../fftw -I$(srcdir) NOTW_CODELETS=@NOTW_CODELETS@ @@ -15,18 +15,18 @@ generic.c fftwnd.c malloc.c wisdom.c wisdomio.c putils.c rader.c \ fftwf77.c f77_func.h -libXXX_FFTW_PREFIX_XXXfftw_la_SOURCES = $(CODELETS) $(OTHERSRC) \ +lib@FFTW_PREFIX@fftw_la_SOURCES = $(CODELETS) $(OTHERSRC) \ fftw.h fftw-int.h -libXXX_FFTW_PREFIX_XXXfftw_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@fftw_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ MAINTAINERCLEANFILES = $(CODELETS) config.c DISTCLEANFILES = fftw.h sfftw.h dfftw.h -XXX_FFTW_PREFIX1_XXXfftw.h: fftw.h - rm -f XXX_FFTW_PREFIX_XXXfftw.h - sed 's/ XXX_FFTW_PREFIX_XXXfftw.h +@FFTW_PREFIX1@fftw.h: fftw.h + rm -f @FFTW_PREFIX@fftw.h + sed 's/ @FFTW_PREFIX@fftw.h -CLEANFILES = XXX_FFTW_PREFIX1_XXXfftw.h +CLEANFILES = @FFTW_PREFIX1@fftw.h # for some reason, automake tries to use autoheader in order to # generate config.h.in, and fails because config.h.in is GNU-lly --- fftw-2.1.3.orig/gensrc/Makefile.rfftw.am +++ fftw-2.1.3/gensrc/Makefile.rfftw.am @@ -1,8 +1,8 @@ # This file was automatically generated # DO NOT EDIT! # -lib_LTLIBRARIES = libXXX_FFTW_PREFIX_XXXrfftw.la -include_HEADERS = XXX_FFTW_PREFIX_XXXrfftw.h +lib_LTLIBRARIES = lib@FFTW_PREFIX@rfftw.la +include_HEADERS = @FFTW_PREFIX@rfftw.h INCLUDES = -I$(srcdir)/../fftw -I$(srcdir) NOTW_CODELETS=@NOTW_CODELETS@ @@ -14,18 +14,19 @@ OTHERSRC = rconfig.c rplanner.c rexec.c rexec2.c rfftwnd.c rgeneric.c \ rfftwf77.c -libXXX_FFTW_PREFIX_XXXrfftw_la_SOURCES = $(CODELETS) $(OTHERSRC) \ +lib@FFTW_PREFIX@rfftw_la_SOURCES = $(CODELETS) $(OTHERSRC) \ rfftw.h -libXXX_FFTW_PREFIX_XXXrfftw_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@rfftw_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@ +lib@FFTW_PREFIX@rfftw_la_LIBADD = $(FFTWDIR)/fftw/lib@FFTW_PREFIX@fftw.la MAINTAINERCLEANFILES = $(CODELETS) rconfig.c DISTCLEANFILES = srfftw.h drfftw.h -XXX_FFTW_PREFIX1_XXXrfftw.h: rfftw.h - rm -f XXX_FFTW_PREFIX_XXXrfftw.h - sed 's/ XXX_FFTW_PREFIX_XXXrfftw.h +@FFTW_PREFIX1@rfftw.h: rfftw.h + rm -f @FFTW_PREFIX@rfftw.h + sed 's/ @FFTW_PREFIX@rfftw.h -CLEANFILES = XXX_FFTW_PREFIX1_XXXrfftw.h +CLEANFILES = @FFTW_PREFIX1@rfftw.h # for some reason, automake tries to use autoheader in order to # generate config.h.in, and fails because config.h.in is GNU-lly debian/fftw-dev.install0000664000000000000000000000005511665024535012311 0ustar usr/include/* usr/lib/lib*.a usr/lib/lib*.so