pax_global_header00006660000000000000000000000064146203423000014504gustar00rootroot0000000000000052 comment=c2dcab07d4cff11b80cfcaa3f2f5acab55f05760 mpi4py-fft-2.0.6/000077500000000000000000000000001462034230000135105ustar00rootroot00000000000000mpi4py-fft-2.0.6/.github/000077500000000000000000000000001462034230000150505ustar00rootroot00000000000000mpi4py-fft-2.0.6/.github/dependabot.yml000066400000000000000000000001601462034230000176750ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: github-actions directory: / schedule: interval: weekly mpi4py-fft-2.0.6/.github/workflows/000077500000000000000000000000001462034230000171055ustar00rootroot00000000000000mpi4py-fft-2.0.6/.github/workflows/main.yml000066400000000000000000000030701462034230000205540ustar00rootroot00000000000000name: main on: push: branches: - master pull_request: branches: - master workflow_dispatch: jobs: test: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: - ubuntu-latest - macos-latest mpi: - mpich - openmpi py: # - "3.7" # - "3.8" # - "3.9" - "3.10" - "3.11" - "3.12" steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.py }} - run: | # Install fftw case $(uname) in Linux) sudo apt update sudo apt install -y -q libfftw3-dev ;; Darwin) export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 brew install fftw ;; esac - run: python -m pip install -U pip build - run: python -m build - uses: mpi4py/setup-mpi@v1 with: mpi: ${{ matrix.mpi }} - run: pip install -vvv dist/mpi4py_fft-*.whl env: CFLAGS: "-O0" - run: pip install -r conf/requirements-test.txt - if: matrix.mpi == 'mpich' && startsWith(matrix.os, 'ubuntu') run: ./runtests.sh working-directory: tests - if: matrix.mpi == 'mpich' && startsWith(matrix.os, 'ubuntu') uses: codecov/codecov-action@v4 with: files: test/coverage.xml name: ${{ matrix.os }}-${{ matrix.mpi }}-${{ matrix.py }} env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 
mpi4py-fft-2.0.6/.github/workflows/release.yml000066400000000000000000000024251462034230000212530ustar00rootroot00000000000000name: release permissions: contents: read on: release: types: - published workflow_dispatch: jobs: distribution: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v5 with: python-version: 3 - name: Upgrade pip run: python -m pip install -U pip - name: Install build and twine run: python -m pip install -U build twine - name: Build distribution run: python -m build --sdist - name: Check source distribution run: python -m twine check dist/*.tar.gz - name: Upload distribution assets uses: actions/upload-artifact@v4 with: name: release path: | dist/*.tar.gz pypi-publish: if: ${{ github.event_name == 'release' }} name: Upload release to PyPI runs-on: ubuntu-latest needs: distribution environment: name: pypi url: https://pypi.org/p/mpi4py-fft permissions: id-token: write steps: - name: Download distribution assets uses: actions/download-artifact@v4 with: name: release path: dist - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 mpi4py-fft-2.0.6/.gitignore000066400000000000000000000002751462034230000155040ustar00rootroot00000000000000*.so *.py[cod] *.*-workspace *.swp .eggs/ build/ htmlcov/ _build/ _static/ _templates/ *.dat *.html *.xml *.h5 *.nc *.xdmf fftwf_*.* fftwl_*.* __pycache__ .pytest_cache mpi4py_fft.egg-info mpi4py-fft-2.0.6/.pylintrc000066400000000000000000000007661462034230000153660ustar00rootroot00000000000000[MASTER] disable = locally-disabled, too-few-public-methods, too-many-instance-attributes, invalid-name, missing-docstring, redefined-builtin, unused-argument, too-many-arguments, too-many-return-statements, line-too-long, no-member, not-callable, len-as-condition, consider-using-enumerate, wildcard-import, import-error, #arguments-differ extension-pkg-whitelist = mpi4py,pyfftw,numpy,netCDF4 generated-members=np.floating 
reports = no mpi4py-fft-2.0.6/.readthedocs.yaml000066400000000000000000000002621462034230000167370ustar00rootroot00000000000000conda: environment: docs/environment.yml version: 2 build: os: ubuntu-22.04 tools: python: "mambaforge-22.9" sphinx: configuration: docs/source/conf.py formats: all mpi4py-fft-2.0.6/LICENSE.rst000066400000000000000000000026371462034230000153340ustar00rootroot00000000000000BSD 2-Clause License :Author: Lisandro Dalcin and Mikael Mortensen :Contact: dalcinl@gmail.com or mikaem@math.uio.no Copyright (c) 2017, Lisandro Dalcin and Mikael Mortensen. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
mpi4py-fft-2.0.6/MANIFEST.in000066400000000000000000000002411462034230000152430ustar00rootroot00000000000000include *.py *.txt *.rst recursive-include mpi4py_fft *.py *.pyx *.pxd fftw_planxfftn.[c,h] recursive-exclude mpi4py_fft fftw[f,l]_xfftn.pyx fftw[f,l]_xfftn.pxd mpi4py-fft-2.0.6/Makefile000066400000000000000000000005461462034230000151550ustar00rootroot00000000000000VERSION=$(shell python3 -c "import mpi4py_fft; print(mpi4py_fft.__version__)") default: python setup.py build build_ext -i pip: rm -f dist/* python setup.py sdist twine upload dist/* tag: git tag $(VERSION) git push --tags publish: tag pip clean: git clean -dxf mpi4py_fft cd docs && make clean && cd .. @rm -rf *.egg-info/ build/ dist/ .eggs/ mpi4py-fft-2.0.6/README.rst000066400000000000000000000056441462034230000152100ustar00rootroot00000000000000mpi4py-fft ---------- .. image:: https://dev.azure.com/mpi4py/mpi4py-fft/_apis/build/status/mpi4py.mpi4py-fft?branchName=master :target: https://dev.azure.com/mpi4py/mpi4py-fft .. image:: https://codecov.io/bb/mpi4py/mpi4py-fft/branch/master/graph/badge.svg :target: https://codecov.io/bb/mpi4py/mpi4py-fft .. image:: https://readthedocs.org/projects/mpi4py-fft/badge/?version=latest :target: https://mpi4py-fft.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status .. image:: https://anaconda.org/conda-forge/mpi4py-fft/badges/downloads.svg :target: https://anaconda.org/conda-forge/mpi4py-fft :alt: Total downloads from conda-forge mpi4py-fft is a Python package for computing Fast Fourier Transforms (FFTs). Large arrays are distributed and communications are handled under the hood by MPI for Python (mpi4py). To distribute large arrays we are using a `new and completely generic algorithm `_ that allows for any index set of a multidimensional array to be distributed. We can distribute just one index (a slab decomposition), two index sets (pencil decomposition) or even more for higher-dimensional arrays. 
mpi4py-fft comes with its own Python interface to the serial `FFTW `_ library. This interface can be used much like `pyfftw `_, and even for real-to-real transforms, like discrete cosine or sine transforms. Further documentation is found at `readthedocs `_. Installation ------------ The mpi4py-fft package can be installed using:: pip install mpi4py-fft or, to get the latest version from GitHub:: pip install git+https://github.com/mpi4py/mpi4py-fft@master Install with conda from the coda-forge channel:: conda install -c conda-forge mpi4py-fft or build it with conda build from the main source directory:: conda build -c conda-forge conf/ conda create --name mpi4py_fft mpi4py_fft --use-local which will pull in the required dependencies from the conda-forge channel. Note that mpi4py-fft depends on Python packages * mpi4py * numpy * cython and the serial C-library * `FFTW `_ Note in particular that *mpi4py* requires that you have a working MPI installation, with the compiler wrapper *mpicc*, on your search path. The FFTW header and libraries must also be available on the search path, and we will build wrappers for any precision found of the FFTW libraries. All of the above dependencies are available and will be downloaded through the conda-forge channel if conda is used for installation. However, pip will not help you with MPI or FFTW. For IO you need to install either `h5py `_ or `netCDF4 `_ with support for MPI. Both are available from the coda-forge channel through:: conda install -c conda-forge h5py=*=mpi* netcdf4=*=mpi* mpi4py-fft-2.0.6/conf/000077500000000000000000000000001462034230000144355ustar00rootroot00000000000000mpi4py-fft-2.0.6/conf/build.sh000066400000000000000000000003231462034230000160660ustar00rootroot00000000000000#!/bin/bash if [ "$(uname)" == "Darwin" ] then export LDFLAGS="-Wl,-rpath,$PREFIX/lib" export MACOSX_DEPLOYMENT_TARGET=10.9 fi $PYTHON -m pip install . 
--no-deps --ignore-installed --no-cache-dir -vvv mpi4py-fft-2.0.6/conf/conda_build_config.yaml000066400000000000000000000000641462034230000211110ustar00rootroot00000000000000#mpi: # - 'openmpi' # - 'mpich' numpy: - '1.21' mpi4py-fft-2.0.6/conf/meta.yaml000066400000000000000000000011371462034230000162510ustar00rootroot00000000000000{% set data = load_setup_py_data() %} package: name: mpi4py_fft_test version: {{ data.get('version') }} source: git_url: ../ build: number: 2002 requirements: build: - {{ compiler('c') }} - {{ compiler('cxx') }} host: - python - cython - numpy - pip - fftw - setuptools run: - python - mpi4py - mpich - {{ pin_compatible('numpy') }} - fftw - hdf5 * mpi_* - h5py * mpi_* - netcdf4 * mpi_* test: requires: - coverage - codecov - scipy - pyfftw 0.12 source_files: - tests imports: - mpi4py_fft mpi4py-fft-2.0.6/conf/requirements-test.txt000066400000000000000000000001131462034230000206710ustar00rootroot00000000000000coverage pyfftw; sys_platform == 'linux' and python_version < '3.12' scipy mpi4py-fft-2.0.6/conf/run_test.sh000066400000000000000000000000461462034230000166340ustar00rootroot00000000000000#!bin/bash pushd tests ./runtests.sh mpi4py-fft-2.0.6/docs/000077500000000000000000000000001462034230000144405ustar00rootroot00000000000000mpi4py-fft-2.0.6/docs/Makefile000066400000000000000000000012731462034230000161030ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = mpi4py-fft SOURCEDIR = source BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile doctest: @$(SPHINXBUILD) -b doctest "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) mpi4py-fft-2.0.6/docs/environment.yml000066400000000000000000000002711462034230000175270ustar00rootroot00000000000000name: mpi4py_fft_doc_env channels: - conda-forge - defaults dependencies: - cython - fftw - numpy - scipy - mpi4py - pip - setuptools - pip: - sphinx-rtd-themempi4py-fft-2.0.6/docs/make.bat000066400000000000000000000014561462034230000160530ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build set SPHINXPROJ=mpi4py-fft if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd mpi4py-fft-2.0.6/docs/paper/000077500000000000000000000000001462034230000155475ustar00rootroot00000000000000mpi4py-fft-2.0.6/docs/paper/paper.bib000066400000000000000000000071141462034230000173370ustar00rootroot00000000000000@article{dalcin18, author = {Lisandro Dalcin and Mikael Mortensen and David E. Keyes}, title = {Fast parallel multidimensional {FFT} using advanced {MPI}}, journal = {{J. Parallel Distrib. Comput.}}, volume = {128}, pages = {137--150}, year = {2019}, doi = {10.1016/j.jpdc.2019.02.006}, url = {https://doi.org/10.1016/j.jpdc.2019.02.006} } @article{mortensen_joss, author = {Mikael Mortensen}, year = {2018}, title = {{Shenfun: High performance spectral Galerkin computing platform}}, journal = {J. 
Open Source Software}, volume = {3}, number = {31}, pages = {1071}, doi = {10.21105/joss.01071} } @inproceedings{mortensen17, author = {Mikael Mortensen}, booktitle = {MekIT'17 - Ninth national conference on Computational Mechanics}, isbn = {978-84-947311-1-2}, pages = {273--298}, publisher = {Int Center for Numerical Methods in Engineering (CIMNE)}, title = {{Shenfun - automating the spectral Galerkin method}}, editor = {Skallerud, Bjorn Helge and Andersson, Helge Ingolf}, year = {2017} } @article{strang94, ISSN = {00030996}, URL = {http://www.jstor.org/stable/29775194}, author = {Gilbert Strang}, journal = {American Scientist}, number = {3}, pages = {250--255}, publisher = {Sigma Xi, The Scientific Research Society}, title = {Wavelets}, volume = {82}, year = {1994} } @article{mpi4py08, author = {Lisandro Dalcin and Rodrigo Paz and Mario Storti and Jorge D'Elia}, title = {{MPI} for {P}ython: Performance improvements and {MPI}-2 extensions}, journal = {J. Parallel Distrib. Comput.}, volume = {68}, number = {5}, pages = {655--662}, year = {2008}, issn = {0743-7315}, url = {https://doi.org/10.1016/j.jpdc.2007.09.005}, doi = {10.1016/j.jpdc.2007.09.005}, } @article{cython11, author = {Behnel, Stefan and Bradshaw, Robert and Citro, Craig and Dalcin, Lisandro and Seljebotn, Dag Sverre and Smith, Kurt}, title = {Cython: The Best of Both Worlds}, journal = {Comput. 
in Science and Engg.}, volume = {13}, number = {2}, pages = {31--39}, year = {2011}, issn = {1521-9615}, url = {https://doi.org/10.1109/MCSE.2010.118}, doi = {10.1109/MCSE.2010.118}, } @article{fftw05, author = {Frigo, Matteo and Johnson, Steven~G.}, title = {The Design and Implementation of {FFTW3}}, journal = {Proceedings of the IEEE}, year = {2005}, volume = {93}, number = {2}, pages = {216--231}, doi = {10.1109/JPROC.2004.840301}, url = {https://doi.org/10.1109/JPROC.2004.840301}, note = {Special issue on ``Program Generation, Optimization, and Platform Adaptation''} } @article{mortensen16, title = {{High performance Python for direct numerical simulations of turbulent flows}}, journal = {{Comput. Phys. Comm.}}, volume = {203}, pages = {53--65}, year = {2016}, issn = {0010-4655}, url = {https://doi.org/10.1016/j.cpc.2016.02.005}, doi = {10.1016/j.cpc.2016.02.005}, author = {Mikael Mortensen and Hans Petter Langtangen}, } @article{mortensen16b, title={{Massively parallel implementation in Python of a pseudo-spectral DNS code for turbulent flows}}, author={Mikael Mortensen}, journal={{Proceedings of EuroScipy 2015}}, year={2016}, archivePrefix = {arXiv}, url={https://arxiv.org/pdf/1607.00850.pdf} } @article{ketcheson, title = {{More efficient time integration for Fourier pseudo-spectral DNS of incompressible turbulence}}, author = {David Ketcheson and Mikael Mortensen and Matteo Parsani and Nathanael Schilling}, journal = {{Int J Numer Meth Fluids}}, volume = {in press}, year = {2019}, url = {https://arxiv.org/abs/1810.10197} }mpi4py-fft-2.0.6/docs/paper/paper.md000066400000000000000000000104121462034230000171760ustar00rootroot00000000000000--- title: 'mpi4py-fft: Parallel Fast Fourier Transforms with MPI for Python' tags: - Fast Fourier transforms - Fast Chebyshev transforms - MPI - Python authors: - name: Mikael Mortensen orcid: 0000-0002-3293-7573 affiliation: "1" - name: Lisandro Dalcin orcid: 0000-0001-8086-0155 affiliation: "2" - name: David Elliot Keyes 
orcid: 0000-0002-4052-7224 affiliation: "2" affiliations: - name: University of Oslo, Department of Mathematics index: 1 - name: King Abdullah University of Science and Technology, Extreme Computing Research Center index: 2 date: 4 March 2019 bibliography: paper.bib --- # Summary The fast Fourier transform (FFT) is an algorithm that efficiently computes the discrete Fourier transform. Since the dawn of our digital society, the FFT has permeated to the heart of everyday life applications involving audio, image, and video processing. The importance of the FFT extends beyond signal processing into scientific computing because it diagonalizes the Poisson operator, which is ubiquitous in descriptions of electromagnetism, gravitation, acoustic and elastic waves, potential flow in fluids, stress in solids, Hamiltonians of the Schroedinger’s equation for probability distribution of electrons in quantum mechanics, and diffusion of internal energy, chemical species, and momentum. The FFT has been named *the most important numerical algorithm of our time* by Professor Gilbert Strang [@strang94]. ``mpi4py-fft`` (https://github.com/mpi4py/mpi4py-fft) is an open-source Python package for computing (in parallel) FFTs of possibly very large and distributed multidimensional arrays. A multidimensional FFT is computed in sequence, over all axes, one axis at the time. In order to fit in the memory of multiple processors, multidimensional arrays have to be distributed along some, but not all, of its axes. Consequently, parallel FFTs are computed with successive sequential (serial) transforms over undivided axes, combined with global array redistributions (using interprocess communication) that realign the arrays for further serial transforms. For global redistributions, ``mpi4py-fft`` makes use of a new and completely generic algorithm [@dalcin18] based on advanced MPI features that allows for any index sets of a multidimensional array to be distributed. 
It can distribute a single index set (slab decomposition), two index sets (pencil decomposition), or even more for higher-dimensional arrays. The required MPI communications are always handled under the hood by MPI for Python [@mpi4py08]. For serial FFT transforms, ``mpi4py-fft`` uses Cython [@cython11] to wrap most of the FFTW library [@fftw05] and provide support for complex-to-complex, real-to-complex, complex-to-real and real-to-real transforms. ``mpi4py-fft`` is highly configurable in how it distributes and redistributes arrays. Large arrays may be globally redistributed for alignment along any given axis, whenever needed by the user. This flexibility has enabled the development of ``shenfun`` [@mortensen_joss, @mortensen17], which is a Python framework for solving partial differential equations (PDEs) by the spectral Galerkin method. ``shenfun`` is able to solve PDEs of any given dimensionality by creating tensor product bases as outer products of one-dimensional bases. This leads to large multidimensional arrays that are distributed effortlessly through ``mpi4py-fft``. Throughout the ``spectralDNS`` (https://github.com/spectralDNS/spectralDNS) project ``shenfun`` is being used extensively for Direct Numerical Simulations (DNS) of turbulent flows [@mortensen16, @mortensen16b, @ketcheson], using arrays with billions of unknowns. ``mpi4py-fft`` provides a flexible distributed NumPy array interface, which allows for further reuse in applications beyond the FFT. The distribution requires at least one undivided axis, but apart from that there are no restrictions nor limitations. The interface can simply be used to boost performance of global array operations through MPI. # Acknowledgements M Mortensen acknowledges support from the 4DSpace Strategic Research Initiative at the University of Oslo. 
L Dalcin and D Keyes acknowledge support from the Extreme Computing Research Center and the KAUST Supercomputing Laboratory at King Abdullah University of Science and Technology. # References mpi4py-fft-2.0.6/docs/source/000077500000000000000000000000001462034230000157405ustar00rootroot00000000000000mpi4py-fft-2.0.6/docs/source/2Dpencil.pdf000066400000000000000000001156351462034230000201060ustar00rootroot00000000000000%PDF-1.4 % 1 0 obj <> endobj 2 0 obj <> stream xXn1}߯3R{|K"3$h+"Q_bj3np&'sŻvj׍_82)ӏ;P, VF;#Uuuzs)֋M_Y Rڅl<UbV`XU<JY}681=b㓊v~!2GK,l|&1|묓w muA4E.WZlZ+XWYg^z5U$w<\,䋻ǸJwN.`ꖠ .'cl7Es0I#Ս!YX6t%`4ĐCRP] o 8>*:-O?۠{c ,BJZANgEc2F~H4)UaD h07#/Nv{ @ѩ sNeU'Z,KdXg$oo5Ť]. Y:O%x+{bm"@s 쓅two -T"EKгp;R$`JUKv{]oY#ssS`AD"b"l  9 ' \klI$J5;ko%!错CWD:4U\UF endstream endobj 3 0 obj <> endobj 4 0 obj <> endobj 5 0 obj <> /Font <>>> /MediaBox [0 0 270 270] /Contents 2 0 R /Parent 4 0 R>> endobj 6 0 obj <> endobj 7 0 obj <> endobj 8 0 obj <> endobj 9 0 obj <> endobj 10 0 obj <> /W [0 [777.83203 0 0 250] 19 22 500 38 [666.99219] 51 [556.15234] 56 [722.16797]] /DW 0>> endobj 11 0 obj <> endobj 12 0 obj <> stream xxE7~f殹wMo$ͦJHh7 $%(ؑ"PP B1`}D QTDB}|}]7gz9s̙,@y *[FN+Zdٴ>8=S$ K9T+QLhĩ5\QvwKC~(ƼX1Xѝ_T9D Sfww]n>fxy.M+=߷?6C9k+FsѽPV{FK BC}ȕ Z~}#oQ·6v<,C) &A8 poHƾsCYH|CE2{8pU|%xCo"=?h=uzv ^~w2aӮF@FDbB!XvyyA=Da!^ .C<y M.\6< 8ye̷f0HGes֘W>|-lqSLO2{>N.SMKNm5e M)1Ёˬ~*P#_~_b e!ϻyHWC/ \_AH;"V5tUwC:%_˛8$o72I }&HYV@ooJq4O@?7x'J|QlO11ZK#hI:l(7':(nRXwnugI cd7ãhyĶp3iU5?˒~ykbMzu=,dG! 
\?nk\> I&LNg,gk.ͩXD𽆏_b= =iuicPO{=F5 2aow,^\^ܮ܎gܸ!e1I Q7 4:}h?(grD:k0 ^2㧼H!0p]x)W$O zcʯ+,hY/zH|eg⾂ui"QH3Eh/~l soT ezB C^ }h7tr!IA!0 ٬7#vܣRq#}h@ٴb{Zs~nraO4T\Vl-Srz})1ip;6{ 2} }`{(0Ю&vc"t\K͸V>Jcnu: @ am'yFП+ )/Ӫ^QL D4\^ ϹNi&шA6dz&+pO}XQBQ7?7˚}b}E+ڴ+aI AGŰ~ylλڳz7 KDA܈t/ 6°qq+"- 8hwg ZSv.rwm INA̻}{B a,c嚅Xs _?MO]?O;1ZB |=t홨Kpy{D|CYt i 4ЕCkЕCكi{Be`@H栉^1*學_"[q/F4Ghl/??yi>??bڳoA6ҡHm.byKpxm@YEFghsW584\ygq_=" =t>RT5g0ދCRA`>h=w qsDEsh݊XxQe܅H^΢gE}Oױ=rs^R9Ot'D/8k,q*`_iҟ<)ʏhKMhnrۏz jA|3_⾑Қ FSJNJ kɘ4uB:WRwp ݋84n={`O11.G'}8wKMmؿw.7ῳsi9\L h)Bv:n\ofyTRpJE<oϸ޻1,hOswb;GZӐ"QMI{Eޑ&N-υ}<zp)?d! BSsϞ9~ΕFyB4鈧0 LDk^^fRiw|[Ed˒( fxw)!J*X*\{:܋2ݐ2yz7XAwd!.OJˀx!MϩZEЎg5ޝvGHG}PFT 멆˴/6RsU'Mz ^L0 ]`a9'iڮ|tMK6t];i7BKaw|~}2_Bs1IBe|#Ù: |exoC[ 3e;6 7D mDB0*Z\EɝyCsg,<O |{2ԺsC`'ȗ{bJ,żG;'~83௠:m< $ZA$ K>\l d3 l[aoSU(&%'T*vrUMs)=g/_n/_am޾]G#vstv9: W9*s:9eg3™Lrf:89rϹ.&( A ! 1 Ԅ .겹n4rr'y)y[݋qot׹ws~ӽ$ORqI&'UOI:3'ggt=3_{۹ϝmp5zj IGѧY"jصٝ1&J]~Y@dN!J &!p>#$P>T>R>V>Q(OCarTRJZWw1{|H>"O#DjjFjJSUPjjFq8l1>3>70___[NZ~fr圥GG+Q]jVd5EMU[iۈ0"yM-m|uP].VTKջԻ{{8.娻DR]VQUS8CT]UPשO F)iuZnV-VueD1Fʈ3 0F2 d$)=ZwZVj=^R[Պ~Zm6P ֆhCapmv6R+FiWhWj1Ztj6Ҍt#4l;%R.n^>mv\[==R+JF''O BNS7;9M i 9KΑ6PB)eT2UJ5A jVHJH& $dHEi hv@ڐ"mi"u$LSh*mMӌvF{q8n`h^4f,MږiQ^G t.Go7ћ-Vuz]{C{S۫kokhjikhjikhڧ!vDL\B;}}}}}Ӿ׎k?h?j'_گ)7wOmrvF;+ȡ9A9RQrtIuEWuM-r+xٮ[@=H)'.=XC0=\#(=ZcVzutUzzk=MOegYzFzI;IrE/ӻtw{zY*VժYuk}"_!P}>\_ZVk5H/GWW1z^ez>^O+_Id}>UWUtZ3Yt1.K]nzG'҇t%]_mj2~3~k5:Ic(5NFv3`v-g=6&mc;Em>}>c_oq#DГWzF;ɹrg)icq8k3 /=MgYz6P/FeI{&)rEΓ,].{ʽy `mc/oً %X U"1%Zӏm%QIRRJdJcdJӔBKQ*EJ?2@ R+C0e2B\+0 c fGR)QʔJXdnZ-hKiq[-4K22r 8Ke5E>%ar|F>׽@h %@ #`   /QtZC:ce3oo;z.Pv ÿȗ+pxƇ,&wldCٰ+ % fHtt.rjCI m+PV'+S1{|(GQzSQQJeڍRES.cY ;gmD&Ɋd  mow8\iYmrڶkߡc].͓߽GAa^-A 6|#G]qƔqe'L4yiUӫg̜u5n7̝wM7rmϿcE\e?|y{X0 ]=xdT'wZY'6YX4ԁm[G֒[AotBS2Q&.Tvxvx@Lc.gmXWqiAa`-G)m>n 2=kSOycW4š1΁=1uNy'X0fO1Re8zokeX+wJ\ǿ0ԌQܶ_{4 ii[sQ{bpYUes A miq,dɧwabvց'+H0Hy 3~9#>`1>2 szfr5&]fm@d#E߽Ⱥ{{駈6#A:G]9]bYz=zȹA¤9OaS01!czAnU"x: 7un;0/1Uuo"ϷIע{D4vte*QB͍7 (BȰ]ft*&Frv9} F CD3!-F-(VNvn'T_ }&@>&+\ȷnͷ̷[Hp W.9&M9_֟ևNeN^]+ 
]L}1W֎vk$:tnpcK~Tg(H]^ [.suy+Qw9r-K^w#_G!AA*G?l{.},`]lluVK?ԥ߁)HԹy'嵶 Hyq়'{cS.K xG d u.18p*ZKt:܂ y+$չnZ}PJF`sҧn]u}ĽYz2ξ;^Ä]loF&b^J7O?k\S*/k}uӟ݀8\/o`';ݩٲ&w%ذ[Lr ︓5Xu:XU3TRôͦj-@4E4V=I/)xQ$Jo?c3ԑDvjCY-ڝԆAѰӊUNiEڠ+Fn&b DQ $űιuqq1)=:jO q+GAĬnQB,Sbi?QiM?Qqˊ]W\=޸^七N;h'#wk!<\[Pܘ hf/L|u%bX N0DQW"P|5$HTdQY+iۍYd츉tԖ K]$yO.ulqFn)/+J ۣ肶hl܋T6Wփ"EOɓ uCgNLat;ATaEA3fp̜n(W9'?OzJ HmW޾-3v0PÝi5$ =fFZppiB2c& +pB`&;ɩPw"J(⛆޳dp5La)nX N6FMMAp/} a6yB c ;IdXÕP _<5!Bsߢ[;0Wa'BB{txwC$xz?%In : w1z==MCE۪J}a 1Iv{{p/-ǚn:M3Yy58 .P$aow9>?4 SibxES ـ߷Gط" ȥaZ"->Q_ɐ#xؿ*(/wjwaç='[Rw+F"C^#SKB;nI'[{/O?߰Vlw%>F--I^9j[yj+5ww6 ={ep ~ף$nWMgB{!.tn}H2{9R2L\2Dn&>o!O❛}&_2f7MY4GڃNtt A?`̍ڶMg%;-Q)]ʒfitF˅DyS+M7YUQ:G}OjnV⸷]&U̐äka\QJ G)td M>zY%Awf;d_KdHwgh%':Wߕ¥o:nn>䰼  øoch]#v@?)_J=i%|\<]-CU4l@{q2}!ϒOI=lR+f^$?$$Nt8{Ng?FaN#`{i2B&Qߟlxkl#y!#,@6o@g\_w$9ep>d`w)C'*? Qwv.ȏXET֛톶 foOV;a}k'0;5sFQ̃xy Bi34Z)"@uǸK>rNqmm툷N1nE>seFF]hGDyjIU8FiWDׄ:&2tnZOTOc8V;#Yrךx6FOvnf3KtKߧS=8hpHd\jez}hL/̻&_rm8c shJQ]PΜ: c!ırZ٪$w|wd{ጛt'Λoy;Wuwñ;e9ct+ƞlc7<8Q{}we޸ 9.])):ť\u s$ODB=,EX}Yǂ LœHr=.lAlYF,-,PqY14[pb,2YFc hcҊzzx[pZY%V,˓Ӎ@ɱmL݂ip'$y(eQ.'+mt!NUOt\NpN~'m s*TUIFR[3-gHk@x#N TOMB]\)4<,Dr$Ą$iӱCGmW?pN<۰iǂ,icȓ~G{~^:}]ضu<llP%HiwF \2qIӹ}Vؚ̨VsR2kΎz&qgʁ>ITm)I.)ٙ+2eiڪՇbu)ď#?I8cJ++.E jbU {lqq)\]R[RMB ㉩cd޵[{$L*swLNP:XpBOI ltx ᑁ+32[8iN"..4TcXƷ/8*,sCrs*15U;%)52-IlNrtjN\c}vW)!;[.EHs=TRFpZnVw9|M-[b$JܳmGb%?mgo?$nova<{^æp_Qݵk xdBq#fLr:]6BGaڥ(~ r'r ]kd:(Ƹ.hEߓ@K v{l3=8](+:*RB)Vuj|vV>A(jnZFzIOpXYLk /cZiAlR:6Hmwp pp$ %$THGh O iY'봴ѣQ&h2BPD$$ q(<{sG "ə̵2U@G؂,pbEEidt@1C;ù)OB1R(pɑWَmiv-x;[%>yGVT+ ~>oVuu!vdF\+NjSA5,3%L$ӯ@)0*Eƛ~ qu~+]Ak/˷~AOAmLAfeG~$ӯ JYiul7V_ez ~6D?VU?M5AuCNӏ<~7C=#Dӏ<ԗ~䡾#wL?P?i/~aϦyh\#!o Eg w~ &a"xT IEYxcE?N(vDeo-1f|A}mKP< ȁlv@0HnfC遡js+DLLɇ)uk`#-ܳ-Ü0}uW\W5`&TeoJ; #D3~:>chqZ0[I/U[&k<_n ֆm+DJ)F L#q|nxDr1T!>W[n< cD QD1b:MB=saj c/懧f4tl9Z-8kMǘ"~o\(#qzf <*XQBj?;)UBʰqF\\-yq8ޙbe"o%e"JYUa0ά7z>yl/8mlbs.q,j\\|~ui>XjD{~Z1WWv8]zWU|mT#JY뫇修s#';crGi5=**Kk**e:Lq 0fcHYe*p (1rj!fN),awQ^=tnH_1rFffl{HD6hhaͫ(uTO-8*9j0m2КtZYVeS*gN(yJqiǏWNR>VW̨Q1WZ]V>&mNqLَ3ʱ?R:QU^=ml1Z+f᣸zbŸM"6n2,ZS(Q5a 
0saiSf;R*RSR}.e|3U8S4/xlE [)9X]U^=mJeiFӥ$4F̚5Ygbf#=RRp+J$ouyOGl3{B`;kiFZk5r.?硊}vA>.jy_N?.n6RK Z^ų}z"%0sMQ5\ET endstream endobj 13 0 obj <> stream x]Qj sM[zeBۖ}ThT9OB>gFzm[<7gD,N 8*M~CsV5} 4=>$:G8|^oP{`i@ܾ &۱W~=ϟceEF#jjj?eW?/ j.|LT%Te)s߼"TMP\>X lJ΄ endstream endobj 14 0 obj <> endobj 15 0 obj <> /W [0 [777.83203] 91 92 500] /DW 0>> endobj 16 0 obj <> endobj 17 0 obj <> stream x|?|fI6Bl6!j 7 $ %B@ീ bEP AЫ *V n>]9gs̙y,ĈN IYU;h|1QX̊UWh(|Kk#wXrOTA_'B)3.=fwGx)UF!1 woxM7y±̉D'^4ctn$6N[n=phHҌ.~r'DIגƗ=nBXfqu'wLfHj,tN 7}B:\ >RF\gDN::5*z>i8FوI?ᑠ7|R?8 d#70 .#o=p#e]`\HڋP20G:) F{>iFD#_7fBBD(4 D|[ԳVTyV-ǹC_A!~(s)Iلw%((!c%ң"/ yQggw};QEQr/RspLOgA?ѿOB[J$ՕwQ=xE%;}䫕g$>G.@V9FEHW@o*.(AR4Lt!F򕔨~Bm `V|*eRо7&F}P6YA9B⾳1 ]̆1H_*ueDy[:ܟ AHbnhw$0HyPo4'W)CYqgC6Bsf L( : />> %t&H8N5-S-Эf4]辠JC:A]e%b>9)2"}Ҡ,j"mW;yNv|V9z7vP&wwood;%}'o1M[B[ϯ#7-vhW'oki2/|>-Y"  B0hrBo5jЊ 6ry?uӨ')l]Yy m*Q?hmԹ뒤A}=s $H*wP|_RX}I&Rg =m=QR/[(-1~i$[Ҧh+_'.@G0X;./] ]qn~|T/lYZ*vM먗WZu4RQmyb9u'ojT#R0/E l.`p ~t}EJ`ZsvZoiXkv̷]e~J (_` e̥(% ("m|?Y6c2bU xهo`Q({۷t {R,# '"[? ony_,7фA>LBZ͗i;Vrt0a}( (94",sW?hJ4kW!W.zKqgwS.HA|ζqA[r<-F0w^^^-FU H']sG5'_%:u3Փodqw.9шxt;bu4˞ _'@ߓN<Q&XM_ /:"OO,D6UzJ О=mF{3P)@f=NB/>/+g?6Qo id kubl?X\7VvVN >@!ϯ M{@P=ð=#B8uyLh=sZvm8a<ȷ]_;?OpK/[%pK.bUCy4aȨ9`cJK݌+g>*} Z)@We0n?N"w.wl-V?̤Qr@b?O}:h)B"px&c+sl~S,}9*lߌYJN!ܐgv9֢,q6/q˂dԿ@'+ξl$`}XKn2ϒXBQM(( <7xNA:+YWq >H6<†>Zc,b8QF;31>o/ bM HY:L<:'C$qӼ`9N5_u7P}3ҳw6і-鯤R%F5Ɋ5@2nٗ`[ɯ <SNgc99\Y\MA(R"e)`MyVg[+}[/q h/΃3U@8t W7s3sNAӆ~P>Mj菀2_ҏ욞VӞ :Hp`|@Oyv1§=]Sja/yiT?\Ei)JLCEe@߃{3%uOJ4^F ̭rRy? m< V GM0hGqB/Ze& 7yQoK9-xA-oOK q|gS[~^OK +m ė~M)-?ǐ@}xo@-VOc [| ;/盀< ?g%2v|U_alemʲ6 7FɶEzyߍwݟ_1rϝ87@ }#hs_=@<]ri|.j,` 0\rTJKXl֪%]Qv}3tCEXF ЊPA0 6dv3pr9>+T[K8 Qn,%OS6On(/ezOºIg϶cr[(/Px.)GwbXA}7 6W&-ϟrG`n7lLM>:u:63-̧g6t|h#HO7POll|;x'Y}&F54J&+h>Ja5Z ~^r@O8C;#1/&N \TEo?~v?_6?aBׄhIYg9 γҿ>{0ggzOŞ.OI3?;E#O?}8ɏ- #hwݜ(3  P(8?Str_ /7`CI@S וc@=?kS?$`tcP.ql?X>okݽ3[ |,}1|^ {*Ё延Me~=i=|v9a ek]>s:z9lgٲ|7`u@c?o4#Z(7s Gx9|f :eOKܖ׈j;9_PKG#r_0u(w%ޭAj/RʡC@-\Z~@#co,z!A?] 
!M,υhmT?|eov(4Ŀh߷H?J1+~wZþO׳´XWZ&%s S ˳dϾRk'l(,FhyqhzGwr{˵XZB>\K\xbC_&*N$9& = 7 ϶Z>:۳?{/]/>OiƟ[Yt{S{(6_u'֓yd퇭v> Җ߯mP96N46&AΧ?ԐgW8ep=ȓ䙤EmmR̆c-`V喀fȶn*֤D~02`: @{lRnNzGs '֚1W'bWO|f;SsXg"廾ovQ7; 7x~=xOp?98 ~3FpsY, x76qr@~H^|=hfIu= +h~Q:mn>}w=里E}RS'NNZ9=옇>tzx㽏xOFO Oē8 S˔!({PuXI۫ICfz^2|Bc"%.L\\.kkX@f\ ][\/v}Yw;IfS!ݫOऑICf76,2KN Ȭ?-:|styS-^{>O7]UOrO1%9y$Hؑ####~D;GKq5z_?\{蒯ҡ'C?spyyC7ڃ+8pXrc\|@﷿d]wڟ?}QپoO}$J{a3l}kmWϾR%KڗxCO|`Ә|wwwG y?5Ǘ)Leb3]]im͕~"[9(?lpiY\^v2\ f%/]ď4qePt5]\H+Snt/=L@W-tet;-fӷ=7@DD7R%LU"DzvgF{h/=F7tI4#tMjF3h:ͤTCS-͢4h.]Bӥtͣ|h]A Jmlq0itN;؝.v7F3c{Jf1٘9`k؃!=ֱ({m`l#k' ŖqmaO,ml; cN"}"Y{=ڰhv='{=vgs,gq-{ųv,%~cbnĒً%2{vR=l/{`ovڳtց'-}T^_ߠ/ooooooWwh)]~~~R_?R:}A7 &qu:C߬oџзS?gs WW]n}W=TU6L㚢隡4O߯'g_Go.{# nDFƈ6bX#Έ7 F2FlG?l?l31Ǐ?;~qqqq1#FH7:m1W72615%RzcqܸѸɸٸŸոަ z׸ݸønذ{*cqqzާ}tޡ5ƃCZacxxx`ckvmO'=מjOWoVo1}bgW@d6Caps9e6ǘc2syy9޼М`L{=۞cϵn+{G{̼\nhdlbjf0o704261W%D>:;ʾcv~f_o,g'I2Sgs\:7-ncY<0Bڵbmvv[]=JR[=ǵ-vI_Ѽ^S7w}AcvDVAY;1ཇz~=Uow3l=W뎝Q/}bK~z}~>P/!P}>\G1yXL훂Ql]q_94q;8 $בHwd8r=RPHy9ĄIJvb,lmMi6fLe6-j 9m[-}??{{_am;wK~ľ6R_di3mOh=N_ӳH잆ߡl2ZUF*l6G9fW*ߡ^AOٱ KwY!-g\v.rsK m'Oi}Y"7_]|7 ~#1??Ϡ@GjъKq+IJ2trVUAOjô ZV-ikk;ojZ6Z\j+iJ{%]mv6|yy){dBoRl%GU:*yJ'34+k!ڄ&SK+eohf_h!C#ho:t8 :si/9 ZS/Q(]c؋~T˚`i쎐0gxDdTظ]dO75}z̬܎y:wڭ{~*,ݧoqIX:hÆ9jƖ;N(*&_4z3f^ UW_s-Yzxͷzۊ.ޕVw?XCk^^y6ol-Olݶɧ3;=˯ko{&&&&IMUT뜂=wڥs9YۧzS=grNnȟ)'s +]q*qǂ_V)s-Q!P];]%NYRR^6m}=}lYfkWbz19!T}[OqI}X^TT>8>),+XO>a2 } ٌZ7fXrV'M,pTz*+.[T63nq}ecOQyD߱+KJb"d"wc&kY~K!ґnů)[ϮAn11&<%"|LY27z1/mۢmCԶĽdXOR}a(Z2bަ"w)Y~n 0LUSdvh+= uwOr'c=SwqNK&uG6|JW~T[}˗8{ )k^ǽGlw=_uzLE F:$ -iR4}qG^2%+sV^uA >Vȁ]&"PpXM('6E`JSrqy٦Lm挎,ңEAr?tt%ٖ:-Oޔ?WԹވqcEi~jc}d߱JzSԼU("e%^j C/*OP-@My{ ׽!Qr`R. *bG?נ|9@ P!u&Ze*%ʭtr!,@nP~W NAaAo *sd]nHt9{'"  [ *ޓĕ.[:O! Iyo[ ·Cr!"`yˑry.Trhoxzp >rqX-B%c:zu2 JvѦ'u2yS\BS!&44@D*ZrتMmU&NQb@U&5丶+ChIE|@],%P] XrZ[\lYZ%9J! 
շ ޣӍz8{ZC߫ҏnVzPQ_[77^k_hWv=^df\[e|"52y:RU& ޭ\LݘQ\(PWB TC( / a C,*R@m UoJ @o(bC!C0B Az wN4]yiE,?QT#էtVTP;( 'Qk<5ޚ5kԡޡ퇮Q =kO7}qy]]k6 zfОAA5 Rmj͓4+憸yz0  p @puG(b@CGyHdD:?-]iih0UA#2 q=$.\eq̍?&F{8'jUyyQ|(E!۸(:LD@[+wuZ()EC~k7nפ"{{ ;P6+;"C5(*)פBK 7)⤐sDv!!H[$avPqBHgRv6DtePlIuqӿٳg ̙kݜXWI4O1q;>}9)Sk ,/XU@3 $Ik$/O^!Y RT*deSR,ۜ"X7g ˘wldo3ϢHtF 7&d)Y2щU6vb%Cw^,hCa'[08ތ2>%)yJ|_kf u2;eaBu32H@(8f9Q -4 0 r|Nh8wHDG ZE)tuh,puѭԟ yHc- v0E!K@=%Tb \Kior٨/=Ft6rvP н Km>px &l˾i M VT.Mb<]k3,}FOc-BMqJ_m5)Ρ!TԿ{,uT|i>;}3 ~dй4}tu7ڻ[)͡h!z >BXG֑?a:FrZ7^VĻ -k4=\EϢc,yЂԩjwJqr~aP<%[; qT#w9zcǹ{ԝeQ͐m*A߇"HԽw6FEw6`lcq'aRW^Q]55E<<;p}3ƻvK KeY(ɋ((ڵ4-!ZrC oY4ΦcF8E魌Rʔʭʿ,uvV7*g6+]-JLLݠ?MпZt%-/7j[zޢ+bIs5Z݀,^bė'۞w兼//{++I/wEyVZU}ZC+F{c1|';,;y6߸|c|/eQ6z:u-*^@_gi6dp5ep|G{oȦເ-dgW2vxְv|b٧K=si"^o-=7mm%RV(+w*))o*\TsuzzG}M}W=mR{N;s^niL/ՋޤZz)?yjhHL磔 dvTqZҏ԰1d|l=㟩m(kĞ5J_]}Cm^}O|rrC[j+kVH:YߎBy5_Jcq.{Ŭ򦺒>Q<Vjb~!gaqOD]L6*bOVk ݪ!]J{S!6l?G+O{.2\I^43VVkˣXXSbkjKg)4ri<z`n|Xh;tp1;rBV ?m찖1<pZeXR ]ܘY;*RWK`a[In7koPCWBӅXs>FmGbq{-8G{aSMF šUnw#|W} S[ ;@]iVhjgؗX}#//} =IKw`; }ޢ6G2$4aA@nԩqb:H}\FS|ay5۳5ݥdS4Aڪ7oAsShE+ZъVhE+ZъVhE+ZъVhE+ZъVhE+ZъVhE+ZъV-8kG?C ٓi4nO?fFq=tN K'McRl󧂓C ,BtM O کt­8Q$~ܭ &2ZL+owɊΩjselU>z<+ɻBY:TX92]NW+3ݖ1uXu.~κa9٩cm*e1$f'$gg ϡB1UظΝGXeqt<:'wqD; Kd9,0O7;;SiA.fx #7K#:--#S'GΩ#S7='5֦8:]ڱv.܉lq)"<1ҳ,`sle%kYtw+ cq.EqqƩ"bSXD'yWDKvVfnNӓYʧ|]fd4c_;Ofϸc㿆&9 $)p,(gx䓐ۢE ȟx!Rs#)˿g΂ЂE΂?/زl4]L?bm۷~yʏq'F|h"]xaHQ0V" hQdLhVa/4bq*DmG{aw򍭾C y [B"c"S m2"$\|'E;a@fQH4&?O"OoPQdfQ@oʢӋ6yh9FBӏ&"$mbcbæz$ 0"c$uiipn]uKKMij I{tɩkǔ5 qXV\dѵge5:ncmL2eR6 ^<ƭ:*))Q114E%\;1抣r )',SeetP4G:|t0L ,,ldnF댯3·0g XD+ey,"`<6¢:uo1Y:j?) 
R[э6gw|[NWhml*xF2$xN[,ZW`x |j)xvޤ\9[t- !.eE:3 ӶxNP'^u SAIoQ_}Ka_*hai.'Jސ3%oJ [B 24 !C+%C֊Zxz=Cֱڞ /M34BvѷD;d|C%CNѷPx#G|3Ymd=$-.8Y?x'֥K*7FQjRIV" 9i}F:S/7jp_u2oU噈͑5eSݐS :SZ=}QM\yEX'#Rz:w Βf"f_ )7!Ak7R>Rɑ\E=s~dYcJ̐R m,i :H"@Z_he/.mM"$wE[kL:>۪0)Pb~7)yr._pKm֙5wuR:U{e}%uɯU4̣?ǧg3R' ꠨?J\"G^#gؙGtiR ̊sFaꖽۤzDNKxvvuR\3n^moͬښYu53ݽOwhJlUVUfQ5=3GT]4gzŬ`-݁cfF.ٹWOU3fr]zͳewT"Q 9xTg+u**fT̚殙|Ѹg6zfu]U{d]EjYS3]YI5sfͪ}JFKKg^:yr*wty(:zvLIu~PŬʪujgTsϙ]kR1][5kFuy%F,US9gR%S'MiVzs*Q]Y=v:P&!v̜>ݾ:]5c(ug^)> stream x]Pj0 +t=,](!) 9A~c+Fq6 l1̌I7SK>~`;L0xrsX"8zR38o N&*-nN- AU~vNхJC4wK_8!%8Lz6Lާ ?~p"QUGT?M~w.\$Y> startxref 39381 %%EOFmpi4py-fft-2.0.6/docs/source/2Dpencil.png000066400000000000000000000151121462034230000201060ustar00rootroot00000000000000PNG  IHDRxwPssRGBIDATxOn][?4#R^!V(i!! PiOpb{WaE B~mfF6PgjjMߏs=3Ι̝3u{ϟ<<ϙ<;.[zL۩o 3%.u_ӓS;>܍ۤ׷;|Mz}KGvvl(0 @@o4|V?TC6Fi"d ~mDYP | %  6J @ K@gC5hm&"@@j & (MD, @M@QY>!@@o4|V?TC6Fi"d ~mDYP | %  6J @ K@gC5hm&"@@j & (MD, @M@QY>!@@o4|V?TC6Fi"d ~mDfUg;ytc묳Y[+΍;59 MoݨMz}KÇ?ݤ׷=|d7j&>h @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D lYWӝC`?8+\ v,YЭ5'Ju׷n&%n[Zn>`^[4QWb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4Qլ+x!0X;tgy֚MNB[r7j{{po^anno7--7wٍypI-+F1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQۨjstgrC,Wq:3pK3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D(mT59^wwgg!+88ksS<>܍ۤ׷;|Mz}KGvvls&Q | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@6u;;܁˕`g 5ZsrڹɩTWhz}~_Fmom[>&#Q;6Eu(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@4|T;C>gi&D v(}LQP | % ڡ >K3 @ J@GC1}f"@@jb ' ,D( @O@YQ>!@@sN}y;l_8Km p  ;j{8o/Ω8lynTPW?(c{:|wz+c] 
Ċ%pkyc߿خ;oLk+f5Q_MX/o|t:ڊY`MW]A1>[3[`m}OMExZശ"@_%֢ ώ p3ώ1wt';UyϽ<)Vo߿O4\MvXJ11eINB[6r7j{{po^anno7--7wٍyp0Pv}?1|ܚ.u[ @ Lu,< "?3xu}~OH87Y3 ox"~~Z[ @@y?8Q{w'P @ I"N P 8;zvpp(|\ "@6!@^ n  p?#k'@ lb|~Z[ @ H렗< z{_%0|kJ]|XsC-pр߽t{X{d_K^=a Nɻ*'["_pǞ[k:^@ @.~Xjڮu2??  @Z|k11juzγ @5 Kwv+z|S"@X*j0'\TS @ A@'tA $@@O p PMI5 @  > j @+@5%|B@+WjJ$.W ՔH ]P@`0cǵ\N`9ֳY^p,MPN EsR>n!2 pR@O" oH# 'E @H/'I!4x"d ~jxcxWXSIN)D}b]'9/sbYCPUƨϵM:Vgc9>c1=3Ժǟۻqp>uR;iiT*&/)D'|Q'-Iw^bdR_瞿lQhűڮs뼲?_+w<*w-5_c9Gcis4 ;c_"NlTMKQTKm;$ ٳB-y?ᾬVk \׵R:+w-O81ްxlhq,}3Ųnq-ygW_^5F2_ޠcz-/-^71f?o8uοM'W=|igQB-ocŲKeYU/;g1RW^1Ƴc,e;u=W|Vnkű$oe?4 ݀s8u{K Zc Y/IV~eϝد9k+E`:T\ُIoh_I[2_q,PeYZ־0Шw?;F2]}/ "3^w}IJ[255'wN'“iOٯ~~tuoiqƨ3Fc8^[};ı|#6#'쬭>-C(UƧY+ nm/w-wNٮ7_5cOyJDsc1%g.OO->W<9'*dMGӁړNGci{^O/Xm#~#o)z~}zƨ}߫M)߯bykɂo~zz:/uzn]K=+[<~,NجZk\~Dhy pB{;XW狼5q4ܳn`yh>c=_5]{O1靱Ke1[4zx*.)D>=FY׻]`$YfGc/9 z|4F|9>kyj coNY*\vioNjzsjyIޜ[Pz bؘ/$ݷ%Ų@.hױ]^/TgI}u[WjqQWt̓z!uQͨo\?TRͭoR?p7þw‰?rsNLq)7| S{n; 쇀ߏ>j2O @~*  Z@&쇀ߏ>j2O @~*  Z@&쇀ߏ>j2O @~*  Z@&쇀ߏ>?_]'&Pd:[cc~\5O'ќSJ}"@ @ @ @ xT1IENDB`mpi4py-fft-2.0.6/docs/source/2Dpencil2.pdf000066400000000000000000001156521462034230000201670ustar00rootroot00000000000000%PDF-1.4 % 1 0 obj <> endobj 2 0 obj <> stream xWێ0}W=$xX}^Z@Pc!7uJorN$q2X,oWgN$T` <"Uez>ۓWk.V)Z4Kb6e&~FKfOVWq镪*X9 )_/!Z%YT;}63fiVɅmK֥i:_(wKs4禍[U7*/l[7/lٷ=jCʩD`]+t2ֈ,NrhEǮAC$@J*okK*o4FKuڹX9CLux:w4m=qh" $ ~PݧýR4u9Kw,Q{ߔb{O3<7e8^_*'On )Q SYB U BJX'2hKKHV HUƤ@x $U BJX'2hL T!ʪcjU g9Dժ@b {|XBӞU*DYuTWE U"(UV7\ HVlw6iA!SZ8c:k4Oѱ0ɜcBۂE\W2(zS:(~yojH!X4\wǴ@O6DXCF-򴷌цsK=;q endstream endobj 3 0 obj <> endobj 4 0 obj <> endobj 5 0 obj <> /Font <>>> /MediaBox [0 0 612 612] /Contents 2 0 R /Parent 4 0 R>> endobj 6 0 obj <> endobj 7 0 obj <> endobj 8 0 obj <> endobj 9 0 obj <> endobj 10 0 obj <> /W [0 [777.83203 0 0 250] 19 22 500 38 [666.99219] 51 [556.15234] 56 [722.16797]] /DW 0>> endobj 11 0 obj <> endobj 12 0 obj <> stream xxE7~f殹wMo$ͦJHh7 $%(ؑ"PP B1`}D QTDB}|}]7gz9s̙,@y *[FN+Zdٴ>8=S$ K9T+QLhĩ5\QvwKC~(ƼX1Xѝ_T9D Sfww]n>fxy.M+=߷?6C9k+FsѽPV{FK BC}ȕ 
Z~}#oQ·6v<,C) &A8 poHƾsCYH|CE2{8pU|%xCo"=?h=uzv ^~w2aӮF@FDbB!XvyyA=Da!^ .C<y M.\6< 8ye̷f0HGes֘W>|-lqSLO2{>N.SMKNm5e M)1Ёˬ~*P#_~_b e!ϻyHWC/ \_AH;"V5tUwC:%_˛8$o72I }&HYV@ooJq4O@?7x'J|QlO11ZK#hI:l(7':(nRXwnugI cd7ãhyĶp3iU5?˒~ykbMzu=,dG! \?nk\> I&LNg,gk.ͩXD𽆏_b= =iuicPO{=F5 2aow,^\^ܮ܎gܸ!e1I Q7 4:}h?(grD:k0 ^2㧼H!0p]x)W$O zcʯ+,hY/zH|eg⾂ui"QH3Eh/~l soT ezB C^ }h7tr!IA!0 ٬7#vܣRq#}h@ٴb{Zs~nraO4T\Vl-Srz})1ip;6{ 2} }`{(0Ю&vc"t\K͸V>Jcnu: @ am'yFП+ )/Ӫ^QL D4\^ ϹNi&шA6dz&+pO}XQBQ7?7˚}b}E+ڴ+aI AGŰ~ylλڳz7 KDA܈t/ 6°qq+"- 8hwg ZSv.rwm INA̻}{B a,c嚅Xs _?MO]?O;1ZB |=t홨Kpy{D|CYt i 4ЕCkЕCكi{Be`@H栉^1*學_"[q/F4Ghl/??yi>??bڳoA6ҡHm.byKpxm@YEFghsW584\ygq_=" =t>RT5g0ދCRA`>h=w qsDEsh݊XxQe܅H^΢gE}Oױ=rs^R9Ot'D/8k,q*`_iҟ<)ʏhKMhnrۏz jA|3_⾑Қ FSJNJ kɘ4uB:WRwp ݋84n={`O11.G'}8wKMmؿw.7ῳsi9\L h)Bv:n\ofyTRpJE<oϸ޻1,hOswb;GZӐ"QMI{Eޑ&N-υ}<zp)?d! BSsϞ9~ΕFyB4鈧0 LDk^^fRiw|[Ed˒( fxw)!J*X*\{:܋2ݐ2yz7XAwd!.OJˀx!MϩZEЎg5ޝvGHG}PFT 멆˴/6RsU'Mz ^L0 ]`a9'iڮ|tMK6t];i7BKaw|~}2_Bs1IBe|#Ù: |exoC[ 3e;6 7D mDB0*Z\EɝyCsg,<O |{2ԺsC`'ȗ{bJ,żG;'~83௠:m< $ZA$ K>\l d3 l[aoSU(&%'T*vrUMs)=g/_n/_am޾]G#vstv9: W9*s:9eg3™Lrf:89rϹ.&( A ! 
1 Ԅ .겹n4rr'y)y[݋qot׹ws~ӽ$ORqI&'UOI:3'ggt=3_{۹ϝmp5zj IGѧY"jصٝ1&J]~Y@dN!J &!p>#$P>T>R>V>Q(OCarTRJZWw1{|H>"O#DjjFjJSUPjjFq8l1>3>70___[NZ~fr圥GG+Q]jVd5EMU[iۈ0"yM-m|uP].VTKջԻ{{8.娻DR]VQUS8CT]UPשO F)iuZnV-VueD1Fʈ3 0F2 d$)=ZwZVj=^R[Պ~Zm6P ֆhCapmv6R+FiWhWj1Ztj6Ҍt#4l;%R.n^>mv\[==R+JF''O BNS7;9M i 9KΑ6PB)eT2UJ5A jVHJH& $dHEi hv@ڐ"mi"u$LSh*mMӌvF{q8n`h^4f,MږiQ^G t.Go7ћ-Vuz]{C{S۫kokhjikhjikhڧ!vDL\B;}}}}}Ӿ׎k?h?j'_گ)7wOmrvF;+ȡ9A9RQrtIuEWuM-r+xٮ[@=H)'.=XC0=\#(=ZcVzutUzzk=MOegYzFzI;IrE/ӻtw{zY*VժYuk}"_!P}>\_ZVk5H/GWW1z^ez>^O+_Id}>UWUtZ3Yt1.K]nzG'҇t%]_mj2~3~k5:Ic(5NFv3`v-g=6&mc;Em>}>c_oq#DГWzF;ɹrg)icq8k3 /=MgYz6P/FeI{&)rEΓ,].{ʽy `mc/oً %X U"1%Zӏm%QIRRJdJcdJӔBKQ*EJ?2@ R+C0e2B\+0 c fGR)QʔJXdnZ-hKiq[-4K22r 8Ke5E>%ar|F>׽@h %@ #`   /QtZC:ce3oo;z.Pv ÿȗ+pxƇ,&wldCٰ+ % fHtt.rjCI m+PV'+S1{|(GQzSQQJeڍRES.cY ;gmD&Ɋd  mow8\iYmrڶkߡc].͓߽GAa^-A 6|#G]qƔqe'L4yiUӫg̜u5n7̝wM7rmϿcE\e?|y{X0 ]=xdT'wZY'6YX4ԁm[G֒[AotBS2Q&.Tvxvx@Lc.gmXWqiAa`-G)m>n 2=kSOycW4š1΁=1uNy'X0fO1Re8zokeX+wJ\ǿ0ԌQܶ_{4 ii[sQ{bpYUes A miq,dɧwabvց'+H0Hy 3~9#>`1>2 szfr5&]fm@d#E߽Ⱥ{{駈6#A:G]9]bYz=zȹA¤9OaS01!czAnU"x: 7un;0/1Uuo"ϷIע{D4vte*QB͍7 (BȰ]ft*&Frv9} F CD3!-F-(VNvn'T_ }&@>&+\ȷnͷ̷[Hp W.9&M9_֟ևNeN^]+ ]L}1W֎vk$:tnpcK~Tg(H]^ [.suy+Qw9r-K^w#_G!AA*G?l{.},`]lluVK?ԥ߁)HԹy'嵶 Hyq়'{cS.K xG d u.18p*ZKt:܂ y+$չnZ}PJF`sҧn]u}ĽYz2ξ;^Ä]loF&b^J7O?k\S*/k}uӟ݀8\/o`';ݩٲ&w%ذ[Lr ︓5Xu:XU3TRôͦj-@4E4V=I/)xQ$Jo?c3ԑDvjCY-ڝԆAѰӊUNiEڠ+Fn&b DQ $űιuqq1)=:jO q+GAĬnQB,Sbi?QiM?Qqˊ]W\=޸^七N;h'#wk!<\[Pܘ hf/L|u%bX N0DQW"P|5$HTdQY+iۍYd츉tԖ K]$yO.ulqFn)/+J ۣ肶hl܋T6Wփ"EOɓ uCgNLat;ATaEA3fp̜n(W9'?OzJ HmW޾-3v0PÝi5$ =fFZppiB2c& +pB`&;ɩPw"J(⛆޳dp5La)nX N6FMMAp/} a6yB c ;IdXÕP _<5!Bsߢ[;0Wa'BB{txwC$xz?%In : w1z==MCE۪J}a 1Iv{{p/-ǚn:M3Yy58 .P$aow9>?4 SibxES ـ߷Gط" ȥaZ"->Q_ɐ#xؿ*(/wjwaç='[Rw+F"C^#SKB;nI'[{/O?߰Vlw%>F--I^9j[yj+5ww6 ={ep ~ף$nWMgB{!.tn}H2{9R2L\2Dn&>o!O❛}&_2f7MY4GڃNtt A?`̍ڶMg%;-Q)]ʒfitF˅DyS+M7YUQ:G}OjnV⸷]&U̐äka\QJ G)td M>zY%Awf;d_KdHwgh%':Wߕ¥o:nn>䰼  øoch]#v@?)_J=i%|\<]-CU4l@{q2}!ϒOI=lR+f^$?$$Nt8{Ng?FaN#`{i2B&Qߟlxkl#y!#,@6o@g\_w$9ep>d`w)C'*? 
Qwv.ȏXET֛톶 foOV;a}k'0;5sFQ̃xy Bi34Z)"@uǸK>rNqmm툷N1nE>seFF]hGDyjIU8FiWDׄ:&2tnZOTOc8V;#Yrךx6FOvnf3KtKߧS=8hpHd\jez}hL/̻&_rm8c shJQ]PΜ: c!ırZ٪$w|wd{ጛt'Λoy;Wuwñ;e9ct+ƞlc7<8Q{}we޸ 9.])):ť\u s$ODB=,EX}Yǂ LœHr=.lAlYF,-,PqY14[pb,2YFc hcҊzzx[pZY%V,˓Ӎ@ɱmL݂ip'$y(eQ.'+mt!NUOt\NpN~'m s*TUIFR[3-gHk@x#N TOMB]\)4<,Dr$Ą$iӱCGmW?pN<۰iǂ,icȓ~G{~^:}]ضu<llP%HiwF \2qIӹ}Vؚ̨VsR2kΎz&qgʁ>ITm)I.)ٙ+2eiڪՇbu)ď#?I8cJ++.E jbU {lqq)\]R[RMB ㉩cd޵[{$L*swLNP:XpBOI ltx ᑁ+32[8iN"..4TcXƷ/8*,sCrs*15U;%)52-IlNrtjN\c}vW)!;[.EHs=TRFpZnVw9|M-[b$JܳmGb%?mgo?$nova<{^æp_Qݵk xdBq#fLr:]6BGaڥ(~ r'r ]kd:(Ƹ.hEߓ@K v{l3=8](+:*RB)Vuj|vV>A(jnZFzIOpXYLk /cZiAlR:6Hmwp pp$ %$THGh O iY'봴ѣQ&h2BPD$$ q(<{sG "ə̵2U@G؂,pbEEidt@1C;ù)OB1R(pɑWَmiv-x;[%>yGVT+ ~>oVuu!vdF\+NjSA5,3%L$ӯ@)0*Eƛ~ qu~+]Ak/˷~AOAmLAfeG~$ӯ JYiul7V_ez ~6D?VU?M5AuCNӏ<~7C=#Dӏ<ԗ~䡾#wL?P?i/~aϦyh\#!o Eg w~ &a"xT IEYxcE?N(vDeo-1f|A}mKP< ȁlv@0HnfC遡js+DLLɇ)uk`#-ܳ-Ü0}uW\W5`&TeoJ; #D3~:>chqZ0[I/U[&k<_n ֆm+DJ)F L#q|nxDr1T!>W[n< cD QD1b:MB=saj c/懧f4tl9Z-8kMǘ"~o\(#qzf <*XQBj?;)UBʰqF\\-yq8ޙbe"o%e"JYUa0ά7z>yl/8mlbs.q,j\\|~ui>XjD{~Z1WWv8]zWU|mT#JY뫇修s#';crGi5=**Kk**e:Lq 0fcHYe*p (1rj!fN),awQ^=tnH_1rFffl{HD6hhaͫ(uTO-8*9j0m2КtZYVeS*gN(yJqiǏWNR>VW̨Q1WZ]V>&mNqLَ3ʱ?R:QU^=ml1Z+f᣸zbŸM"6n2,ZS(Q5a 0saiSf;R*RSR}.e|3U8S4/xlE [)9X]U^=mJeiFӥ$4F̚5Ygbf#=RRp+J$ouyOGl3{B`;kiFZk5r.?硊}vA>.jy_N?.n6RK Z^ų}z"%0sMQ5\ET endstream endobj 13 0 obj <> stream x]Qj sM[zeBۖ}ThT9OB>gFzm[<7gD,N 8*M~CsV5} 4=>$:G8|^oP{`i@ܾ &۱W~=ϟceEF#jjj?eW?/ j.|LT%Te)s߼"TMP\>X lJ΄ endstream endobj 14 0 obj <> endobj 15 0 obj <> /W [0 [777.83203] 91 92 500] /DW 0>> endobj 16 0 obj <> endobj 17 0 obj <> stream x|?|fI6Bl6!j 7 $ %B@ീ bEP AЫ *V n>]9gs̙y,ĈN IYU;h|1QX̊UWh(|Kk#wXrOTA_'B)3.=fwGx)UF!1 woxM7y±̉D'^4ctn$6N[n=phHҌ.~r'DIגƗ=nBXfqu'wLfHj,tN 7}B:\ >RF\gDN::5*z>i8FوI?ᑠ7|R?8 d#70 .#o=p#e]`\HڋP20G:) F{>iFD#_7fBBD(4 D|[ԳVTyV-ǹC_A!~(s)Iلw%((!c%ң"/ yQggw};QEQr/RspLOgA?ѿOB[J$ՕwQ=xE%;}䫕g$>G.@V9FEHW@o*.(AR4Lt!F򕔨~Bm `V|*eRо7&F}P6YA9B⾳1 ]̆1H_*ueDy[:ܟ AHbnhw$0HyPo4'W)CYqgC6Bsf L( : />> 
%t&H8N5-S-Эf4]辠JC:A]e%b>9)2"}Ҡ,j"mW;yNv|V9z7vP&wwood;%}'o1M[B[ϯ#7-vhW'oki2/|>-Y"  B0hrBo5jЊ 6ry?uӨ')l]Yy m*Q?hmԹ뒤A}=s $H*wP|_RX}I&Rg =m=QR/[(-1~i$[Ҧh+_'.@G0X;./] ]qn~|T/lYZ*vM먗WZu4RQmyb9u'ojT#R0/E l.`p ~t}EJ`ZsvZoiXkv̷]e~J (_` e̥(% ("m|?Y6c2bU xهo`Q({۷t {R,# '"[? ony_,7фA>LBZ͗i;Vrt0a}( (94",sW?hJ4kW!W.zKqgwS.HA|ζqA[r<-F0w^^^-FU H']sG5'_%:u3Փodqw.9шxt;bu4˞ _'@ߓN<Q&XM_ /:"OO,D6UzJ О=mF{3P)@f=NB/>/+g?6Qo id kubl?X\7VvVN >@!ϯ M{@P=ð=#B8uyLh=sZvm8a<ȷ]_;?OpK/[%pK.bUCy4aȨ9`cJK݌+g>*} Z)@We0n?N"w.wl-V?̤Qr@b?O}:h)B"px&c+sl~S,}9*lߌYJN!ܐgv9֢,q6/q˂dԿ@'+ξl$`}XKn2ϒXBQM(( <7xNA:+YWq >H6<†>Zc,b8QF;31>o/ bM HY:L<:'C$qӼ`9N5_u7P}3ҳw6і-鯤R%F5Ɋ5@2nٗ`[ɯ <SNgc99\Y\MA(R"e)`MyVg[+}[/q h/΃3U@8t W7s3sNAӆ~P>Mj菀2_ҏ욞VӞ :Hp`|@Oyv1§=]Sja/yiT?\Ei)JLCEe@߃{3%uOJ4^F ̭rRy? m< V GM0hGqB/Ze& 7yQoK9-xA-oOK q|gS[~^OK +m ė~M)-?ǐ@}xo@-VOc [| ;/盀< ?g%2v|U_alemʲ6 7FɶEzyߍwݟ_1rϝ87@ }#hs_=@<]ri|.j,` 0\rTJKXl֪%]Qv}3tCEXF ЊPA0 6dv3pr9>+T[K8 Qn,%OS6On(/ezOºIg϶cr[(/Px.)GwbXA}7 6W&-ϟrG`n7lLM>:u:63-̧g6t|h#HO7POll|;x'Y}&F54J&+h>Ja5Z ~^r@O8C;#1/&N \TEo?~v?_6?aBׄhIYg9 γҿ>{0ggzOŞ.OI3?;E#O?}8ɏ- #hwݜ(3  P(8?Str_ /7`CI@S וc@=?kS?$`tcP.ql?X>okݽ3[ |,}1|^ {*Ё延Me~=i=|v9a ek]>s:z9lgٲ|7`u@c?o4#Z(7s Gx9|f :eOKܖ׈j;9_PKG#r_0u(w%ޭAj/RʡC@-\Z~@#co,z!A?] 
!M,υhmT?|eov(4Ŀh߷H?J1+~wZþO׳´XWZ&%s S ˳dϾRk'l(,FhyqhzGwr{˵XZB>\K\xbC_&*N$9& = 7 ϶Z>:۳?{/]/>OiƟ[Yt{S{(6_u'֓yd퇭v> Җ߯mP96N46&AΧ?ԐgW8ep=ȓ䙤EmmR̆c-`V喀fȶn*֤D~02`: @{lRnNzGs '֚1W'bWO|f;SsXg"廾ovQ7; 7x~=xOp?98 ~3FpsY, x76qr@~H^|=hfIu= +h~Q:mn>}w=里E}RS'NNZ9=옇>tzx㽏xOFO Oē8 S˔!({PuXI۫ICfz^2|Bc"%.L\\.kkX@f\ ][\/v}Yw;IfS!ݫOऑICf76,2KN Ȭ?-:|styS-^{>O7]UOrO1%9y$Hؑ####~D;GKq5z_?\{蒯ҡ'C?spyyC7ڃ+8pXrc\|@﷿d]wڟ?}QپoO}$J{a3l}kmWϾR%KڗxCO|`Ә|wwwG y?5Ǘ)Leb3]]im͕~"[9(?lpiY\^v2\ f%/]ď4qePt5]\H+Snt/=L@W-tet;-fӷ=7@DD7R%LU"DzvgF{h/=F7tI4#tMjF3h:ͤTCS-͢4h.]Bӥtͣ|h]A Jmlq0itN;؝.v7F3c{Jf1٘9`k؃!=ֱ({m`l#k' ŖqmaO,ml; cN"}"Y{=ڰhv='{=vgs,gq-{ųv,%~cbnĒً%2{vR=l/{`ovڳtց'-}T^_ߠ/ooooooWwh)]~~~R_?R:}A7 &qu:C߬oџзS?gs WW]n}W=TU6L㚢隡4O߯'g_Go.{# nDFƈ6bX#Έ7 F2FlG?l?l31Ǐ?;~qqqq1#FH7:m1W72615%RzcqܸѸɸٸŸոަ z׸ݸønذ{*cqqzާ}tޡ5ƃCZacxxx`ckvmO'=מjOWoVo1}bgW@d6Caps9e6ǘc2syy9޼М`L{=۞cϵn+{G{̼\nhdlbjf0o704261W%D>:;ʾcv~f_o,g'I2Sgs\:7-ncY<0Bڵbmvv[]=JR[=ǵ-vI_Ѽ^S7w}AcvDVAY;1ཇz~=Uow3l=W뎝Q/}bK~z}~>P/!P}>\G1yXL훂Ql]q_94q;8 $בHwd8r=RPHy9ĄIJvb,lmMi6fLe6-j 9m[-}??{{_am;wK~ľ6R_di3mOh=N_ӳH잆ߡl2ZUF*l6G9fW*ߡ^AOٱ KwY!-g\v.rsK m'Oi}Y"7_]|7 ~#1??Ϡ@GjъKq+IJ2trVUAOjô ZV-ikk;ojZ6Z\j+iJ{%]mv6|yy){dBoRl%GU:*yJ'34+k!ڄ&SK+eohf_h!C#ho:t8 :si/9 ZS/Q(]c؋~T˚`i쎐0gxDdTظ]dO75}z̬܎y:wڭ{~*,ݧoqIX:hÆ9jƖ;N(*&_4z3f^ UW_s-Yzxͷzۊ.ޕVw?XCk^^y6ol-Olݶɧ3;=˯ko{&&&&IMUT뜂=wڥs9YۧzS=grNnȟ)'s +]q*qǂ_V)s-Q!P];]%NYRR^6m}=}lYfkWbz19!T}[OqI}X^TT>8>),+XO>a2 } ٌZ7fXrV'M,pTz*+.[T63nq}ecOQyD߱+KJb"d"wc&kY~K!ґnů)[ϮAn11&<%"|LY27z1/mۢmCԶĽdXOR}a(Z2bަ"w)Y~n 0LUSdvh+= uwOr'c=SwqNK&uG6|JW~T[}˗8{ )k^ǽGlw=_uzLE F:$ -iR4}qG^2%+sV^uA >Vȁ]&"PpXM('6E`JSrqy٦Lm挎,ңEAr?tt%ٖ:-Oޔ?WԹވqcEi~jc}d߱JzSԼU("e%^j C/*OP-@My{ ׽!Qr`R. *bG?נ|9@ P!u&Ze*%ʭtr!,@nP~W NAaAo *sd]nHt9{'"  [ *ޓĕ.[:O! Iyo[ ·Cr!"`yˑry.Trhoxzp >rqX-B%c:zu2 JvѦ'u2yS\BS!&44@D*ZrتMmU&NQb@U&5丶+ChIE|@],%P] XrZ[\lYZ%9J! 
շ ޣӍz8{ZC߫ҏnVzPQ_[77^k_hWv=^df\[e|"52y:RU& ޭ\LݘQ\(PWB TC( / a C,*R@m UoJ @o(bC!C0B Az wN4]yiE,?QT#էtVTP;( 'Qk<5ޚ5kԡޡ퇮Q =kO7}qy]]k6 zfОAA5 Rmj͓4+憸yz0  p @puG(b@CGyHdD:?-]iih0UA#2 q=$.\eq̍?&F{8'jUyyQ|(E!۸(:LD@[+wuZ()EC~k7nפ"{{ ;P6+;"C5(*)פBK 7)⤐sDv!!H[$avPqBHgRv6DtePlIuqӿٳg ̙kݜXWI4O1q;>}9)Sk ,/XU@3 $Ik$/O^!Y RT*deSR,ۜ"X7g ˘wldo3ϢHtF 7&d)Y2щU6vb%Cw^,hCa'[08ތ2>%)yJ|_kf u2;eaBu32H@(8f9Q -4 0 r|Nh8wHDG ZE)tuh,puѭԟ yHc- v0E!K@=%Tb \Kior٨/=Ft6rvP н Km>px &l˾i M VT.Mb<]k3,}FOc-BMqJ_m5)Ρ!TԿ{,uT|i>;}3 ~dй4}tu7ڻ[)͡h!z >BXG֑?a:FrZ7^VĻ -k4=\EϢc,yЂԩjwJqr~aP<%[; qT#w9zcǹ{ԝeQ͐m*A߇"HԽw6FEw6`lcq'aRW^Q]55E<<;p}3ƻvK KeY(ɋ((ڵ4-!ZrC oY4ΦcF8E魌Rʔʭʿ,uvV7*g6+]-JLLݠ?MпZt%-/7j[zޢ+bIs5Z݀,^bė'۞w兼//{++I/wEyVZU}ZC+F{c1|';,;y6߸|c|/eQ6z:u-*^@_gi6dp5ep|G{oȦເ-dgW2vxְv|b٧K=si"^o-=7mm%RV(+w*))o*\TsuzzG}M}W=mR{N;s^niL/ՋޤZz)?yjhHL磔 dvTqZҏ԰1d|l=㟩m(kĞ5J_]}Cm^}O|rrC[j+kVH:YߎBy5_Jcq.{Ŭ򦺒>Q<Vjb~!gaqOD]L6*bOVk ݪ!]J{S!6l?G+O{.2\I^43VVkˣXXSbkjKg)4ri<z`n|Xh;tp1;rBV ?m찖1<pZeXR ]ܘY;*RWK`a[In7koPCWBӅXs>FmGbq{-8G{aSMF šUnw#|W} S[ ;@]iVhjgؗX}#//} =IKw`; }ޢ6G2$4aA@nԩqb:H}\FS|ay5۳5ݥdS4Aڪ7oAsShE+ZъVhE+ZъVhE+ZъVhE+ZъVhE+ZъVhE+ZъV-8kG?C ٓi4nO?fFq=tN K'McRl󧂓C ,BtM O کt­8Q$~ܭ &2ZL+owɊΩjselU>z<+ɻBY:TX92]NW+3ݖ1uXu.~κa9٩cm*e1$f'$gg ϡB1UظΝGXeqt<:'wqD; Kd9,0O7;;SiA.fx #7K#:--#S'GΩ#S7='5֦8:]ڱv.܉lq)"<1ҳ,`sle%kYtw+ cq.EqqƩ"bSXD'yWDKvVfnNӓYʧ|]fd4c_;Ofϸc㿆&9 $)p,(gx䓐ۢE ȟx!Rs#)˿g΂ЂE΂?/زl4]L?bm۷~yʏq'F|h"]xaHQ0V" hQdLhVa/4bq*DmG{aw򍭾C y [B"c"S m2"$\|'E;a@fQH4&?O"OoPQdfQ@oʢӋ6yh9FBӏ&"$mbcbæz$ 0"c$uiipn]uKKMij I{tɩkǔ5 qXV\dѵge5:ncmL2eR6 ^<ƭ:*))Q114E%\;1抣r )',SeetP4G:|t0L ,,ldnF댯3·0g XD+ey,"`<6¢:uo1Y:j?) 
R[э6gw|[NWhml*xF2$xN[,ZW`x |j)xvޤ\9[t- !.eE:3 ӶxNP'^u SAIoQ_}Ka_*hai.'Jސ3%oJ [B 24 !C+%C֊Zxz=Cֱڞ /M34BvѷD;d|C%CNѷPx#G|3Ymd=$-.8Y?x'֥K*7FQjRIV" 9i}F:S/7jp_u2oU噈͑5eSݐS :SZ=}QM\yEX'#Rz:w Βf"f_ )7!Ak7R>Rɑ\E=s~dYcJ̐R m,i :H"@Z_he/.mM"$wE[kL:>۪0)Pb~7)yr._pKm֙5wuR:U{e}%uɯU4̣?ǧg3R' ꠨?J\"G^#gؙGtiR ̊sFaꖽۤzDNKxvvuR\3n^moͬښYu53ݽOwhJlUVUfQ5=3GT]4gzŬ`-݁cfF.ٹWOU3fr]zͳewT"Q 9xTg+u**fT̚殙|Ѹg6zfu]U{d]EjYS3]YI5sfͪ}JFKKg^:yr*wty(:zvLIu~PŬʪujgTsϙ]kR1][5kFuy%F,US9gR%S'MiVzs*Q]Y=v:P&!v̜>ݾ:]5c(ug^)> stream x]Pj0 +t=,](!) 9A~c+Fq6 l1̌I7SK>~`;L0xrsX"8zR38o N&*-nN- AU~vNхJC4wK_8!%8Lz6Lާ ?~p"QUGT?M~w.\$Y> startxref 39394 %%EOFmpi4py-fft-2.0.6/docs/source/2Dpencil2.png000066400000000000000000000203541462034230000201740ustar00rootroot00000000000000PNG  IHDRxBBsRGB IDATx]$ez>Yf] W{!*^B"݀I;n5r'z4bd]4^$Q!DE !gQ ̄';5tTS9=7ziۑ P/pZK㕒+Rʎb!,-o)s"Μ-6@1?p '">x&B#@)%ߊx!"2gӈDjJ,߉\_8="{8"#.bo7"n @ |F]^<83\| p@#'d9~O  @`(زOk{ų"> J&(3Du|  @%N7;9 @@igy֞ɇvz,PZZ3xM=<"@ LfZ)"LLSZ{Ȯ#x_7 +n4 PbϯOn%,}wĞ+l]l'L(gI[ T =꙼L2/ @F$T9~vD @%*g7^iX @@S4 ӷ (u2  s(} (u2  s(} (u2  s(} (u2  s(} (u2  s(} (u2  s(} (u2  s(} (u2  s(} ,սNK5 @GK N×np'%_[і rGxyҠ[W:@;M֣jC@8ɒd6 @@?~,  Z@oMfC@8ɒd6 @@?~,  Z@oMfC@8ɒd6 @@?~,  Z@oMfC@8ɒd6 @@?~,  Z@oMfCNf"=W9|.PbϜDF~ٖD%*PbQ~ĭG<gU{z`IJ,8?" ]F ƑFJ,y7cHYطFnD>ʎZdiyH/w7/ Joue-g;{ίP?/ߌȳvZ&kΫ4/Dx(B#@%-ު [~)۝s9G? J,ׄyi|?9] @@CܯxX_1x9T_7%AKD12?ؔs"^}ٞ#? @$R 'e:}9 @@ |OLyK%bTj02y#r G#@17Ts=oM0ț?~:BqJ,EmM"t~*.,FW̼7cs'/xN x#;Շ4"r @FcY{&^]+k p@ijUO;7Y?'V3"Ji-p΋0=3Mklh 1""nK+Q~_ހ, @`@>/?ɺ="/̳{"r 'v 0Iy'Yo&S17DTϫg2ɼ\R#@/R}2d|gDq8B#@S\s\{aIN4Lj P''@@  P''@@  P''@@  P''@@  P''@@  P''@@  P''@@  P''@@  P'T:/ @gg,&8A_}BmSH>uW^O\2E/z/~,OvK (B& ,4XEWwZtkjX ,HM-XuFWE읐YwuD~#4h(_4|>" F| @@EO< ǼGͳ7X* 0Xto:9 @@ Eono;YB`a~h/x&"۾c ?  
@d$m=δ7؅U _1~?k^Ev#uWKS[^_ @`C , >_K+|u->k9<3{iyH/w7/ Joூع<(=E_˜3"5Q`ozNӼB"4Q`~K䟷*ȖfvgĚN=X.ׄoi|?9] @ 7rb,c}nO=%@E-y`Sox-bgDe{*ⲣ @F%G'dzc\FXt?;bӔ)y&,95!9 @@E_Eo<{ޚ`ҥ7Gt ,5vBEƄt @lEb效>o#>s.3ީ>ԔS8"4h 320 sW sXnyD|7Fm]1iįE|!"2sbEWĺ5M`k|#⽈_VyfAG ,gߍ3vApCā @O,{rtv#sѱ9Ss=xK52 R|,?wuW=_3\iqnvt>3r]qhR}v y/>҂,ЦTO|4ۿ[IhS3m# q|4N䛳@>\ppfj66.6jhſm;|gbwo @w#XyaY4ZY?x0" @jhry+vϱŪ~>`bV!@j |n>K$Ods{2ma\խF-WF?b "2|?`4ΌyGg";dAmI/DOD~#qly y @Rx2k$bv%7s&^zRiϩ"Q< |^VH>oeZ]Z68Rx~+Kl]3:v.}tίMI$rIZ'#猜o?;⌈w"Gh R |zγk4,  0@ S43oSPd @ |PPd @ |PPd @ |PPd @ |PPd @ |PPd @ |PPd @ |PPd @ |PX{A~),78̫dP<[Y] R۱ С!C @KKm"@@ |E.. (b;Pv,t(wPR@R۱ С!C @KKm"@@ |E.. (b;Pv,t(wPR@R۱ С!C @KKm"@@ |E.. (b;Pv,t(wPR@R۱ С!C @KKm"@@ |E.. (b;Pv,t(pZr&ct(>`dw#=$@LGfںQ;8<[ssPߜfL}WjoN`# ū#lUW˳>Z=X-6rOoE<"sVqUsZ~ Xy&ږE擈O#>a9˶{WGėA,_Eo<[NvV`_ that allows for any index set of a multidimensional array to be distributed. We can distribute just one index (a slab decomposition), two index sets (pencil decomposition) or even more for higher-dimensional arrays. In mpi4py-fft there is also included a Python interface to the `FFTW `_ library. This interface can be used without MPI, much like `pyfftw `_, and even for real-to-real transforms, like discrete cosine or sine transforms. mpi4py-fft-2.0.6/docs/source/conf.py000066400000000000000000000132111462034230000172350ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/stable/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# import os import sys import subprocess sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../')) # -- Project information ----------------------------------------------------- project = 'mpi4py-fft' copyright = '2019, Mikael Mortensen and Lisandro Dalcin' author = 'Mikael Mortensen and Lisandro Dalcin' # The short X.Y version p = subprocess.Popen(["git describe --tags | cut -d'-' -f 1"], stdout=subprocess.PIPE, shell=True) # The short X.Y version version = p.communicate()[0].rstrip().decode('utf-8') # The full version, including alpha/beta/rc tags release = version # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', ] napoleon_google_docstring = False napoleon_use_param = False napoleon_use_ivar = True # For some reason mathjax 2.7.5 renders bold type ugly. Use 2.7.1 mathjax_path = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML' # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' numfig = True # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # #html_theme = 'alabaster' html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} html_context = { 'display_github': True, 'github_user': 'mpi4py', 'github_repo': 'mpi4py-fft', 'github_version': 'master', 'conf_py_path': '/docs/source/' } # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'mpi4py-fftdoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). 
# # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'mpi4py-fft.tex', 'mpi4py-fft Documentation', 'Mikael Mortensen and Lisandro Dalcin', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'mpi4py-fft', 'mpi4py-fft Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'mpi4py-fft', 'mpi4py-fft Documentation', author, 'mpi4py-fft', 'Parallel fast Fourier transforms', 'Miscellaneous'), ] # -- Extension configuration ------------------------------------------------- mpi4py-fft-2.0.6/docs/source/datastructures1.png000066400000000000000000000352571462034230000216200ustar00rootroot00000000000000PNG  IHDR# IDATx{dW}Ϲ}ogvgWXYH6q.c˒Uld Y `(\9PHZ`l ] Brʖvog8==ٙ{N?<{|w,K`.-8p8p8p8pp8p8p8p8p8:|~+w,K|V-@`N82psC V@2pdoIou;VW}\ ̅Tb+bv:Po~v@՟}a_filyX  z X5Ns[vgAJaNsC;]Ruh|@絆uʒXӾ/8PjݫL㰩[˝ZCTm>dտcj?!M-ɶy4[JNw:.dHyn+Vk[t. 
8Zm ^.2p @?L㐥t]vT݃ b)KB@ qV: Afvjo?O0{}\/3Æm.Rwǒ }k@!7#< ]`F7.ugf3ӻkiN,G xV>[fZ̪tWvwB馚7CpXA=*[t.[ͅN%rhv'P:1DqrZЋ\lUt[rÎU;hA;ՉŞ~!8]L1#f{j-o%J3ow+n\ ̗`cl/].E}.LDj'[^J8>\ ź0[ʒG(!Jn~ҶQz׬ 8V2JiRإVPډ [O /ml dlP\aZW@-+(u'Ut&2?K%/u%SP2ȶWh@s d0` 0t 6;o%zw/oۏ ` iRG klXbѵaUˆ@D-?kh D~\:7KXSw{H^oQv=Uu0Yʒ`o%!aᦪӗVnlj='{ks_8`_WpJ6p7f+^/Žs;ކ8@@^ ^Epʆ@m;-/݁\<+Xz/u9yI>2pl$,w 6/ [@xV[%Kn%{w_rwd0搩,JHRҶSMH1Ezq/T urPyB@8"w=p_eIv)_2Vk_ \7ҺzwݗK!h'Ls=s!yn]>L6G d4"TuM^Z)yq^/v"{ռ@bYLksJ$);lW(H^YoGK^xտېY !ktcUW,u٪wm@boM !$NWܬwC%7t^<4P'/UOalztbDP3ȁ$HWι9;³L/Y~h/]m^@hD$drr#0g=ޚǛ8KWLenv-@nBxz/u{qﭹqЇL IuT uSX5J3@6BwSʼn\0HrK^sޖ4Xjzn6%ةG C \sŋ?n 9~8Io;@pb1Æh5:5Rne)vP7/א͂vR 8`#!]?d*VkS~@ Yje;9j"YЎ; \~pp W,kJ5#QeSr`!eI0o͹4Tp.X3NYj%=PeDRIpQP'΂fa9o` 8`߉qLMl@C ¦r`~,NF&N/ּKl~Z56LY,n@Y f+l^{I4 `"wAp?ԣy2˶`;MC rj N/aa3IpGW檮rѱj%C gm'&f(DfS0pVTWad(KF@PKTwL̂vYh Wyn7d]!ӷ Pe;S !$NutY둷'@U}\/3C]4 V!pD(~7 O΂vCE W2,Y+^Y ͂vxkGi0x@cn] uTmJ6s͂vܔ0 @=ֲj!C]En|5!Tu,hI^Cp?m㰮ʢc( [&90,h;k_i0IWaC]2P6bZlEKL %7uz}o-+ f@c(̒k7:g @'-'h< qwK.Gi0OF 0.[ͮ]-@GC [(Pg4 0 ԾJ7ڲi(!Pک + ?ks8 冱Kfkl(!gA0w4 }\0 !<́h< J`ӵ$&C(LcŎFPT8lc`lo,Wn< 01 ]5?XwSh  i0N@FH,h4 ՟ 0" dbD(i(P\a @ i}B&,Z-i `b] !$%;  n`G 0& ^@ Mˁa} J0 s:1"js`~#/x@`PyD0lLpV>eޱiqJ)zZbςaW19NYc( dz)a'@85mKUwЌS1y0o݃7V: uA (fش-N[ƣ@Y yL`mh/rתwmj5N :ˁqc.[(P΂4UqHW@kc(<"s`iFp8 @Ց4:m$tLN@ !f@2LhR/΂=plJE-Ը|!l s`MN[䆥㦮0. 
f@c(/`C^E ԼTl"=7q{ Kіtejter:P6¼a ͂va xLP.׌%;& dbDX7 d'n8fAFi0TX\m`lP21" ?WA BHK_^^]54*uhtn%lbE-+z0Ƴ]'v#X+jm.7ItkXlD3ŋ<0HqS롿@uQѶb zkѪ[& éjM#,4?Sܸ9$NQn A8Ğq3[1`& !6̠h N[h *ǦPBHӶ̠n90n@dz^ }֗5uluL@P҃%m PQP7΂Y i `"y2CuPROU/(˩gA{N쭇@pPEX ;5&m'pD4BM΂0_9 uTouFג,Љ!Q`904N@]/sYzk>`؜crJ)Zm+AIsςvaqS) @L$(]"m%!r` yi0(B$/ڒZu(J908%~` 8ՆyT7VtehuFǤ<7ndNs`@r6MB?z{ISfǬ&lBxb^6f3l0x,0r4*uBc 05tbDxq3)h< ub:  !/׍F 0]n\7!avnq < ^o `c--!wͺmS@e#A?DF]/sz׋oLP8``%C]4P6"QPB'2-@d4 Rx[ @`̏ӪY5cEWւѰ͚mE1"")ޠu N* `tuluͺeyDXL\%u|륞3z@U`%]Y0h%C  mfA~pX+Iqm#L kFA";q,hBo7  J˚JFC -# gA`|?Qӌ!0`1W2Jіuuɔmn[7!I#FY, [MA@%] zs`H$7,qٚn%l)Yۚwf걖֖lj5=  Y2>tf׬[&1-@xD%CƦYB@pVyy{#JxD$,Kd[y`zǔ -Οzm c#@ u̇*C0NQ A@Y @ Z!^$0`PXg0c PyDTbY„@p036`j% SY r,h BМ:՟L^0d7"HdẐݾi0(`fe90dbg„!o$X ,@5EXgCXK?:ˆ0904~it !¦KL 90cgA@4?U3/okK EtCxD$AٍyɽƳ=?&  0Ӳ a@$嚹1"WA ;e6fA{0 w ] muf2nZ6j@"F ,HC` 56i=oDh"D }W0a"`^ `}I6f4h%!DLD&fA8a PN3I~Ex`dl4XújAC 21 aAχ 8`6͝ ,͎Y7 1@ f!? DFQ HqTe 0rGY`C "r`!ZYЁP}mQStk-CЭC A&H $?,\/ z }ps kWyV;Jؕ@Knvm7DeI,hM]'v" 9)=(݄7KB[隍Y7 1@CBHQP/nP~:$ F$̰ 0\pTVtc #UM' zX3B 90Y,  `O9@™ 0JGou͚e:oaM,hxN;i0lş#mm C aL4Xx '# Ν_)7rM ~mjWk- K PbM_L0-@PN$L g>u]W^y֛.a5? ն)F)?d(WL (EEd< w"a `pDM0Niu56 JAC 2&7 &ؔc5@-=Pi"gAn;0i0(G \ ԣ֗5fE:їur`YО 8M)6Ai5ha.N #%/΂fa~χ 8`~p&`%JՈ[VˁaQ&{ b@8XV07lf4!m&1r`ؘƁ ̐$I$I^׿&+l(]1.R( WL90 /<#xtٳg]=zɓ'!KKK?x]wݹs}駟k^+Y
iԼBe 0B6Njz+a"qCBHYR/Y, p1` *y睷~wAyNjm~gz(o+lSg \E=?vE{Qkue- @?Ej*K N L PΜ9/wy<>o?uӧ;#7SotbD#R%HjP4򫷽G^M?}< !$˲'N~Ek /˼ EAb2?L v+~0-@L^H`໑=_<8`Ȋ(<'y~`^L09""(SDj"Y~;n߉DJOljh6zeeYfVC e;Z^" È Kaa/& pӢ̋2I2iBigW~Cr5_ ~tjRjTfdoQM %He&f,/ 󼔅@ɶxkW1;BH{Ui톩Kmq`C B Y 2뤾w` 8cg0? \ EVEQ9)]9`VWȕ k"E OCEo(8b/=S6!aIS BI~Xz^{Ɓ XPeY;U[@YO-8pB`}f[p HE[O#nlDZƳfHC pښq* (5#e-@J!X fA0! _{&:̋Ԡ ^F2"d;(-f@p8 #^-`[ EYR G\ C H;r`uS6 Z0ʼ5B-8P % wj4|pЮL@ ZF#YKx8 g;U_ssJEY X,q`i0 _' .EC&`C 90BQ Rs@\YQ%곽 zT\ 29f2p؆2/(, ZD+I!ID6ς"kƯJ&SxJ`W&͇i@–P=@qx㗮n9 _ EVyAr& 6|׿L}*` PݕW,Z/ow @0G?,!BvME>:&6C%D? !껄o-dBsH"cfwU,()!$˜`rtpG8m2/K6AgV %)KQ3p)ג8 wBo݃ :ZmQieA˒ВRB6$<EAhbN`i0/\Z!ȋ(%H`ﯙ˂DKJ&p!gAn$^ Qet!hYRџmKY09  n;;`(6i6 :!8`L-W"/eI vDrWvL0Hi4 :B!`@Eg-B"ȇ´,nQ &gA~"p_ [)!|Q;3m "!D{<0CGND@ơj *G$j7x¦@ 唩m iQdɋg35,iY\@eM ΂D0QA-}߽;o-|_o6{ii ru; IpP`hNH;'>??~ 7ln{+Z7,ak,BJJݶ] bDK_Ay45U\|p?Ny'^W˙3gl۾꫟|I8@ dfT 5h&`@ s݄bƁ ,,)$~[g}[n۫vq-0n * Bf#5CYA0 H4@`?o~GOIJ,BHeO<_]]t:_}W9 Bl: v۲5ȭ(Mu \LPS?,q5 3{շ}[h4!{/tuuReٳz׻72/H^ʂ\ ʒz!r y ܼ.IQEQ|GXLuLFpt 7 6q> x?`&Sb|D@$+M4?8" 8`5- Rpdо؃ ͆7P0Ѭ(YLZʒ ,2 :~/ @.e97Cg+Gx o_,q`b@`,Ԡ>"WO٣i04mo~o}B}h| qYfEY{ykFXUz(!'Ӄ0^z36BHw 0+^eQl~)g41JJ(G(/!6Sf#r#ʂ$^Sa@"4pٕ@И=aြ(|6eA(E"J5@yK, !ʵ$#yQ=4Jm!])H!~)' vနٶ0-J.UgtP d[ZG(+.aM@`Xm @QвƭAJO50 DRq-e3ŦLYRRHqVxZv9蠛D7M۬J tZ@`yt&ظTP"QSܡ3vB!ψmAմEUSYHj?6oo~\Y?JⰟ=;׸gQ&\AQ-LZm`ӯTMG:{_)}-Mʒ9"0G)ڤc"UP,=5hslkJˍseo5'7nãS/XT2 & X)JO}bDe0+.8'dEZYQyY_hQTȰ3eC؁ t¤:D7(3ƛok- FkpREQ9g452)ll  ^/}BWVG7 dfrJ,sD\Ѓ3].?q;"@+pD`cK`6A v N;_*і@teɎ J@d ``ʆFLyխB&d0|}s=/Gg"P@% S ~w~w&~ ދ:`j"0֠QT$#gΜmꫯ~'92ƶ .̺(0ey]w=쳷r^廊kAfvih耼 ENiO8?xzl=o~gľאַv:믿EJxR6!ax 'K<~׽g~?#,>5h"^p GB`&c9gw !o7NuA` d弬>@>g_̟}}u@e a?S *uD`WUF:uMp+o9 ]:\*pT $*4. 
̼` ?hY'DzkA7m Qx z#֎f?ӌh"˫m.1]W_A*Y֠ ̐A{(_P`OLO@˂r|D`& v0%)3iH8\ .KlN%% "C8T M>=J{.HCnKi>/LP# @玟r )q&qiƃ_d@U/fg?QujMP#3^j12#kwSy4ΕTda: @ rD`G%B R5P&=a5A5 LP#4P PAp-HC,:4g-zCEm?Ʋ!6~&Fqُ26>:OU ?kr  j~.@^,8$η"*{J~|N3 *ay !?};fU13/' !?u;fQc$D\,炯~_'g!~@*`J8p@Mp(jT/S( E?Ja< Eo(/s͖[p>$Ii/(8"Rv_x8ׂ权4D)n @50 jSJhIc~zCp&&Te1yO>]nnc=c'x5yM$}KKK۳/&ATdyE N;]oz?u]Ooꩧo|>VGSMpɯ1uLIT K/;vgn3gض}W?pIhb3)x[iwu׳>{-~UUD)Fy~l'Nte"w.{9~}7m}A|{{!4` / _—g~#Ş052A~I?AE Cvu8ؚ`<:zN!1Hq.тbvGA<;9`dGBGp-.tih<.y#t^f l!(/?5A# "E%o=?xÖ[N&r~F D{uKGpPKCfEPIѡ4'C&MHQ盏}ro5p|rV.9yK/0Ao(13:P`Nw{G~f-8w؛ 8g H' f.Tb`HlN)@LPM̻ \ 5!s:`TS{_EB5l܂L}1˞_zwxC]:/ A`~aOj:&E &j]kA@4 &=a1[zC!7l{Oݱ?L:3o}|_-#~&@ 92KC PdI}<}tVo-$I#/k^g V _g[nYZZz4~-;wn۷G^ }? i;R9!VEuBȶo?3=PE7xW\>q篰6lxVSNM>\s .Cĉ=\8y$!$˲[$'O>GY__ꩧ:+Z\*<ȣ>7x뭷RJ~-,oSy$ɇ?eBȵ^}mל[op8p8p8`N 5IENDB`mpi4py-fft-2.0.6/docs/source/datastructures_pencil0.png000066400000000000000000000317511462034230000231440ustar00rootroot00000000000000PNG  IHDR=h IDATx}#}ߟyygNv|JmTJ ڍO)r*rS ;a6؍ ;mP%$EQ>m 8tw.$Crșu]rϐo !TX,@h4 @h4 @h49擟$01{RL8pM&n`O]7b 4K&<1jRt:$Ox?Ml@@h@b@(}Qw}c##|Qj^?uԹs666!LzR+rc k=c}QB0B8#yʕ+O?뺋O39s̕+W!~ٳW^ܹsyܹskkk'OV/޻3;h)8"+:+|_f.\xGַ|x@=o_p'}}sKKK{#|^?04`DSH3E$DGDhH4({ ҽ'HM !?w"8#P$\=\h('%6A$oIBͿA4q[Q ;p#!I{o !d?El) @hpQ`B G"hu_BukXqB9!ؗLq FF(D$OG7~V??O~;#wEQ${|:菾YE/Btdmm-w}W\{24 >; B/0  Pq^}Տ|#}{gb#x@ ~0#s64$h@p=G( `nL!L`.W<~?ȼ%#h B>q c~=Ǿi B_8HE h>p|<bǥ7:+}Y,3JaXJ~M/^x#$<@(uN5OES‚)r;>ESn`Iðo3eKB%_A40gխUS]4J8ݓ;(zy} "-McT3TPHAA4xӆ~TOSDoaネ_M?fm:e',%c )N^폈 yžS7O .tIah8``56V:iKh &Qz4f aOn+lʙ4NUo직>]GP70})).YbKBg_%䮼tWWDzFo3.2)(>.KTp3]4/>; 뤡.4@RP|p'?>૷h`!-M kNl'?\F&F*XoҍS]E{IA1O_{퀯̙?RՏ~_җO 0_ D%K\Hqx<0|moy[.^wD˦IA;Eb4O>y/~_饗[e%BɤO kڵ]G>_;w 5/Jʕ+sO\ϰK͘J)J%v5&5;vp40̧>)q^}Տ|#}{cr]o[4G>8h3M%B?a{t |p<^W K4>̠KĒ11_'g3Y+h@mv)rXI.4cN~t | LD,X|*Et2u,^b, IAp4D -֛ cT3Rf )20vv،/׿;4)U̓b(T(2#q@DO)4 IcS8aɋ` Ysȼ @]"2I )I3:& !_l|zI,>+dɋQRMME 0B]"hƔҶ1:fh DES؜m#)ha@`$o檮/om$͈- `wIC[2ESH["e8p.4c FX6~RvtXeKJ[|FRPlwB>4cդ`񶅤yt~U !B| @`D]"e.ťR8 @pQ}T2X&eM M(KĢŧmִ4qׇ[ofLeSc0:fhD]"eS]61qFn=-D 
oD7,$aBRPǿ{7!wA}n!/Zb%cEB>t$68L!l( 2;Ex+3GE8a1EmƲ4_8$HM%B?a F )(@4]"0:zҥK=C> !g}g !/w)"!d233aUhatL pW}6>~~wUA&y{+ ~ӟ~K_%e#Sxhlu2VRt)%.t@p#ryuu?w'y˗/۶}]w⋳/;D)IA wSs;no?nT*ɲiڇ?_xK/ `10:&N8dhЯO-G\p<|Dc0:ƴTOf70,U---B{nc? 8ͪKF$=*kyxmeb-z7 .l^!l)4:D`tLHt[Ζ( p}IB^I+Ra3Ap@쉺Dh}{ta$,IWs51mcK8VcӪ~B8`HC?T6vk @qFX7ڢ 8`H d%BIat jbZh'ut4_{n Km`t 0$*eт[@I gf]",IA`HT*˺N՘5l};QmQΑGRB!c)Iw.C(D 2d0 ecUՖ %)t$C.ꢪ,m1p.\S_4P&@#dp<]"]Im1p.Zje7K58u@gg))3 b`h˅ 8 @ag)eə]tboa-+T; @0)lm 8`=5U 3kib > AB^D]"J 8Q! rXC @SD ;F]ZpDpYszQ.+auAH v;@j}upDq`KDZ8BN++_dD4FFA܃B%2v84rMm}ܨ@4clwAR(`XɀX댦eFǘJsFnc L x D`ti )GW_,{v؇ Cd0@RBJ ALa9 FSBeRV `%BI GH # bg_/>hqpKctND&.7Z|`gk؛7|M̯ozm C`eK-&! N7,PWu9mv.Vs,]"!D]"0:8~|l sELfo&/D:oH nJ\-zVh  s]"l 8@J{8I`wK-IApM 5@`wLIApM (N\2#@%9@RpSR6kA'*N(c i% 4_6 >F!@4220FemKI&\ iU[]"0:8!/n'b![8'a7ND`t pHܠT8縍 *+u]"0:84Zth!:XbELK|7@=-+.ApS+TDq@[TEYE_:6#_ LM'dѩ9N 0/M1%|䒺ʖ 8 W1Oj]3lWg߅ 8HtOAq<%h`jT4UUY6xu9lo8l<~A{X~7K8_KԪ|"&};ʶcvXiE2AWYF{`X*{ch g}$i. jk-7^Aa-SFJp":=SA&˺PfrqA臁0$A@ 8` /nJMwZcy ,qFEY1tY%) nN؝gaApcP~p|C])Bq-,>E!~߇eRɹ6n)ͱ QP92zwBIv!iޖw\YG @40]$!kh5Thus) "8`-sj|I--bIDATB-C5F[l! °ApѫTT(&)7DwnY K d 8gʞ/uh?CxXHIh lzfjs   # bSJ0p@1x?8PSHK67fmgq ]% ʩ*=dCVdr6Fe  ~@]eUTZ]4MǭzGA+zY,lpʀit$Uʢ. UoHV[kʭ~ӟEaj[{( $Z 5yq!LgrU"im@NْAA@0 "sP6b],fg\p4z*UeYlo:b+h3{9d;D.rV݁19.|@8|AZ~ىdЙ &/ۊlP^W޴tx@By XWڼDUUu^mDm:5)ӝyrDsi+%W+pEԿZ%Eʠ]sZͼ pt`Qd0pQ(zs0Uuy)[N5a[ 'B怎xVV* po0@*)5ad01gQw[/ Sōxm@({ELFK|T+lG5v#t^}c䌤`22u apFW8 ǻD4%mcF^:`. \1[40#2UQV MR4ImFSn.tƫ@3뀨T)u WSA.* Ik 2ˠG:;Vp!l7Ǖ$ Im챦 Q¶nm7fӫ74SYV$S5U#+ KB沭FD3aJSֶ.zYgt2 °Rrh4FQjT56!ю9 :ЌcL~K1WJ-X֊106'P) hx)#v@"OS)bO0.> 4Q)nn82G6u2MtLųe|%$-b2] ni5fwq7?RJ MŨLrjS).rXZx u_n"U=9h8gPli aXepbEԿZ%JUExڤ*s1g!* :̑ T*PrZRa W hMlv6V}^4ܮ nf_S(Apu;xh f0l t:NteAN` MqTym9B@bELfrd\G;\IZ~٩9{K&t4 18k#; 䄺Zh\m7h Y (&˚@ޔfjy\:0@XNZ,v $vox*JI# akY+0 $gt`d0+V(zB@҉,,kE2hZ;ԁ8^⛊ ˜&&Gȼm*- Wov6[ÙS[ Uj1>T12%//0I@;r;.) 
/{\/\h q2Cv" tM7UReE&QžwewK93R _1&H q]um=>e$J |ʾwI3zjB\aQ* `<-m|dRCQUAD}m08y Ye3_w.`,t=mNw7ej*̪8`vCRX8  aX8  h9   8@2H 0720`s0 W f12U4P@̫ npcܢ7&M 6 h   28?pQLh)DLj h@\E!0s5"8  na8@P)hI{`>@^)edph . ye|׈hEpH . T" phH vi@l95"8  qa04 $ 4*@@^)   @h4 @h4 @h4`&L |jIENDB`mpi4py-fft-2.0.6/docs/source/dft.rst000066400000000000000000000235701462034230000172560ustar00rootroot00000000000000.. _dfts: Discrete Fourier Transforms --------------------------- Consider first two one-dimensional arrays :math:`\boldsymbol{u} = \{u_j\}_{j=0}^{N-1}` and :math:`\boldsymbol{\hat{u}} =\{\hat{u}_k\}_{k=0}^{N-1}`. We define the forward and backward Discrete Fourier transforms (DFT), respectively, as .. math:: :label: dft \hat{u}_k &= \frac{1}{N}\sum_{j=0}^{N-1}u_j e^{-2\pi i j k / N}, \quad \forall \, k\in \textbf{k}=0, 1, \ldots, N-1, \\ u_j &= \sum_{k=0}^{N-1}\hat{u}_k e^{2\pi i j k / N}, \quad \forall \, j\in\textbf{j}=0, 1, \ldots, N-1, where :math:`i=\sqrt{-1}`. Discrete Fourier transforms are computed efficiently using algorithms termed Fast Fourier Transforms, known in short as FFTs. .. note:: The index set for wavenumbers :math:`\textbf{k}` is usually not chosen as :math:`[0, 1, \ldots, N-1]`, but :math:`\textbf{k}=[-N/2, -N/2-1, \ldots, N/2-1]` for even :math:`N` and :math:`\textbf{k}=[-(N-1)/2, -(N-1)/2+1, \ldots, (N-1)/2]` for odd :math:`N`. See `numpy.fft.fftfreq `_. Also note that it is possible to tweak the default normalization used above when calling either forward or backward transforms. A more compact notation is commonly used for the DFTs, where the 1D forward and backward transforms are written as .. math:: \boldsymbol{\hat{u}} &= \mathcal{F}(\boldsymbol{u}), \\ \boldsymbol{u} &= \mathcal{F}^{-1}(\boldsymbol{\hat{u}}). Numpy, Scipy, and many other scientific softwares contain implementations that make working with Fourier series simple and straight forward. 
These 1D Fourier transforms can be implemented easily with just Numpy as, e.g.:: import numpy as np N = 16 u = np.random.random(N) u_hat = np.fft.fft(u) uc = np.fft.ifft(u_hat) assert np.allclose(u, uc) However, there is a minor difference. Numpy performs by default the :math:`1/N` scaling with the *backward* transform (``ifft``) and not the forward as shown in :eq:`dft`. These are merely different conventions and not important as long as one is aware of them. We use the scaling on the forward transform simply because this follows naturally when using the harmonic functions :math:`e^{i k x}` as basis functions when solving PDEs with the `spectral Galerkin method `_ or the `spectral collocation method (see chap. 3) `_. With mpi4py-fft the same operations take just a few more steps, because instead of executing ffts directly, like in the calls for ``np.fft.fft`` and ``np.fft.ifft``, we need to create the objects that are to do the transforms first. We need to *plan* the transforms:: from mpi4py_fft import fftw u = fftw.aligned(N, dtype=np.complex) u_hat = fftw.aligned_like(u) fft = fftw.fftn(u, flags=(fftw.FFTW_MEASURE,)) # plan fft ifft = fftw.ifftn(u_hat, flags=(fftw.FFTW_ESTIMATE,)) # plan ifft u[:] = np.random.random(N) # Now execute the transforms u_hat = fft(u, u_hat, normalize=True) uc = ifft(u_hat) assert np.allclose(uc, u) The planning of transforms makes an effort to find the fastest possible transform of the given kind. See more in :ref:`fftwmodule`. Multidimensional transforms ........................... It is for multidimensional arrays that it starts to become interesting for the current software. Multidimensional arrays are a bit tedious with notation, though, especially when the number of dimensions grow. We will stick with the `index notation `_ because it is most straightforward in comparison with implementation. 
We denote the entries of a two-dimensional array as :math:`u_{j_0, j_1}`, which corresponds to a row-major matrix :math:`\boldsymbol{u}=\{u_{j_0, j_1}\}_{(j_0, j_1) \in \textbf{j}_0 \times \textbf{j}_1}` of size :math:`N_0\cdot N_1`. Denoting also :math:`\omega_m=j_m k_m / N_m`, a two-dimensional forward and backward DFT can be defined as .. math:: :label: 2dfourier \hat{u}_{k_0,k_1} &= \frac{1}{N_0}\sum_{j_0 \in \textbf{j}_0}\Big( e^{-2\pi i \omega_0} \frac{1}{N_1} \sum_{j_1\in \textbf{j}_1} \Big( e^{-2\pi i \omega_1} u_{j_0,j_1}\Big) \Big), \quad \forall \, (k_0, k_1) \in \textbf{k}_0 \times \textbf{k}_1, \\ u_{j_0, j_1} &= \sum_{k_1\in \textbf{k}_1} \Big( e^{2\pi i \omega_1} \sum_{k_0\in\textbf{k}_0} \Big( e^{2\pi i \omega_0} \hat{u}_{k_0, k_1} \Big) \Big), \quad \forall \, (j_0, j_1) \in \textbf{j}_0 \times \textbf{j}_1. Note that the forward transform corresponds to taking the 1D Fourier transform first along axis 1, once for each of the indices in :math:`\textbf{j}_0`. Afterwords the transform is executed along axis 0. The two steps are more easily understood if we break things up a little bit and write the forward transform in :eq:`2dfourier` in two steps as .. math:: :label: forward2 \tilde{u}_{j_0,k_1} &= \frac{1}{N_1}\sum_{j_1 \in \textbf{j}_1} u_{j_0,j_1} e^{-2\pi i \omega_1}, \quad \forall \, k_1 \in \textbf{k}_1, \\ \hat{u}_{k_0,k_1} &= \frac{1}{N_0}\sum_{j_0 \in \textbf{j}_0} \tilde{u}_{j_0,k_1} e^{-2\pi i \omega_0}, \quad \forall \, k_0 \in \textbf{k}_0. The backward (inverse) transform if performed in the opposite order, axis 0 first and then 1. The order is actually arbitrary, but this is how is is usually computed. With mpi4py-fft the order of the directional transforms can easily be configured. We can write the complete transform on compact notation as .. math:: :label: dft_short \boldsymbol{\hat{u}} &= \mathcal{F}(\boldsymbol{u}), \\ \boldsymbol{u} &= \mathcal{F}^{-1}(\boldsymbol{\hat{u}}). 
But if we denote the two *partial* transforms along each axis as :math:`\mathcal{F}_0` and :math:`\mathcal{F}_1`, we can also write it as .. math:: :label: forward_2dpartial \boldsymbol{\hat{u}} &= \mathcal{F}_0(\mathcal{F}_1(\boldsymbol{u})), \\ \boldsymbol{u} &= \mathcal{F}_1^{-1}(\mathcal{F}_0^{-1}(\boldsymbol{\hat{u}})). Extension to multiple dimensions is straight forward. We denote a :math:`d`-dimensional array as :math:`u_{j_0, j_1, \ldots, j_{d-1}}` and a partial transform of :math:`u` along axis :math:`i` is denoted as .. math:: :label: partial_dft \tilde{u}_{j_0, \ldots, k_i, \ldots, j_{d-1}} = \mathcal{F}_i(u_{j_0, \ldots, j_i, \ldots, j_{d-1}}) We get the complete multidimensional transforms on short form still as :eq:`dft_short`, and with partial transforms as .. math:: :label: multi_dft_partial \boldsymbol{\hat{u}} &= \mathcal{F}_0(\mathcal{F}_1( \ldots \mathcal{F}_{d-1}(\boldsymbol{u})), \\ \boldsymbol{u} &= \mathcal{F}_{d-1}^{-1}( \mathcal{F}_{d-2}^{-1}( \ldots \mathcal{F}_0^{-1}(\boldsymbol{\hat{u}}))). Multidimensional transforms are straightforward to implement in Numpy .. _numpy2d: .. code-block:: python import numpy as np M, N = 16, 16 u = np.random.random((M, N)) u_hat = np.fft.rfftn(u) uc = np.fft.irfftn(u_hat) assert np.allclose(u, uc) .. _fftwmodule: The :mod:`.fftw` module ....................... The :mod:`.fftw` module provides an interface to most of the `FFTW library `_. 
In the :mod:`.fftw.xfftn` submodule there are planner functions for: * :func:`.fftn` - complex-to-complex forward Fast Fourier Transforms * :func:`.ifftn` - complex-to-complex backward Fast Fourier Transforms * :func:`.rfftn` - real-to-complex forward FFT * :func:`.irfftn` - complex-to-real backward FFT * :func:`.dctn` - real-to-real Discrete Cosine Transform (DCT) * :func:`.idctn` - real-to-real inverse DCT * :func:`.dstn` - real-to-real Discrete Sine Transform (DST) * :func:`.idstn` - real-to-real inverse DST * :func:`.hfftn` - complex-to-real forward FFT with Hermitian symmetry * :func:`.ihfftn` - real-to-complex backward FFT with Hermitian symmetry All these transform functions return instances of one of the classes :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT`, depending on the requested precision being single, double or long double, respectively. Except from precision, the tree classes are identical. All transforms are non-normalized by default. Note that all these functions are *planners*. They do not execute the transforms, they simply return an instance of a class that can do it (see docstrings of each function for usage). For quick reference, the 2D transform :ref:`shown for Numpy ` can be done using :mod:`.fftw` as:: from mpi4py_fft.fftw import rfftn as plan_rfftn, irfftn as plan_irfftn from mpi4py_fft.fftw import FFTW_ESTIMATE rfftn = plan_rfftn(u.copy(), flags=(FFTW_ESTIMATE,)) irfftn = plan_irfftn(u_hat.copy(), flags=(FFTW_ESTIMATE,)) u_hat = rfftn(uc, normalize=True) uu = irfftn(u_hat) assert np.allclose(uu, uc) Note that since all the functions in the above list are planners, an extra step is required in comparison with Numpy. Also note that we are using copies of the ``u`` and ``u_hat`` arrays in creating the plans. This is done because the provided arrays will be used under the hood as work arrays for the :func:`.rfftn` and :func:`.irfftn` functions, and the work arrays may be destroyed upon creation. 
The real-to-real transforms are by FFTW defined as one of (see `definitions `_ and `extended definitions `_) * FFTW_REDFT00 * FFTW_REDFT01 * FFTW_REDFT10 * FFTW_REDFT11 * FFTW_RODFT00 * FFTW_RODFT01 * FFTW_RODFT10 * FFTW_RODFT11 Different real-to-real cosine and sine transforms may be combined into one object using :func:`.factory.get_planned_FFT` with a list of different transform kinds. However, it is not possible to combine, in one single object, real-to-real transforms with real-to-complex. For such transforms more than one object is required. mpi4py-fft-2.0.6/docs/source/global.rst000066400000000000000000000331601462034230000177350ustar00rootroot00000000000000.. _global: Global Redistributions ====================== In high performance computing large multidimensional arrays are often distributed and shared amongst a large number of different processors. Consider a large three-dimensional array of double (64 bit) precision and global shape :math:`(512, 1024, 2048)`. To lift this array into RAM requires 8 GB of memory, which may be too large for a single, non-distributed machine. If, however, you have access to a distributed architecture, you can split the array up and share it between, e.g., four CPUs (most supercomputers have either 2 or 4 GB of memory per CPU), which will only need to hold 2 GBs of the global array each. Moreover, many algorithms with varying degrees of locality can take advantage of the distributed nature of the array to compute local array pieces concurrently, effectively exploiting multiple processor resources. There are several ways of distributing a large multidimensional array. Two such distributions for our three-dimensional global array (using 4 processors) are shown below .. image:: datastructures1.png :width: 250px :height: 200px .. image:: datastructures_pencil0.png :width: 250px :height: 200px Here each color represents one of the processors. 
We note that in the first image only one of the three axes is distributed, whereas in the second two axes are distributed. The first configuration corresponds to a slab, whereas the second corresponds to a pencil distribution. With either distribution only one quarter of the large, global array needs to be kept in rapid (RAM) memory for each processor, which is great. However, some operations may then require data that is not available locally in its quarter of the total array. If that is so, the processors will need to communicate with each other and send the necessary data where it is needed. There are many such MPI routines designed for sending and receiving data. We are generally interested in algorithms, like the FFT, that work on the global array, along one axis at the time. To be able to execute such algorithms, we need to make sure that the local arrays have access to all of its data along this axis. For the figure above, the slab distribution gives each processor data that is fully available along two axes, whereas the pencil distribution only has data fully available along one axis. Rearranging data, such that it becomes aligned in a different direction, is usually termed a global redistribution, or a global transpose operation. Note that with mpi4py-fft we always require that at least one axis of a multidimensional array remains aligned (non-distributed). Distribution and global redistribution is in mpi4py-fft handled by three classes in the :mod:`.pencil` module: * :class:`.Pencil` * :class:`.Subcomm` * :class:`.Transfer` These classes are the low-level backbone of the higher-level :class:`.PFFT` and :class:`.DistArray` classes. To use these low-level classes directly is not recommended and usually not necessary. However, for clarity we start by describing how these low-level classes work together. Lets first consider a 2D dataarray of global shape (8, 8) that will be distributed along axis 0. 
With a high level API we could then simply do:: import numpy as np from mpi4py_fft import DistArray N = (8, 8) a = DistArray(N, [0, 1]) where the ``[0, 1]`` list decides that the first axis can be distributed, whereas the second axis is using one processor only and as such is aligned (non-distributed). We may now inspect the low-level :class:`.Pencil` class associated with ``a``:: p0 = a.pencil The ``p0`` :class:`.Pencil` object contains information about the distribution of a 2D dataarray of global shape (8, 8). The distributed array ``a`` has been created using the information that is in ``p0``, and ``p0`` is used by ``a`` to look up information about the global array, for example:: >>> a.alignment 1 >>> a.global_shape (8, 8) >>> a.subcomm (, ) >>> a.commsizes [1, 1] Naturally, the sizes of the communicators will depend on the number of processors used to run the program. If we used 4, then ``a.commsizes`` would return ``[1, 4]``. We note that a low-level approach to creating such a distributed array would be:: import numpy as np from mpi4py_fft.pencil import Pencil, Subcomm from mpi4py import MPI comm = MPI.COMM_WORLD N = (8, 8) subcomm = Subcomm(comm, [0, 1]) p0 = Pencil(subcomm, N, axis=1) a0 = np.zeros(p0.subshape) Note that this last array ``a0`` would be equivalent to ``a``, but it would be a pure Numpy array (created on each processor) and it would not contain any of the information about the global array that it is part of ``(global_shape, pencil, subcomm, etc.)``. It contains the same amount of data as ``a`` though and ``a0`` is as such a perfectly fine distributed array. Used together with ``p0`` it contains exactly the same information as ``a``. Since at least one axis needs to be aligned (non-distributed), a 2D array can only be distributed with one processor group. 
If we wanted to distribute the second axis instead of the first, then we would have done:: a = DistArray(N, [1, 0]) With the low-level approach we would have had to use ``axis=0`` in the creation of ``p0``, as well as ``[1, 0]`` in the creation of ``subcomm``. Another way to get the second ``pencil``, that is aligned with axis 0, is to create it from ``p0``:: p1 = p0.pencil(0) Now the ``p1`` object will represent a (8, 8) global array distributed in the second axis. Lets create a complete script (``pencils.py``) that fills the array ``a`` with the value of each processors rank (note that it would also work to follow the low-level approach and use ``a0``):: import numpy as np from mpi4py_fft import DistArray from mpi4py import MPI comm = MPI.COMM_WORLD N = (8, 8) a = DistArray(N, [0, 1]) a[:] = comm.Get_rank() print(a.shape) We can run it with:: mpirun -np 4 python pencils.py and obtain the printed results from the last line (``print(a.shape)``):: (2, 8) (2, 8) (2, 8) (2, 8) The shape of the local ``a`` arrays is (2, 8) on all 4 processors. Now assume that we need these data aligned in the x-direction (axis=0) instead. For this to happen we need to perform a *global redistribution*. The easiest approach is then to execute the following:: b = a.redistribute(0) print(b.shape) which would print the following:: (8, 2) (8, 2) (8, 2) (8, 2) Under the hood the global redistribution is executed with the help of the :class:`.Transfer` class, that is designed to transfer data between any two sets of pencils, like those represented by ``p0`` and ``p1``. With low-level API a transfer object may be created using the pencils and the datatype of the array that is to be sent:: transfer = p0.transfer(p1, np.float) Executing the global redistribution is then simply a matter of:: a1 = np.zeros(p1.subshape) transfer.forward(a, a1) Now it is important to realise that the global array does not change. 
The local ``a1`` arrays will now contain the same data as ``a``, only aligned differently. However, the exchange is not performed in-place. The new array is as such a copy of the original that is aligned differently. Some images, :numref:`2dpencila` and :numref:`2dpencilb`, can be used to illustrate: .. _2dpencila: .. figure:: 2Dpencil.png :width: 250px :height: 200px Original 4 pencils (p0) of shape (2, 8) aligned in y-direction. Color represents rank. .. _2dpencilb: .. figure:: 2Dpencil2.png :width: 250px :height: 200px 4 pencils (p1) of shape (8, 2) aligned in x-direction after receiving data from p0. Data is the same as in :numref:`2dpencila`, only aligned differently. Mathematically, we will denote the entries of a two-dimensional global array as :math:`u_{j_0, j_1}`, where :math:`j_0\in \textbf{j}_0=[0, 1, \ldots, N_0-1]` and :math:`j_1\in \textbf{j}_1=[0, 1, \ldots, N_1-1]`. The shape of the array is then :math:`(N_0, N_1)`. A global array :math:`u_{j_0, j_1}` distributed in the first axis (as shown in :numref:`2dpencila`) by processor group :math:`P`, containing :math:`|P|` processors, is denoted as .. math:: u_{j_0/P, j_1} The global redistribution, from alignment in axis 1 to alignment in axis 0, as from :numref:`2dpencila` to :numref:`2dpencilb` above, is denoted as .. math:: u_{j_0, j_1/P} \xleftarrow[P]{1\rightarrow 0} u_{j_0/P, j_1} This operation corresponds exactly to the forward transfer defined above:: transfer.forward(a0, a1) If we need to go the other way .. math:: u_{j_0/P, j_1} \xleftarrow[P]{0\rightarrow 1} u_{j_0, j_1/P} this corresponds to:: transfer.backward(a1, a0) Note that the directions (forward/backward) here depends on how the transfer object is created. Under the hood all transfers are executing calls to `MPI.Alltoallw `_. Multidimensional distributed arrays ----------------------------------- The procedure discussed above remains the same for any type of array, of any dimensionality. 
With mpi4py-fft we can distribute any array of arbitrary dimensionality using any number of processor groups. We only require that the number of processor groups is at least one less than the number of dimensions, since one axis must remain aligned. Apart from this the distribution is completely configurable through the classes in the :mod:`.pencil` module. We denote a global :math:`d`-dimensional array as :math:`u_{j_0, j_1, \ldots, j_{d-1}}`, where :math:`j_m\in\textbf{j}_m` for :math:`m=[0, 1, \ldots, d-1]`. A :math:`d`-dimensional array distributed with only one processor group in the first axis is denoted as :math:`u_{j_0/P, j_1, \ldots, j_{d-1}}`. If using more than one processor group, the groups are indexed, like :math:`P_0, P_1` etc. Lets illustrate using a 4-dimensional array with 3 processor groups. Let the array be aligned only in axis 3 first (:math:`u_{j_0/P_0, j_1/P_1, j_2/P_2, j_3}`), and then redistribute for alignment along axes 2, 1 and finally 0. Mathematically, we will now be executing the three following global redistributions: .. math:: :label: 4d_redistribute u_{j_0/P_0, j_1/P_1, j_2, j_3/P_2} \xleftarrow[P_2]{3 \rightarrow 2} u_{j_0/P_0, j_1/P_1, j_2/P_2, j_3} \\ u_{j_0/P_0, j_1, j_2/P_1, j_3/P_2} \xleftarrow[P_1]{2 \rightarrow 1} u_{j_0/P_0, j_1/P_1, j_2, j_3/P_2} \\ u_{j_0, j_1/P_0, j_2/P_1, j_3/P_2} \xleftarrow[P_0]{1 \rightarrow 0} u_{j_0/P_0, j_1, j_2/P_1, j_3/P_2} Note that in the first step it is only processor group :math:`P_2` that is active in the redistribution, and the output (left hand side) is now aligned in axis 2. This can be seen since there is no processor group there to share the :math:`j_2` index. In the second step processor group :math:`P_1` is the active one, and in the final step :math:`P_0`. Now, it is not necessary to use three processor groups just because we have a four-dimensional array. We could just as well have been using 2 or 1. 
The advantage of using more groups is that you can then use more processors in total. Assuming :math:`N = N_0 = N_1 = N_2 = N_3`, you can use a maximum of :math:`N^p` processors, where :math:`p` is the number of processor groups. So for an array of shape :math:`(8,8,8,8)` it is possible to use 8, 64 and 512 number of processors for 1, 2 and 3 processor groups, respectively. On the other hand, if you can get away with it, or if you do not have access to a great number of processors, then fewer groups are usually found to be faster for the same number of processors in total. We can implement the global redistribution using the high-level :class:`.DistArray` class:: N = (8, 8, 8, 8) a3 = DistArray(N, [0, 0, 0, 1]) a2 = a3.redistribute(2) a1 = a2.redistribute(1) a0 = a1.redistribute(0) Note that the three redistribution steps correspond exactly to the three steps in :eq:`4d_redistribute`. Using a low-level API the same can be achieved with a little more elaborate coding. We start by creating pencils for the 4 different alignments:: subcomm = Subcomm(comm, [0, 0, 0, 1]) p3 = Pencil(subcomm, N, axis=3) p2 = p3.pencil(2) p1 = p2.pencil(1) p0 = p1.pencil(0) Here we have defined 4 different pencil groups, ``p0, p1, p2, p3``, aligned in axis 0, 1, 2 and 3, respectively. Transfer objects for arrays of type ``np.float`` are then created as:: transfer32 = p3.transfer(p2, np.float) transfer21 = p2.transfer(p1, np.float) transfer10 = p1.transfer(p0, np.float) Note that we can create transfer objects between any two pencils, not just neighbouring axes. 
We may now perform three different global redistributions as:: a0 = np.zeros(p0.subshape) a1 = np.zeros(p1.subshape) a2 = np.zeros(p2.subshape) a3 = np.zeros(p3.subshape) a0[:] = np.random.random(a0.shape) transfer32.forward(a3, a2) transfer21.forward(a2, a1) transfer10.forward(a1, a0) Storing this code under ``pencils4d.py``, we can use 8 processors that will give us 3 processor groups with 2 processors in each group:: mpirun -np 8 python pencils4d.py Note that with the low-level approach we can now easily go back using the reverse ``backward`` method of the :class:`.Transfer` objects:: transfer10.backward(a0, a1) A different approach is also possible with the high-level API:: a0.redistribute(out=a1) a1.redistribute(out=a2) a2.redistribute(out=a3) which corresponds to the backward transfers. However, with the high-level API the transfer objects are created (and deleted on exit) during the call to ``redistribute`` and as such this latter approach may be slightly less efficient. mpi4py-fft-2.0.6/docs/source/howtocite.rst000066400000000000000000000010621462034230000204760ustar00rootroot00000000000000How to cite? ============ Please cite mpi4py-fft using :: @article{jpdc_fft, author = {{Dalcin, Lisandro and Mortensen, Mikael and Keyes, David E}}, year = {{2019}}, title = {{Fast parallel multidimensional FFT using advanced MPI}}, journal = {{Journal of Parallel and Distributed Computing}}, doi = {10.1016/j.jpdc.2019.02.006} } @electronic{mpi4py-fft, author = {{Lisandro Dalcin and Mikael Mortensen}}, title = {{mpi4py-fft}}, url = {{https://github.com/mpi4py/mpi4py-fft}} } mpi4py-fft-2.0.6/docs/source/howtocontribute.rst000066400000000000000000000007561462034230000217410ustar00rootroot00000000000000How to contribute? ================== Mpi4py-fft is an open source project and anyone is welcome to contribute. An easy way to get started is by suggesting a new enhancement on the `issue tracker `_. 
If you have found a bug, then either report this on the issue tracker, or even better,
If you are using `conda`_, then all you need to install a fully functional mpi4py-fft, with all the above dependencies, is :: conda install -c conda-forge mpi4py-fft h5py=*=mpi* You probably want to install into a fresh environment, though, which can be achieved with :: conda create --name mpi4py-fft -c conda-forge mpi4py-fft conda activate mpi4py-fft Note that this gives you mpi4py-fft with default settings. This means that you will probably get the openmpi backend. To make a specific choice of backend just specify which, like this :: conda create --name mpi4py-fft -c conda-forge mpi4py-fft mpich If you do not use `conda`_, then you need to make sure that MPI and FFTW are installed by some other means. You can then install any version of mpi4py-fft hosted on `pypi`_ using `pip`_ :: pip install mpi4py-fft whereas the following will install the latest version from github :: pip install git+https://github.com/mpi4py/mpi4py-fft@master You can also build mpi4py-fft yourselves from the top directory, after cloning or forking :: pip install . or using `conda-build`_ with the recipes in folder ``conf/`` :: conda build -c conda-forge conf/ conda create --name mpi4py-fft -c conda-forge mpi4py-fft --use-local conda activate mpi4py-fft Additional dependencies ----------------------- For storing and retrieving data you need either `HDF5`_ or `netCDF4`_, compiled with support for MPI. Both are available with parallel support on `conda-forge`_ and can be installed into the current conda environment as :: conda install -c conda-forge h5py=*=mpi* netcdf4=*=mpi* Note that parallel HDF5 and NetCDF4 often are available as optimized modules on supercomputers. Otherwise, see the respective packages for how to install with support for MPI. Test installation ----------------- After installing (from source) it may be a good idea to run all the tests located in the ``tests`` folder. 
A range of tests may be run using the ``runtests.sh`` script :: conda install scipy, coverage cd tests/ ./runtests.sh This test-suit is run automatically on every commit to github, see, e.g., .. image:: https://dev.azure.com/mpi4py/mpi4py-fft/_apis/build/status/mpi4py.mpi4py-fft?branchName=master :target: https://dev.azure.com/mpi4py/mpi4py-fft .. _mpi4py-fft: https://github.com/mpi4py/mpi4py-fft .. _mpi4py: https://github.com/mpi4py/mpi4py .. _cython: http://cython.org .. _conda: https://conda.io/docs/ .. _conda-forge: https://conda-forge.org .. _FFTW: http://www.fftw.org .. _pip: https://pypi.org/project/pip/ .. _HDF5: https://www.hdfgroup.org .. _netCDF4: http://unidata.github.io/netcdf4-python/ .. _h5py: https://www.h5py.org .. _mpich: https://www.mpich.org .. _openmpi: https://www.open-mpi.org .. _numpy: https://www.numpy.org .. _numba: https://www.numba.org .. _conda-build: https://conda.io/docs/commands/build/conda-build.html .. _pypi: https://pypi.org/project/shenfun/ mpi4py-fft-2.0.6/docs/source/introduction.rst000066400000000000000000000041661462034230000212220ustar00rootroot00000000000000Introduction ============ The Python package `mpi4py-fft`_ is a tool primarily for working with Fast Fourier Transforms (FFTs) of (large) multidimensional arrays. There is really no limit as to how large the arrays can be, just as long as there is sufficient computing powers available. Also, there are no limits as to how transforms can be configured. Just about any combination of transforms from the FFTW library is supported. Finally, mpi4py-fft can also be used simply to distribute and redistribute large multidimensional arrays with MPI, without any transforms at all. The main contribution of mpi4py-fft can be found in just a few classes in the main modules: * :mod:`.mpifft` * :mod:`.pencil` * :mod:`.distarray` * :mod:`.libfft` * :mod:`.fftw` The :class:`.mpifft.PFFT` class is the major entry point for most users. 
It is a highly configurable class, which under the hood distributes large dataarrays and performs any type of transform, along any axes of a multidimensional array. The :mod:`.pencil` module is responsible for global redistributions through MPI. However, this module is rarely used on its own, unless one simply needs to do global redistributions without any transforms at all. The :mod:`.pencil` module is used heavily by the :class:`.PFFT` class. The :mod:`.distarray` module contains classes for simply distributing multidimensional arrays, with no regards to transforms. The distributed arrays created from the classes here can very well be used in any MPI application that requires a large multidimensional distributed array. The :mod:`.libfft` module provides a common interface to any of the serial transforms in the `FFTW `_ library. The :mod:`.fftw` module contains wrappers to the transforms provided by the `FFTW `_ library. We provide our own wrappers mainly because `pyfftw `_ does not include support for real-to-real transforms. Through the interface in :mod:`.fftw` we can do here, in Python, pretty much everything that you can do in the original FFTW library. .. _`mpi4py-fft`: https://github.com/mpi4py/mpi4py-fft mpi4py-fft-2.0.6/docs/source/io.rst000066400000000000000000000153071462034230000171070ustar00rootroot00000000000000Storing datafiles ================= mpi4py-fft works with regular Numpy arrays. However, since arrays in parallel can become very large, and the arrays live on multiple processors, we require parallel IO capabilities that goes beyond Numpys regular methods. In the :mod:`mpi4py_fft.io` module there are two helper classes for dumping dataarrays to either `HDF5 `_ or `NetCDF `_ format: * :class:`.HDF5File` * :class:`.NCFile` Both classes have one ``write`` and one ``read`` method that stores or reads data in parallel. 
A simple example of usage is:: from mpi4py import MPI import numpy as np from mpi4py_fft import PFFT, HDF5File, NCFile, newDistArray N = (128, 256, 512) T = PFFT(MPI.COMM_WORLD, N) u = newDistArray(T, forward_output=False) v = newDistArray(T, forward_output=False, val=2) u[:] = np.random.random(u.shape) # Store by first creating output files fields = {'u': [u], 'v': [v]} f0 = HDF5File('h5test.h5', mode='w') f1 = NCFile('nctest.nc', mode='w') f0.write(0, fields) f1.write(0, fields) v[:] = 3 f0.write(1, fields) f1.write(1, fields) Note that we are here creating two datafiles ``h5test.h5`` and ``nctest.nc``, for storing in HDF5 or NetCDF4 formats respectively. Normally, one would be satisfied using only one format, so this is only for illustration. We store the fields ``u`` and ``v`` on three different occasions, so the datafiles will contain three snapshots of each field ``u`` and ``v``. Also note that an alternative and perhaps simpler approach is to just use the ``write`` method of each distributed array:: u.write('h5test.h5', 'u', step=2) v.write('h5test.h5', 'v', step=2) u.write('nctest.nc', 'u', step=2) v.write('nctest.nc', 'v', step=2) The two different approaches can be used on the same output files. The stored dataarrays can also be retrieved later on:: u0 = newDistArray(T, forward_output=False) u1 = newDistArray(T, forward_output=False) u0.read('h5test.h5', 'u', 0) u1.read('h5test.h5', 'u', 1) # or alternatively for netcdf #u0.read('nctest.nc', 'u', 0) #u1.read('nctest.nc', 'u', 1) Note that one does not have to use the same number of processors when retrieving the data as when they were stored. It is also possible to store only parts of the, potentially large, arrays. Any chosen slice may be stored, using a *global* view of the arrays. 
It is possible to store both complete fields and slices in one single call by using the following appraoch:: f2 = HDF5File('variousfields.h5', mode='w') fields = {'u': [u, (u, [slice(None), slice(None), 4]), (u, [5, 5, slice(None)])], 'v': [v, (v, [slice(None), 6, slice(None)])]} f2.write(0, fields) f2.write(1, fields) Alternatively, one can use the write method of each field with the ``global_slice`` keyword argument:: u.write('variousfields.h5', 'u', 2) u.write('variousfields.h5', 'u', 2, global_slice=[slice(None), slice(None), 4]) u.write('variousfields.h5', 'u', 2, global_slice=[5, 5, slice(None)]) v.write('variousfields.h5', 'v', 2) v.write('variousfields.h5', 'v', 2, global_slice=[slice(None), 6, slice(None)]) In the end this will lead to an hdf5-file with groups:: variousfields.h5/ ├─ u/ | ├─ 1D/ | | └─ 5_5_slice/ | | ├─ 0 | | ├─ 1 | | └─ 3 | ├─ 2D/ | | └─ slice_slice_4/ | | ├─ 0 | | ├─ 1 | | └─ 2 | ├─ 3D/ | | ├─ 0 | | ├─ 1 | | └─ 2 | └─ mesh/ | ├─ x0 | ├─ x1 | └─ x2 └─ v/ ├─ 2D/ | └─ slice_6_slice/ | ├─ 0 | ├─ 1 | └─ 2 ├─ 3D/ | ├─ 0 | ├─ 1 | └─ 2 └─ mesh/ ├─ x0 ├─ x1 └─ x2 Note that a mesh is stored along with each group of data. This mesh can be given in two different ways when creating the datafiles: 1) A sequence of 2-tuples, where each 2-tuple contains the (origin, length) of the domain along its dimension. For example, a uniform mesh that originates from the origin, with lengths :math:`\pi, 2\pi, 3\pi`, can be given when creating the output file as:: f0 = HDF5File('filename.h5', domain=((0, pi), (0, 2*np.pi), (0, 3*np.pi))) or, using the write method of the distributed array: u.write('filename.h5', 'u', 0, domain=((0, pi), (0, 2*np.pi), (0, 3*np.pi))) 2) A sequence of arrays giving the coordinates for each dimension. 
For example:: d = (np.arange(N[0], dtype=np.float)*1*np.pi/N[0], np.arange(N[1], dtype=np.float)*2*np.pi/N[1], np.arange(N[2], dtype=np.float)*2*np.pi/N[2]) f0 = HDF5File('filename.h5', domain=d) With NetCDF4 the layout is somewhat different. For ``variousfields`` above, if we were using :class:`.NCFile` instead of :class:`.HDF5File`, we would get a datafile that with ``ncdump -h variousfields.nc`` would look like:: netcdf variousfields { dimensions: time = UNLIMITED ; // (3 currently) x = 128 ; y = 256 ; z = 512 ; variables: double time(time) ; double x(x) ; double y(y) ; double z(z) ; double u(time, x, y, z) ; double u_slice_slice_4(time, x, y) ; double u_5_5_slice(time, z) ; double v(time, x, y, z) ; double v_slice_6_slice(time, x, z) ; } Postprocessing -------------- Dataarrays stored to HDF5 files can be visualized using both `Paraview `_ and `Visit `_, whereas NetCDF4 files can at the time of writing only be opened with `Visit `_. To view the HDF5-files we first need to generate some light-weight *xdmf*-files that can be understood by both Paraview and Visit. To generate such files, simply throw the module :mod:`.io.generate_xdmf` on the HDF5-files:: from mpi4py_fft.io import generate_xdmf generate_xdmf('variousfields.h5') This will create a number of xdmf-files, one for each group that contains 2D or 3D data:: variousfields.xdmf variousfields_slice_slice_4.xdmf variousfields_slice_6_slice.xdmf These files can be opened directly in Paraview. However, note that for Visit, one has to generate the files using:: generate_xdmf('variousfields.h5', order='visit') because for some reason Paraview and Visit require the mesh in the xdmf-files to be stored in opposite order. mpi4py-fft-2.0.6/docs/source/modules.rst000066400000000000000000000001031462034230000201340ustar00rootroot00000000000000mpi4py_fft ========== .. 
toctree:: :maxdepth: 4 mpi4py_fft mpi4py-fft-2.0.6/docs/source/mpi4py_fft.fftw.rst000066400000000000000000000024331462034230000215220ustar00rootroot00000000000000mpi4py\_fft.fftw package ======================== Submodules ---------- mpi4py\_fft.fftw.fftw\_xfftn module ----------------------------------- .. automodule:: mpi4py_fft.fftw.fftw_xfftn :members: :special-members: __call__ :undoc-members: :show-inheritance: mpi4py\_fft.fftw.fftwf\_xfftn module ------------------------------------ .. automodule:: mpi4py_fft.fftw.fftwf_xfftn :members: :special-members: __call__ :undoc-members: :show-inheritance: mpi4py\_fft.fftw.fftwl\_xfftn module ------------------------------------ .. automodule:: mpi4py_fft.fftw.fftwl_xfftn :members: :special-members: __call__ :undoc-members: :show-inheritance: mpi4py\_fft.fftw.factory module ------------------------------- .. automodule:: mpi4py_fft.fftw.factory :members: :undoc-members: :show-inheritance: mpi4py\_fft.fftw.utilities module --------------------------------- .. automodule:: mpi4py_fft.fftw.utilities :members: :undoc-members: :show-inheritance: mpi4py\_fft.fftw.xfftn module ----------------------------- .. automodule:: mpi4py_fft.fftw.xfftn :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: mpi4py_fft.fftw :members: :undoc-members: :show-inheritance: mpi4py-fft-2.0.6/docs/source/mpi4py_fft.io.rst000066400000000000000000000015521462034230000211640ustar00rootroot00000000000000mpi4py_fft.io package ============================= Submodules ---------- mpi4py_fft.io.generate_xdmf module ----------------------------------------- .. automodule:: mpi4py_fft.io.generate_xdmf :members: :undoc-members: :show-inheritance: mpi4py_fft.io.h5py_file module ------------------------------------- .. automodule:: mpi4py_fft.io.h5py_file :members: :undoc-members: :show-inheritance: mpi4py_fft.io.nc_file module ----------------------------------- .. 
automodule:: mpi4py_fft.io.nc_file :members: :undoc-members: :show-inheritance: mpi4py_fft.io.file_base module ------------------------------------- .. automodule:: mpi4py_fft.io.file_base :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: mpi4py_fft.io :members: :undoc-members: :show-inheritance: mpi4py-fft-2.0.6/docs/source/mpi4py_fft.rst000066400000000000000000000015671462034230000205640ustar00rootroot00000000000000mpi4py\_fft package =================== Subpackages ----------- .. toctree:: mpi4py_fft.fftw mpi4py_fft.io Submodules ---------- mpi4py\_fft.libfft module ------------------------- .. automodule:: mpi4py_fft.libfft :members: :undoc-members: :show-inheritance: mpi4py\_fft.mpifft module ------------------------- .. automodule:: mpi4py_fft.mpifft :members: :special-members: __call__ :undoc-members: :show-inheritance: mpi4py\_fft.pencil module ------------------------- .. automodule:: mpi4py_fft.pencil :members: :undoc-members: :show-inheritance: mpi4py\_fft.distarray module ----------------------------------- .. automodule:: mpi4py_fft.distarray :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: mpi4py_fft :members: :undoc-members: :show-inheritance: mpi4py-fft-2.0.6/docs/source/parallel.rst000066400000000000000000000336641462034230000203020ustar00rootroot00000000000000Parallel Fast Fourier Transforms ================================ Parallel FFTs are computed through a combination of :ref:`global redistributions ` and :ref:`serial transforms `. In mpi4py-fft the interface to performing such parallel transforms is the :class:`.mpifft.PFFT` class. The class is highly configurable and best explained through a few examples. Slab decomposition .................. With slab decompositions we use only one group of processors and distribute only one index of a multidimensional array at the time. 
Consider the complete transform of a three-dimensional array of random numbers, and of shape (128, 128, 128). We can plan the transform of such an array with the following code snippet:: import numpy as np from mpi4py import MPI from mpi4py_fft import PFFT, newDistArray N = np.array([128, 128, 128], dtype=int) fft = PFFT(MPI.COMM_WORLD, N, axes=(0, 1, 2), dtype=np.float, grid=(-1,)) Here the signature ``N, axes=(0, 1, 2), dtype=np.float, grid=(-1,)`` tells us that the created ``fft`` instance is *planned* such as to slab distribute (along first axis) and transform any 3D array of shape ``N`` and type ``np.float``. Furthermore, we plan to transform axis 2 first, and then 1 and 0, which is exactly the reverse order of ``axes=(0, 1, 2)``. Mathematically, the planned transform corresponds to .. math:: \tilde{u}_{j_0/P,k_1,k_2} &= \mathcal{F}_1( \mathcal{F}_{2}(u_{j_0/P, j_1, j_2})), \\ \tilde{u}_{j_0, k_1/P, k_2} &\xleftarrow[P]{1\rightarrow 0} \tilde{u}_{j_0/P, k_1, k_2}, \\ \hat{u}_{k_0,k_1/P,k_2} &= \mathcal{F}_0(\tilde{u}_{j_0, k_1/P, k_2}). Note that axis 0 is distributed on the input array and axis 1 on the output array. In the first step above we compute the transforms along axes 2 and 1 (in that order), but we cannot compute the serial transform along axis 0 since the global array is distributed in that direction. We need to perform a global redistribution, the middle step, that realigns the global data such that it is aligned in axes 0. With data aligned in axis 0, we can perform the final transform :math:`\mathcal{F}_{0}` and be done with it. 
Assume now that all the code in this section is stored to a file named ``pfft_example.py``, and add to the above code:: u = newDistArray(fft, False) u[:] = np.random.random(u.shape).astype(u.dtype) u_hat = fft.forward(u, normalize=True) # Note that normalize=True is default and can be omitted uj = np.zeros_like(u) uj = fft.backward(u_hat, uj) assert np.allclose(uj, u) print(MPI.COMM_WORLD.Get_rank(), u.shape) Running this code with two processors (``mpirun -np 2 python pfft_example.py``) should raise no exception, and the output should be:: 1 (64, 128, 128) 0 (64, 128, 128) This shows that the first index has been shared between the two processors equally. The array ``u`` thus corresponds to :math:`u_{j_0/P,j_1,j_2}`. Note that the :func:`.newDistArray` function returns a :class:`.DistArray` object, which in turn is a subclassed Numpy ndarray. The :func:`.newDistArray` function uses ``fft`` to determine the size and type of the created distributed array, i.e., (64, 128, 128) and ``np.float`` for both processors. The ``False`` argument indicates that the shape and type should be that of the input array, as opposed to the output array type (:math:`\hat{u}_{k_0,k_1/P,k_2}` that one gets with ``True``). Note that because the input array is of real type, and not complex, the output array will be of global shape:: 128, 128, 65 The output array will be distributed in axis 1, so the output array shape should be (128, 64, 65) on each processor. We check this by adding the following code and rerunning:: u_hat = newDistArray(fft, True) print(MPI.COMM_WORLD.Get_rank(), u_hat.shape) leading to an additional print of:: 1 (128, 64, 65) 0 (128, 64, 65) To distribute in the first axis first is default and most efficient for row-major C arrays. However, we can easily configure the ``fft`` instance by modifying the axes keyword. 
Changing for example to:: fft = PFFT(MPI.COMM_WORLD, N, axes=(2, 0, 1), dtype=np.float) and axis 1 will be transformed first, such that the global output array will be of shape (128, 65, 128). The distributed input and output arrays will now have shape:: 0 (64, 128, 128) 1 (64, 128, 128) 0 (128, 33, 128) 1 (128, 32, 128) Note that the input array will still be distributed in axis 0 and the output in axis 1. This order of distribution can be tweaked using the `grid` keyword. Setting `grid=(1, 1, -1)` will force the last axis to be distributed on the input array. Another way to tweak the distribution is to use the :class:`.Subcomm` class directly:: from mpi4py_fft.pencil import Subcomm subcomms = Subcomm(MPI.COMM_WORLD, [1, 0, 1]) fft = PFFT(subcomms, N, axes=(0, 1, 2), dtype=np.float) Here the ``subcomms`` tuple will decide that axis 1 should be distributed, because the only zero in the list ``[1, 0, 1]`` is along axis 1. The ones determine that axes 0 and 2 should use one processor each, i.e., they should be non-distributed. The :class:`.PFFT` class has a few additional keyword arguments that one should be aware of. The default behaviour of :class:`.PFFT` is to use one transform object for each axis, and then use these sequentially. Setting ``collapse=True`` will attempt to minimize the number of transform objects by combining whenever possible. Take our example, the array :math:`u_{j_0/P,j_1,j_2}` can transform along both axes 1 and 2 simultaneously, without any intermediate global redistributions. By setting ``collapse=True`` only one object of ``rfftn(u, axes=(1, 2))`` will be used instead of two (like ``fftn(rfftn(u, axes=2), axes=1)``). Note that a collapse can also be configured through the ``axes`` keyword, using:: fft = PFFT(MPI.COMM_WORLD, N, axes=((0,), (1, 2)), dtype=np.float) will collapse axes 1 and 2, just like one would obtain with ``collapse=True``. 
If serial transforms other than :func:`.fftn`/:func:`.rfftn` and :func:`.ifftn`/:func:`.irfftn` are required, then this can be achieved using the ``transforms`` keyword and a dictionary pointing from axes to the type of transform. We can for example combine real-to-real with real-to-complex transforms like this:: from mpi4py_fft.fftw import rfftn, irfftn, dctn, idctn import functools dct = functools.partial(dctn, type=3) idct = functools.partial(idctn, type=3) transforms = {(0,): (rfftn, irfftn), (1, 2): (dct, idct)} r2c = PFFT(MPI.COMM_WORLD, N, axes=((0,), (1, 2)), transforms=transforms) u = newDistArray(r2c, False) u[:] = np.random.random(u.shape).astype(u.dtype) u_hat = r2c.forward(u) uj = np.zeros_like(u) uj = r2c.backward(u_hat, uj) assert np.allclose(uj, u) As a more complex example consider a 5-dimensional array where for some reason you need to perform discrete cosine transforms in axes 1 and 2, discrete sine transforms in axes 3 and 4, and a regular Fourier transform in the first axis. Here it makes sense to collapse the (1, 2) and (3, 4) axes, which leaves only the first axis uncollapsed. Hence we can then only use one processor group and a slab decomposition, whereas without collapsing we could have used four groups. A parallel transform object can be created and tested as:: N = (5, 6, 7, 8, 9) dctn = functools.partial(fftw.dctn, type=3) idctn = functools.partial(fftw.idctn, type=3) dstn = functools.partial(fftw.dstn, type=3) idstn = functools.partial(fftw.idstn, type=3) fft = PFFT(MPI.COMM_WORLD, N, ((0,), (1, 2), (3, 4)), grid=(-1,), transforms={(1, 2): (dctn, idctn), (3, 4): (dstn, idstn)}) A = newDistArray(fft, False) A[:] = np.random.random(A.shape) C = fftw.aligned_like(A) B = fft.forward(A) C = fft.backward(B, C) assert np.allclose(A, C) Pencil decomposition .................... A pencil decomposition uses two groups of processors. Each group then is responsible for distributing one index set each of a multidimensional array. 
We can perform a pencil decomposition simply by running the first example from the previous section, but now with 4 processors. To remind you, we put this in ``pfft_example.py``, where now ``grid=(-1,)`` has been removed in the PFFT calling:: import numpy as np from mpi4py import MPI from mpi4py_fft import PFFT, newDistArray N = np.array([128, 128, 128], dtype=int) fft = PFFT(MPI.COMM_WORLD, N, axes=(0, 1, 2), dtype=np.float) u = newDistArray(fft, False) u[:] = np.random.random(u.shape).astype(u.dtype) u_hat = fft.forward(u) uj = np.zeros_like(u) uj = fft.backward(u_hat, uj) assert np.allclose(uj, u) print(MPI.COMM_WORLD.Get_rank(), u.shape) The output of running ``mpirun -np 4 python pfft_example.py`` will then be:: 0 (64, 64, 128) 2 (64, 64, 128) 3 (64, 64, 128) 1 (64, 64, 128) Note that now both the two first index sets are shared, so we have a pencil decomposition. The shared input array is now denoted as :math:`u_{j_0/P_0,j_1/P_1,j2}` and the complete forward transform performs the following 5 steps: .. math:: \tilde{u}_{j_0/P_0,j_1/P_1,k_2} &= \mathcal{F}_{2}(u_{j_0/P_0, j_1/P_1, j_2}), \\ \tilde{u}_{j_0/P_0, j_1, k_2/P_1} &\xleftarrow[P_1]{2\rightarrow 1} \tilde{u}_{j_0/P_0, j_1/P_1, k_2}, \\ \tilde{u}_{j_0/P_0,k_1,k_2/P_1} &= \mathcal{F}_1(\tilde{u}_{j_0/P_0, j_1, k_2/P_1}), \\ \tilde{u}_{j_0, k_1/P_0, k_2/P_1} &\xleftarrow[P_0]{1\rightarrow 0} \tilde{u}_{j_0/P_0, k_1, k_2/P_1}, \\ \hat{u}_{k_0,k_1/P_0,k_2/P_1} &= \mathcal{F}_0(\tilde{u}_{j_0, k_1/P_0, k_2/P_1}). Like for the slab decomposition, the order of the different steps is configurable. Simply change the value of ``axes``, e.g., as:: fft = PFFT(MPI.COMM_WORLD, N, axes=(2, 0, 1), dtype=np.float) and the input and output arrays will be of shape:: 3 (64, 128, 64) 2 (64, 128, 64) 1 (64, 128, 64) 0 (64, 128, 64) 3 (64, 32, 128) 2 (64, 32, 128) 1 (64, 33, 128) 0 (64, 33, 128) We see that the input array is aligned in axis 1, because this is the direction transformed first. Convolution ........... 
Working with Fourier one sometimes need to transform the product of two or more functions, like .. math:: :label: ft_convolve \widehat{ab}_k = \int_{0}^{2\pi} a b e^{-i k x} dx, \quad \forall k \in [-N/2, \ldots, N/2-1] computed with DFT as .. math:: :label: dft_convolve \widehat{ab}_k = \frac{1}{N}\sum_{j=0}^{N-1}a_j b_j e^{-2\pi i j k / N}, \quad \forall \, k\in [-N/2, \ldots, N/2-1]. .. note:: We are here assuming an even number :math:`N` and use wavenumbers centered around zero. If :math:`a` and :math:`b` are two Fourier series with their own coefficients: .. math:: :label: ab_sums a &= \sum_{p=-N/2}^{N/2-1} \hat{a}_p e^{i p x}, \\ b &= \sum_{q=-N/2}^{N/2-1} \hat{b}_q e^{i q x}, then we can insert for the two sums from :eq:`ab_sums` in :eq:`ft_convolve` and get .. math:: :label: ab_convolve \widehat{ab}_k &= \int_{0}^{2\pi} \left( \sum_{p=-N/2}^{N/2-1} \hat{a}_p e^{i p x} \sum_{q=-N/2}^{N/2-1} \hat{b}_q e^{i q x} \right) e^{-i k x} dx, \quad \forall \, k \in [-N/2, \ldots, N/2-1] \\ \widehat{ab}_k &= \sum_{p=-N/2}^{N/2-1} \sum_{q=-N/2}^{N/2-1} \hat{a}_p \hat{b}_q \int_{0}^{2\pi} e^{-i (p+q-k) x} dx, \quad \forall \, k \in [-N/2, \ldots, N/2-1] The final integral is :math:`2\pi` for :math:`p+q=k` and zero otherwise. Consequently, we get .. math:: :label: ab_convolve2 \widehat{ab}_k = 2\pi \sum_{p=-N/2}^{N/2-1}\sum_{q=-N/2}^{N/2-1} \hat{a}_p \hat{b}_{q} \delta_{p+q, k} , \quad \forall \, k \in [-N/2, \ldots, N/2-1] Unfortunately, the convolution sum :eq:`ab_convolve2` is very expensive to compute, and the direct application of :eq:`dft_convolve` leads to aliasing errors. Luckily there is a fast approach that eliminates aliasing as well. The fast, alias-free, approach makes use of the FFT and zero-padded coefficient vectors. The idea is to zero-pad :math:`\hat{a}` and :math:`\hat{b}` in spectral space such that we get the extended sums .. 
math:: A_j &= \sum_{p=-M/2}^{M/2-1} \hat{\hat{a}}_p e^{2 \pi i p j/M}, \\ B_j &= \sum_{q=-M/2}^{M/2-1} \hat{\hat{b}}_q e^{2 \pi i q j/M}, where :math:`M>N` and where the coefficients have been zero-padded such that .. math:: \hat{\hat{a}}_p = \begin{cases} \hat{a}_p, &\forall |p| \le N/2 \\ 0, &\forall |p| \gt N/2 \end{cases} Now compute the nonlinear term in the larger physical space and compute the convolution as .. math:: :label: ab_convolve3 \widehat{ab}_k = \frac{1}{M} \sum_{j=0}^{M-1} A_j B_j e^{- 2 \pi i k j/M}, \quad \forall \, k \in [-M/2, \ldots, M/2-1] Finally, truncate the vector :math:`\widehat{ab}_k` to the original range :math:`k\in[-N/2, \ldots, N/2-1]`, simply by eliminating all the wavenumbers higher than :math:`|N/2|`. With mpi4py-fft we can compute this convolution using the ``padding`` keyword of the :class:`.PFFT` class:: import numpy as np from mpi4py_fft import PFFT, newDistArray from mpi4py import MPI comm = MPI.COMM_WORLD N = (128, 128) # Global shape in physical space fft = PFFT(comm, N, padding=[1.5, 1.5], dtype=np.complex) # Create arrays in normal spectral space a_hat = newDistArray(fft, True) b_hat = newDistArray(fft, True) a_hat[:] = np.random.random(a_hat.shape) + np.random.random(a_hat.shape)*1j b_hat[:] = np.random.random(a_hat.shape) + np.random.random(a_hat.shape)*1j # Transform to real space with padding a = newDistArray(fft, False) b = newDistArray(fft, False) assert a.shape == (192//comm.Get_size(), 192) a = fft.backward(a_hat, a) b = fft.backward(b_hat, b) # Do forward transform with truncation ab_hat = fft.forward(a*b) .. note:: The padded instance of the :class:`.PFFT` class is often used in addition to a regular non-padded class. The padded version is then used to handle non-linearities, whereas the non-padded takes care of the rest, see `demo `_. 
mpi4py-fft-2.0.6/examples/000077500000000000000000000000001462034230000153265ustar00rootroot00000000000000mpi4py-fft-2.0.6/examples/darray.py000066400000000000000000000047121462034230000171660ustar00rootroot00000000000000import numpy as np from mpi4py import MPI from mpi4py_fft.distarray import DistArray, newDistArray from mpi4py_fft.mpifft import PFFT # Test DistArray. Start with alignment in axis 0, then tranfer to 2 and # finally to 1 N = (16, 14, 12) z0 = DistArray(N, dtype=float, alignment=0) z0[:] = np.random.randint(0, 10, z0.shape) s0 = MPI.COMM_WORLD.allreduce(np.sum(z0)) z1 = z0.redistribute(2) s1 = MPI.COMM_WORLD.allreduce(np.sum(z1)) z2 = z1.redistribute(1) s2 = MPI.COMM_WORLD.allreduce(np.sum(z2)) assert s0 == s1 == s2 fft = PFFT(MPI.COMM_WORLD, darray=z2, axes=(0, 2, 1)) z3 = newDistArray(fft, forward_output=True) z2c = z2.copy() fft.forward(z2, z3) fft.backward(z3, z2) s0, s1 = np.linalg.norm(z2), np.linalg.norm(z2c) assert abs(s0-s1) < 1e-12, s0-s1 v0 = newDistArray(fft, forward_output=False, rank=1) #v0 = Function(fft, forward_output=False, rank=1) v0[:] = np.random.random(v0.shape) v0c = v0.copy() v1 = newDistArray(fft, forward_output=True, rank=1) for i in range(3): v1[i] = fft.forward(v0[i], v1[i]) for i in range(3): v0[i] = fft.backward(v1[i], v0[i]) s0, s1 = np.linalg.norm(v0c), np.linalg.norm(v0) assert abs(s0-s1) < 1e-12 nfft = PFFT(MPI.COMM_WORLD, darray=v0[0], axes=(0, 2, 1)) for i in range(3): v1[i] = nfft.forward(v0[i], v1[i]) for i in range(3): v0[i] = nfft.backward(v1[i], v0[i]) s0, s1 = np.linalg.norm(v0c), np.linalg.norm(v0) assert abs(s0-s1) < 1e-12 N = (6, 6, 6) z = DistArray(N, dtype=float, alignment=0) z[:] = MPI.COMM_WORLD.Get_rank() g0 = z.get((0, slice(None), 0)) z2 = z.redistribute(2) z = z2.redistribute(out=z) g1 = z.get((0, slice(None), 0)) assert np.all(g0 == g1) s0 = MPI.COMM_WORLD.reduce(np.linalg.norm(z)**2) s1 = MPI.COMM_WORLD.reduce(np.linalg.norm(z2)**2) if MPI.COMM_WORLD.Get_rank() == 0: assert abs(s0-s1) < 1e-12 N 
= (3, 3, 6, 6, 6) z2 = DistArray(N, dtype=float, val=1, alignment=2, rank=2) z2[:] = MPI.COMM_WORLD.Get_rank() z1 = z2.redistribute(1) z0 = z1.redistribute(0) s0 = MPI.COMM_WORLD.reduce(np.linalg.norm(z2)**2) s1 = MPI.COMM_WORLD.reduce(np.linalg.norm(z0)**2) if MPI.COMM_WORLD.Get_rank() == 0: assert abs(s0-s1) < 1e-12 z1 = z0.redistribute(out=z1) z0 = z1.redistribute(out=z0) N = (6, 6, 6, 6, 6) m0 = DistArray(N, dtype=float, alignment=2) m0[:] = MPI.COMM_WORLD.Get_rank() m1 = m0.redistribute(4) m0 = m1.redistribute(out=m0) s0 = MPI.COMM_WORLD.reduce(np.linalg.norm(m0)**2) s1 = MPI.COMM_WORLD.reduce(np.linalg.norm(m1)**2) if MPI.COMM_WORLD.Get_rank() == 0: assert abs(s0-s1) < 1e-12 mpi4py-fft-2.0.6/examples/spectral_dns_solver.py000066400000000000000000000107411462034230000217560ustar00rootroot00000000000000""" Demo program that solves the Navier Stokes equations in a triply periodic domain. The solution is initialized using the Taylor-Green vortex and evolved in time with a 4'th order Runge Kutta method. Please note that this is not an optimized solver. 
For fast solvers, see http://github.com/spectralDNS/spectralDNS """ from time import time import numpy as np from mpi4py import MPI from mpi4py_fft import PFFT, newDistArray # Set viscosity, end time and time step nu = 0.000625 T = 0.1 dt = 0.01 # Set global size of the computational box M = 6 N = [2**M, 2**M, 2**M] L = np.array([2*np.pi, 4*np.pi, 4*np.pi], dtype=float) # Needs to be (2*int)*pi in all directions (periodic) because of initialization # Create instance of PFFT to perform parallel FFT + an instance to do FFT with padding (3/2-rule) FFT = PFFT(MPI.COMM_WORLD, N, collapse=False) #FFT_pad = PFFT(MPI.COMM_WORLD, N, padding=[1.5, 1.5, 1.5]) FFT_pad = FFT # Declare variables needed to solve Navier-Stokes U = newDistArray(FFT, False, rank=1, view=True) # Velocity U_hat = newDistArray(FFT, rank=1, view=True) # Velocity transformed P = newDistArray(FFT, False, view=True) # Pressure (scalar) P_hat = newDistArray(FFT, view=True) # Pressure transformed U_hat0 = newDistArray(FFT, rank=1, view=True) # Runge-Kutta work array U_hat1 = newDistArray(FFT, rank=1, view=True) # Runge-Kutta work array a = [1./6., 1./3., 1./3., 1./6.] # Runge-Kutta parameter b = [0.5, 0.5, 1.] 
# Runge-Kutta parameter dU = newDistArray(FFT, rank=1, view=True) # Right hand side of ODEs curl = newDistArray(FFT, False, rank=1, view=True) U_pad = newDistArray(FFT_pad, False, rank=1, view=True) curl_pad = newDistArray(FFT_pad, False, rank=1, view=True) def get_local_mesh(FFT, L): """Returns local mesh.""" X = np.ogrid[FFT.local_slice(False)] N = FFT.global_shape() for i in range(len(N)): X[i] = (X[i]*L[i]/N[i]) X = [np.broadcast_to(x, FFT.shape(False)) for x in X] return X def get_local_wavenumbermesh(FFT, L): """Returns local wavenumber mesh.""" s = FFT.local_slice() N = FFT.global_shape() # Set wavenumbers in grid k = [np.fft.fftfreq(n, 1./n).astype(int) for n in N[:-1]] k.append(np.fft.rfftfreq(N[-1], 1./N[-1]).astype(int)) K = [ki[si] for ki, si in zip(k, s)] Ks = np.meshgrid(*K, indexing='ij', sparse=True) Lp = 2*np.pi/L for i in range(3): Ks[i] = (Ks[i]*Lp[i]).astype(float) return [np.broadcast_to(k, FFT.shape(True)) for k in Ks] X = get_local_mesh(FFT, L) K = get_local_wavenumbermesh(FFT, L) K = np.array(K).astype(float) K2 = np.sum(K*K, 0, dtype=float) K_over_K2 = K.astype(float) / np.where(K2 == 0, 1, K2).astype(float) def cross(x, y, z): """Cross product z = x \times y""" z[0] = FFT_pad.forward(x[1]*y[2]-x[2]*y[1], z[0]) z[1] = FFT_pad.forward(x[2]*y[0]-x[0]*y[2], z[1]) z[2] = FFT_pad.forward(x[0]*y[1]-x[1]*y[0], z[2]) return z def compute_curl(x, z): z[2] = FFT_pad.backward(1j*(K[0]*x[1]-K[1]*x[0]), z[2]) z[1] = FFT_pad.backward(1j*(K[2]*x[0]-K[0]*x[2]), z[1]) z[0] = FFT_pad.backward(1j*(K[1]*x[2]-K[2]*x[1]), z[0]) return z def compute_rhs(rhs): for j in range(3): U_pad[j] = FFT_pad.backward(U_hat[j], U_pad[j]) curl_pad[:] = compute_curl(U_hat, curl_pad) rhs = cross(U_pad, curl_pad, rhs) P_hat[:] = np.sum(rhs*K_over_K2, 0, out=P_hat) rhs -= P_hat*K rhs -= nu*K2*U_hat return rhs # Initialize a Taylor Green vortex U[0] = np.sin(X[0])*np.cos(X[1])*np.cos(X[2]) U[1] = -np.cos(X[0])*np.sin(X[1])*np.cos(X[2]) U[2] = 0 for i in range(3): U_hat[i] = 
FFT.forward(U[i], U_hat[i]) # Integrate using a 4th order Rung-Kutta method t = 0.0 tstep = 0 t0 = time() while t < T-1e-8: t += dt tstep += 1 U_hat1[:] = U_hat0[:] = U_hat for rk in range(4): dU = compute_rhs(dU) if rk < 3: U_hat[:] = U_hat0 + b[rk]*dt*dU U_hat1[:] += a[rk]*dt*dU U_hat[:] = U_hat1[:] for i in range(3): U[i] = FFT.backward(U_hat[i], U[i]) #k = MPI.COMM_WORLD.reduce(sum(U*U)/N[0]/N[1]/N[2]/2) #if MPI.COMM_WORLD.Get_rank() == 0: #print("Energy = {}".format(k)) ## Transform result to real physical space #for i in range(3): #U[i] = FFT.backward(U_hat[i], U[i]) # Check energy k = MPI.COMM_WORLD.reduce(np.sum(U*U)/N[0]/N[1]/N[2]/2) if MPI.COMM_WORLD.Get_rank() == 0: print('Time = {}'.format(time()-t0)) assert round(float(k) - 0.124953117517, 7) == 0 mpi4py-fft-2.0.6/examples/transforms.py000066400000000000000000000024051462034230000200770ustar00rootroot00000000000000import functools import numpy as np from mpi4py import MPI from mpi4py_fft import PFFT, newDistArray from mpi4py_fft.fftw import dctn, idctn # Set global size of the computational box N = np.array([18, 18, 18], dtype=int) dct = functools.partial(dctn, type=3) idct = functools.partial(idctn, type=3) transforms = {(1, 2): (dct, idct)} fft = PFFT(MPI.COMM_WORLD, N, axes=None, collapse=True, grid=(-1,), transforms=transforms) pfft = PFFT(MPI.COMM_WORLD, N, axes=((0,), (1, 2)), grid=(-1,), padding=[1.5, 1.0, 1.0], transforms=transforms) assert fft.axes == pfft.axes u = newDistArray(fft, forward_output=False) u[:] = np.random.random(u.shape).astype(u.dtype) u_hat = newDistArray(fft, forward_output=True) u_hat = fft.forward(u, u_hat) uj = np.zeros_like(u) uj = fft.backward(u_hat, uj) assert np.allclose(uj, u) u_padded = newDistArray(pfft, forward_output=False) uc = u_hat.copy() u_padded = pfft.backward(u_hat, u_padded) u_hat = pfft.forward(u_padded, u_hat) assert np.allclose(u_hat, uc) #cfft = PFFT(MPI.COMM_WORLD, N, dtype=complex, padding=[1.5, 1.5, 1.5]) cfft = PFFT(MPI.COMM_WORLD, N, 
dtype=complex) uc = np.random.random(cfft.backward.input_array.shape).astype(complex) u2 = cfft.backward(uc) u3 = uc.copy() u3 = cfft.forward(u2, u3) assert np.allclose(uc, u3) mpi4py-fft-2.0.6/mpi4py_fft/000077500000000000000000000000001462034230000155715ustar00rootroot00000000000000mpi4py-fft-2.0.6/mpi4py_fft/__init__.py000066400000000000000000000017511462034230000177060ustar00rootroot00000000000000""" This is the **mpi4py-fft** package What is **mpi4py-fft**? ======================= The Python package **mpi4py-fft** is a tool primarily for working with Fast Fourier Transforms (FFTs) of (large) multidimensional arrays. There is really no limit as to how large the arrays can be, just as long as there is sufficient computing powers available. Also, there are no limits as to how transforms can be configured. Just about any combination of transforms from the FFTW library is supported. Furthermore, **mpi4py-fft** can also be used simply to perform global redistributions (distribute and communicate) of large arrays with MPI, without any transforms at all. For more information, see `documentation `_. """ __version__ = '2.0.6' __author__ = 'Lisandro Dalcin and Mikael Mortensen' from .distarray import DistArray, newDistArray, Function from .mpifft import PFFT from . import fftw from .fftw import fftlib from .io import HDF5File, NCFile, generate_xdmf mpi4py-fft-2.0.6/mpi4py_fft/distarray.py000066400000000000000000000427531462034230000201600ustar00rootroot00000000000000import os from numbers import Number, Integral import numpy as np from mpi4py import MPI from .pencil import Pencil, Subcomm from .io import HDF5File, NCFile, FileBase comm = MPI.COMM_WORLD class DistArray(np.ndarray): """Distributed Numpy array This Numpy array is part of a larger global array. Information about the distribution is contained in the attributes. 
Parameters ---------- global_shape : sequence of ints Shape of non-distributed global array subcomm : None, :class:`.Subcomm` object or sequence of ints, optional Describes how to distribute the array val : Number or None, optional Initialize array with this number if buffer is not given dtype : np.dtype, optional Type of array buffer : Numpy array, optional Array of correct shape. The buffer owns the memory that is used for this array. alignment : None or int, optional Make sure array is aligned in this direction. Note that alignment does not take rank into consideration. rank : int, optional Rank of tensor (number of free indices, a scalar is zero, vector one, matrix two) For more information, see `numpy.ndarray `_ Note ---- Tensors of rank higher than 0 are not distributed in the first ``rank`` indices. For example, >>> from mpi4py_fft import DistArray >>> a = DistArray((3, 8, 8, 8), rank=1) >>> print(a.pencil.shape) (8, 8, 8) The array ``a`` cannot be distributed in the first axis of length 3 since rank is 1 and this first index represent the vector component. The ``pencil`` attribute of ``a`` thus only considers the last three axes. Also note that the ``alignment`` keyword does not take rank into consideration. Setting alignment=2 for the array above means that the last axis will be aligned, also when rank>0. """ def __new__(cls, global_shape, subcomm=None, val=None, dtype=float, buffer=None, strides=None, alignment=None, rank=0): if len(global_shape[rank:]) < 2: # 1D case obj = np.ndarray.__new__(cls, global_shape, dtype=dtype, buffer=buffer, strides=strides) if buffer is None and isinstance(val, Number): obj.fill(val) obj._rank = rank obj._p0 = None return obj if isinstance(subcomm, Subcomm): pass else: if isinstance(subcomm, (tuple, list)): assert len(subcomm) == len(global_shape[rank:]) # Do nothing if already containing communicators. 
A tuple of subcommunicators is not necessarily a Subcomm if not np.all([isinstance(s, MPI.Comm) for s in subcomm]): subcomm = Subcomm(comm, subcomm) else: assert subcomm is None subcomm = [0] * len(global_shape[rank:]) if alignment is not None: subcomm[alignment] = 1 else: subcomm[-1] = 1 alignment = len(subcomm)-1 subcomm = Subcomm(comm, subcomm) sizes = [s.Get_size() for s in subcomm] if alignment is not None: assert isinstance(alignment, (int, np.integer)) assert sizes[alignment] == 1 else: # Decide that alignment is the last axis with size 1 alignment = np.flatnonzero(np.array(sizes) == 1)[-1] p0 = Pencil(subcomm, global_shape[rank:], axis=alignment) subshape = p0.subshape if rank > 0: subshape = global_shape[:rank] + subshape obj = np.ndarray.__new__(cls, subshape, dtype=dtype, buffer=buffer) if buffer is None and isinstance(val, Number): obj.fill(val) obj._p0 = p0 obj._rank = rank return obj def __array_finalize__(self, obj): if obj is None: return self._p0 = getattr(obj, '_p0', None) self._rank = getattr(obj, '_rank', None) @property def alignment(self): """Return alignment of local ``self`` array Note ---- For tensors of rank > 0 the array is actually aligned along ``alignment+rank`` """ return self._p0.axis @property def global_shape(self): """Return global shape of ``self``""" return self.shape[:self.rank] + self._p0.shape @property def substart(self): """Return starting indices of local ``self`` array""" return (0,)*self.rank + self._p0.substart @property def subcomm(self): """Return tuple of subcommunicators for all axes of ``self``""" return (MPI.COMM_SELF,)*self.rank + self._p0.subcomm @property def commsizes(self): """Return number of processors along each axis of ``self``""" return [s.Get_size() for s in self.subcomm] @property def pencil(self): """Return pencil describing distribution of ``self``""" return self._p0 @property def rank(self): """Return tensor rank of ``self``""" return self._rank @property def dimensions(self): """Return dimensions 
of array not including rank""" return len(self._p0.shape) def __getitem__(self, i): # Return DistArray if the result is a component of a tensor # Otherwise return ndarray view if self.ndim == 1: return np.ndarray.__getitem__(self, i) if isinstance(i, (Integral, slice)) and self.rank > 0: v0 = np.ndarray.__getitem__(self, i) v0._rank = self.rank - (self.ndim - v0.ndim) return v0 if isinstance(i, (Integral, slice)) and self.rank == 0: return np.ndarray.__getitem__(self.v, i) assert isinstance(i, tuple) if len(i) <= self.rank: v0 = np.ndarray.__getitem__(self, i) v0._rank = self.rank - (self.ndim - v0.ndim) return v0 return np.ndarray.__getitem__(self.v, i) @property def v(self): """ Return local ``self`` array as an ``ndarray`` object""" return self.__array__() def get(self, gslice): """Return global slice of ``self`` Parameters ---------- gslice : sequence of slice(None) and ints The slice of the global array. Returns ------- Numpy array The slice of the global array is returned on rank 0, whereas the remaining ranks return None Example ------- >>> import subprocess >>> fx = open('gs_script.py', 'w') >>> h = fx.write(''' ... from mpi4py import MPI ... from mpi4py_fft.distarray import DistArray ... comm = MPI.COMM_WORLD ... N = (6, 6, 6) ... z = DistArray(N, dtype=float, alignment=0) ... z[:] = comm.Get_rank() ... g = z.get((0, slice(None), 0)) ... if comm.Get_rank() == 0: ... print(g)''') >>> fx.close() >>> print(subprocess.getoutput('mpirun -np 4 python gs_script.py')) [0. 0. 0. 2. 2. 2.] """ # Note that this implementation uses h5py to take care of the local to # global MPI. We create a global file with MPI, but then open it without # MPI and only on rank 0. 
import h5py f = h5py.File('tmp.h5', 'w', driver="mpio", comm=comm) s = self.local_slice() sp = np.nonzero([isinstance(x, slice) for x in gslice])[0] sf = tuple(np.take(s, sp)) f.require_dataset('data', shape=tuple(np.take(self.global_shape, sp)), dtype=self.dtype) gslice = list(gslice) # We are required to check if the indices in si are on this processor si = np.nonzero([isinstance(x, int) and not z == slice(None) for x, z in zip(gslice, s)])[0] on_this_proc = True for i in si: if gslice[i] >= s[i].start and gslice[i] < s[i].stop: gslice[i] -= s[i].start else: on_this_proc = False if on_this_proc: f["data"][sf] = self[tuple(gslice)] f.close() c = None if comm.Get_rank() == 0: h = h5py.File('tmp.h5', 'r') c = h['data'].__array__() h.close() os.remove('tmp.h5') return c def local_slice(self): """Return local view into global ``self`` array Returns ------- List of slices Each item of the returned list is the slice along that axis, describing the view of the ``self`` array into the global array. Example ------- Print local_slice of a global array of shape (16, 14, 12) using 4 processors. >>> import subprocess >>> fx = open('ls_script.py', 'w') >>> h = fx.write(''' ... from mpi4py import MPI ... from mpi4py_fft.distarray import DistArray ... comm = MPI.COMM_WORLD ... N = (16, 14, 12) ... z = DistArray(N, dtype=float, alignment=0) ... ls = comm.gather(z.local_slice()) ... if comm.Get_rank() == 0: ... for l in ls: ... 
print(l)''') >>> fx.close() >>> print(subprocess.getoutput('mpirun -np 4 python ls_script.py')) (slice(0, 16, None), slice(0, 7, None), slice(0, 6, None)) (slice(0, 16, None), slice(0, 7, None), slice(6, 12, None)) (slice(0, 16, None), slice(7, 14, None), slice(0, 6, None)) (slice(0, 16, None), slice(7, 14, None), slice(6, 12, None)) """ v = [slice(start, start+shape) for start, shape in zip(self._p0.substart, self._p0.subshape)] return tuple([slice(0, s) for s in self.shape[:self.rank]] + v) def get_pencil_and_transfer(self, axis): """Return pencil and transfer objects for alignment along ``axis`` Parameters ---------- axis : int The new axis to align data with Returns ------- 2-tuple 2-tuple where first item is a :class:`.Pencil` aligned in ``axis``. Second item is a :class:`.Transfer` object for executing the redistribution of data """ p1 = self._p0.pencil(axis) return p1, self._p0.transfer(p1, self.dtype) def redistribute(self, axis=None, out=None): """Global redistribution of local ``self`` array Parameters ---------- axis : int, optional Align local ``self`` array along this axis out : :class:`.DistArray`, optional Copy data to this array of possibly different alignment Returns ------- DistArray : out The ``self`` array globally redistributed. If keyword ``out`` is None then a new DistArray (aligned along ``axis``) is created and returned. Otherwise the provided out array is returned. """ # Take care of some trivial cases first if axis == self.alignment: return self if axis is not None and isinstance(out, DistArray): assert axis == out.alignment # Check if self is already aligned along axis. In that case just switch # axis of pencil (both axes are undivided) and return if axis is not None: if self.commsizes[self.rank+axis] == 1: self.pencil.axis = axis return self if out is not None: assert isinstance(out, DistArray) assert self.global_shape == out.global_shape axis = out.alignment if self.commsizes == out.commsizes: # Just a copy required. 
Should probably not be here out[:] = self return out # Check that arrays are compatible for i in range(len(self._p0.shape)): if i not in (self.alignment, out.alignment): assert self.pencil.subcomm[i] == out.pencil.subcomm[i] assert self.pencil.subshape[i] == out.pencil.subshape[i] p1, transfer = self.get_pencil_and_transfer(axis) if out is None: out = DistArray(self.global_shape, subcomm=p1.subcomm, dtype=self.dtype, alignment=axis, rank=self.rank) if self.rank == 0: transfer.forward(self, out) elif self.rank == 1: for i in range(self.shape[0]): transfer.forward(self[i], out[i]) elif self.rank == 2: for i in range(self.shape[0]): for j in range(self.shape[1]): transfer.forward(self[i, j], out[i, j]) transfer.destroy() return out def write(self, filename, name='darray', step=0, global_slice=None, domain=None, as_scalar=False): """Write snapshot ``step`` of ``self`` to file ``filename`` Parameters ---------- filename : str or instance of :class:`.FileBase` The name of the file (or the file itself) that is used to store the requested data in ``self`` name : str, optional Name used for storing snapshot in file. step : int, optional Index used for snapshot in file. global_slice : sequence of slices or integers, optional Store only this global slice of ``self`` domain : sequence, optional An optional spatial mesh or domain to go with the data. Sequence of either - 2-tuples, where each 2-tuple contains the (origin, length) of each dimension, e.g., (0, 2*pi). - Arrays of coordinates, e.g., np.linspace(0, 2*pi, N). One array per dimension as_scalar : boolean, optional Whether to store rank > 0 arrays as scalars. Default is False. 
Example ------- >>> from mpi4py_fft import DistArray >>> u = DistArray((8, 8), val=1) >>> u.write('h5file.h5', 'u', 0) >>> u.write('h5file.h5', 'u', (slice(None), 4)) """ if isinstance(filename, str): writer = HDF5File if filename.endswith('.h5') else NCFile f = writer(filename, domain=domain, mode='a') elif isinstance(filename, FileBase): f = filename field = [self] if global_slice is None else [(self, global_slice)] f.write(step, {name: field}, as_scalar=as_scalar) def read(self, filename, name='darray', step=0): """Read data ``name`` at index ``step``from file ``filename`` into ``self`` Note ---- Only whole arrays can be read from file, not slices. Parameters ---------- filename : str or instance of :class:`.FileBase` The name of the file (or the file itself) holding the data that is loaded into ``self``. name : str, optional Internal name in file of snapshot to be read. step : int, optional Index of field to be read. Default is 0. Example ------- >>> from mpi4py_fft import DistArray >>> u = DistArray((8, 8), val=1) >>> u.write('h5file.h5', 'u', 0) >>> v = DistArray((8, 8)) >>> v.read('h5file.h5', 'u', 0) >>> assert np.allclose(u, v) """ if isinstance(filename, str): writer = HDF5File if filename.endswith('.h5') else NCFile f = writer(filename, mode='r') elif isinstance(filename, FileBase): f = filename f.read(self, name, step=step) def newDistArray(pfft, forward_output=True, val=0, rank=0, view=False): """Return a new :class:`.DistArray` object for provided :class:`.PFFT` object Parameters ---------- pfft : :class:`.PFFT` object forward_output: boolean, optional If False then create DistArray of shape/type for input to forward transform, otherwise create DistArray of shape/type for output from forward transform. val : int or float, optional Value used to initialize array. rank: int, optional Scalar has rank 0, vector 1 and matrix 2. view : bool, optional If True return view of the underlying Numpy array, i.e., return cls.view(np.ndarray). 
Note that the DistArray still will be accessible through the base attribute of the view. Returns ------- DistArray A new :class:`.DistArray` object. Return the ``ndarray`` view if keyword ``view`` is True. Examples -------- >>> from mpi4py import MPI >>> from mpi4py_fft import PFFT, newDistArray >>> FFT = PFFT(MPI.COMM_WORLD, [64, 64, 64]) >>> u = newDistArray(FFT, False, rank=1) >>> u_hat = newDistArray(FFT, True, rank=1) """ global_shape = pfft.global_shape(forward_output) p0 = pfft.pencil[forward_output] if forward_output is True: dtype = pfft.forward.output_array.dtype else: dtype = pfft.forward.input_array.dtype global_shape = (len(global_shape),)*rank + global_shape z = DistArray(global_shape, subcomm=p0.subcomm, val=val, dtype=dtype, alignment=p0.axis, rank=rank) return z.v if view else z def Function(*args, **kwargs): #pragma: no cover import warnings warnings.warn("Function() is deprecated; use newDistArray().", FutureWarning) if 'tensor' in kwargs: kwargs['rank'] = 1 del kwargs['tensor'] return newDistArray(*args, **kwargs) mpi4py-fft-2.0.6/mpi4py_fft/fftw/000077500000000000000000000000001462034230000165375ustar00rootroot00000000000000mpi4py-fft-2.0.6/mpi4py_fft/fftw/__init__.py000066400000000000000000000002331462034230000206460ustar00rootroot00000000000000from .xfftn import * from .factory import get_planned_FFT, export_wisdom, import_wisdom, \ forget_wisdom, cleanup, set_timelimit, get_fftw_lib, fftlib mpi4py-fft-2.0.6/mpi4py_fft/fftw/factory.py000066400000000000000000000122161462034230000205620ustar00rootroot00000000000000#pylint: disable=no-name-in-module import numpy as np from mpi4py import MPI from .utilities import FFTW_FORWARD, FFTW_MEASURE def get_fftw_lib(dtype): """Return compiled fftw module interfacing the FFTW library Parameters ---------- dtype : dtype Data precision Returns ------- Module or ``None`` Module can be either :mod:`.fftwf_xfftn`, :mod:`.fftw_xfftn` or :mod:`.fftwl_xfftn`, depending on precision. 
""" dtype = np.dtype(dtype).char.upper() if dtype == 'G': try: from . import fftwl_xfftn return fftwl_xfftn except ImportError: #pragma: no cover return None elif dtype == 'D': try: from . import fftw_xfftn return fftw_xfftn except ImportError: #pragma: no cover return None elif dtype == 'F': try: from . import fftwf_xfftn return fftwf_xfftn except ImportError: #pragma: no cover return None else: #pragma: no cover return None fftlib = {} for t in 'fdg': fftw_lib = get_fftw_lib(t) if fftw_lib is not None: fftlib[t.upper()] = fftw_lib comm = MPI.COMM_WORLD def get_planned_FFT(input_array, output_array, axes=(-1,), kind=FFTW_FORWARD, threads=1, flags=(FFTW_MEASURE,), normalization=1.0): """Return instance of transform class Parameters ---------- input_array : array real or complex input array output_array : array real or complex output array axes : sequence of ints, optional The axes to transform over, starting from the last kind : int or sequence of ints, optional Any one of (or possibly several for real-to-real) - FFTW_FORWARD (-1) - FFTW_R2HC (0) - FFTW_BACKWARD (1) - FFTW_HC2R (1) - FFTW_DHT (2) - FFTW_REDFT00 (3) - FFTW_REDFT01 (4) - FFTW_REDFT10 (5) - FFTW_REDFT11 (6) - FFTW_RODFT00 (7) - FFTW_RODFT01 (8) - FFTW_RODFT10 (9) - FFTW_RODFT11 (10) threads : int, optional Number of threads to use in transforms flags : int or sequence of ints, optional Any one of, but not necessarily for all transforms or all combinations - FFTW_MEASURE (0) - FFTW_DESTROY_INPUT (1) - FFTW_UNALIGNED (2) - FFTW_CONSERVE_MEMORY (4) - FFTW_EXHAUSTIVE (8) - FFTW_PRESERVE_INPUT (16) - FFTW_PATIENT (32) - FFTW_ESTIMATE (64) - FFTW_WISDOM_ONLY (2097152) normalization : float, optional Normalization factor Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for the desired transforms """ dtype = input_array.dtype.char assert dtype.upper() in fftlib _fft = fftlib[dtype.upper()] return _fft.FFT(input_array, 
output_array, axes, kind, threads, flags, normalization) def export_wisdom(filename): """Export FFTW wisdom Parameters ---------- filename : str Name of file used to export wisdom to Note ---- Wisdom is stored for all precisions available: float, double and long double, using, respectively, prefix ``Fn_``, ``Dn_`` and ``Gn_``, where n is the rank of the processor. Wisdom is imported using :func:`.import_wisdom`, which must be called with the same MPI configuration as used with :func:`.export_wisdom`. See also -------- :func:`.import_wisdom` """ rank = str(comm.Get_rank()) e = [] for key, lib in fftlib.items(): e.append(lib.export_wisdom(bytearray(key+rank+'_'+filename, 'utf-8'))) assert np.all(np.array(e) == 1), "Not able to export wisdom {}".format(filename) def import_wisdom(filename): """Import FFTW wisdom Parameters ---------- filename : str Name of file used to import wisdom from Note ---- Wisdom is imported for all available precisions: float, double and long double, using, respectively, prefix ``Fn_``, ``Dn_`` and ``Gn_``, where n is the rank of the processor. Wisdom is exported using :func:`.export_wisdom`. Note that importing wisdom only works when using the same MPI configuration as used with :func:`.export_wisdom`. 
See also -------- :func:`.export_wisdom` """ rank = str(comm.Get_rank()) e = [] for key, lib in fftlib.items(): e.append(lib.import_wisdom(bytearray(key+rank+'_'+filename, 'utf-8'))) assert np.all(np.array(e) == 1), "Not able to import wisdom {}".format(filename) def forget_wisdom(): for lib in fftlib.values(): lib.forget_wisdom() def set_timelimit(limit): """Set time limit for planning Parameters ---------- limit : number The new time limit set for planning of serial transforms """ for lib in fftlib.values(): lib.set_timelimit(limit) # limit's precision handled by cython def cleanup(): for lib in fftlib.values(): lib.cleanup() mpi4py-fft-2.0.6/mpi4py_fft/fftw/fftw_planxfftn.c000066400000000000000000000046431462034230000217400ustar00rootroot00000000000000#include "fftw_planxfftn.h" enum { C2C_FORWARD = FFTW_FORWARD, C2C_BACKWARD = FFTW_BACKWARD, R2C = FFTW_FORWARD-1, C2R = FFTW_BACKWARD+1, }; fftw_plan fftw_planxfftn(int ndims, int sizes_in[ndims], void *_in, int sizes_out[ndims], void *_out, int naxes, int axes[naxes], int kind[naxes], unsigned flags) { fftw_iodim ranks[ndims], dims[ndims]; int i, j; int strides_in[ndims], strides_out[ndims], markers[ndims]; int *sizes = (kind[0] != C2R) ? 
sizes_in : sizes_out; strides_in[ndims-1] = 1; strides_out[ndims-1] = 1; for (i = ndims-2; i >= 0; i--) { strides_in[i] = sizes_in[i+1] * strides_in[i+1]; strides_out[i] = sizes_out[i+1] * strides_out[i+1]; } for (i = 0; i < ndims; i++) markers[i] = 0; for (i = 0; i < naxes; i++) { int axis = axes[i]; ranks[i].n = sizes[axis]; ranks[i].is = strides_in[axis]; ranks[i].os = strides_out[axis]; markers[axis] = 1; } for (i = 0, j = 0; i < ndims; i++) { if (markers[i]) continue; dims[j].n = sizes[i]; dims[j].is = strides_in[i]; dims[j].os = strides_out[i]; j++; } switch (kind[0]) { case C2C_FORWARD: case C2C_BACKWARD: return fftw_plan_guru_dft(naxes, ranks, ndims-naxes, dims, (fftw_complex *)_in, (fftw_complex *)_out, kind[0], flags); case R2C: return fftw_plan_guru_dft_r2c(naxes, ranks, ndims-naxes, dims, (fftw_real *)_in, (fftw_complex *)_out, flags); case C2R: return fftw_plan_guru_dft_c2r(naxes, ranks, ndims-naxes, dims, (fftw_complex *)_in, (fftw_real *)_out, flags); default: return fftw_plan_guru_r2r(naxes, ranks, ndims-naxes, dims, (fftw_real *)_in, (fftw_real *)_out, (fftw_r2r_kind *)kind, flags); } } mpi4py-fft-2.0.6/mpi4py_fft/fftw/fftw_planxfftn.h000066400000000000000000000010221462034230000217310ustar00rootroot00000000000000#include #include #ifndef fftw_planxfftn_h #define fftw_planxfftn_h typedef double fftw_real; fftw_plan fftw_planxfftn(int ndims, int sizes_in[ndims], void *_in, int sizes_out[ndims], void *_out, int naxes, int axes[naxes], int kind[naxes], unsigned flags); #endif mpi4py-fft-2.0.6/mpi4py_fft/fftw/fftw_xfftn.pxd000066400000000000000000000030431462034230000214270ustar00rootroot00000000000000# cython: language_level=3str cdef extern from "fftw3.h" nogil: ctypedef struct fftw_complex_struct: pass ctypedef fftw_complex_struct *fftw_complex ctypedef struct fftw_plan_struct: pass ctypedef fftw_plan_struct *fftw_plan void fftw_destroy_plan(fftw_plan) void fftw_execute_dft(fftw_plan, void *_in, void *_out) void fftw_execute_dft_c2r(fftw_plan, void 
*_in, void *_out) void fftw_execute_dft_r2c(fftw_plan, void *_in, void *_out) void fftw_execute_r2r(fftw_plan, void *_in, void *_out) void fftw_execute(fftw_plan) void fftw_init_threads() void fftw_plan_with_nthreads(int n) int fftw_export_wisdom_to_filename(const char *filename) int fftw_import_wisdom_from_filename(const char *filename) void fftw_forget_wisdom() void fftw_set_timelimit(double seconds) void fftw_cleanup() void fftw_cleanup_threads() int fftw_alignment_of(void *_in) void fftw_print_plan(fftw_plan) cdef extern from "fftw_planxfftn.h" nogil: ctypedef double fftw_real fftw_plan fftw_planxfftn(int ndims, int sizes_in[], void *_in, int sizes_out[], void *_out, int naxes, int axes[], int kind[], unsigned flags) ctypedef void (*generic_function)(void *plan, void *_in, void *_out) noexcept nogil mpi4py-fft-2.0.6/mpi4py_fft/fftw/fftw_xfftn.pyx000066400000000000000000000256321462034230000214640ustar00rootroot00000000000000# cython: language_level=3str from . cimport fftw_xfftn cimport numpy as np from .utilities import * import numpy as np from libc.stdint cimport intptr_t from libc.stdlib cimport malloc, free cpdef int alignment_of(input_array): cdef np.ndarray _input_array = input_array return fftw_alignment_of(np.PyArray_DATA(_input_array)) cpdef int export_wisdom(const char *filename): return fftw_export_wisdom_to_filename(filename) cpdef int import_wisdom(const char *filename): return fftw_import_wisdom_from_filename(filename) cpdef void forget_wisdom(): fftw_forget_wisdom() cpdef void set_timelimit(fftw_real limit): fftw_set_timelimit(limit) cpdef void cleanup(): fftw_cleanup() fftw_cleanup_threads() cdef void _fftw_execute_dft(void *plan, void *_in, void *_out) noexcept nogil: fftw_execute_dft(plan, _in, _out) cdef void _fftw_execute_dft_r2c(void *plan, void *_in, void *_out) noexcept nogil: fftw_execute_dft_r2c(plan, _in, _out) cdef void _fftw_execute_dft_c2r(void *plan, void *_in, void *_out) noexcept nogil: fftw_execute_dft_c2r(plan, _in, _out) cdef 
void _fftw_execute_r2r(void *plan, void *_in, void *_out) noexcept nogil: fftw_execute_r2r(plan, _in, _out) cdef generic_function _get_execute_function(kind): if kind in (C2C_FORWARD, C2C_BACKWARD): return _fftw_execute_dft elif kind == R2C: return _fftw_execute_dft_r2c elif kind == C2R: return _fftw_execute_dft_c2r return _fftw_execute_r2r cdef class FFT: """ Unified class for FFTs of multidimensional arrays This class is used for any type of transform defined in the user manual of `FFTW `_. Parameters ---------- input_array : array real or complex input array output_array : array real or complex output array axes : sequence of ints, optional The axes to transform over, starting from the last kind : int or sequence of ints, optional Any one of - FFTW_FORWARD (-1) - FFTW_R2HC (0) - FFTW_BACKWARD (1) - FFTW_HC2R (1) - FFTW_DHT (2) - FFTW_REDFT00 (3) - FFTW_REDFT01 (4) - FFTW_REDFT10 (5) - FFTW_REDFT11 (6) - FFTW_RODFT00 (7) - FFTW_RODFT01 (8) - FFTW_RODFT10 (9) - FFTW_RODFT11 (10) threads : int, optional Number of threads to use in transforms flags : int or sequence of ints, optional Any one of, but not necessarily for all transforms or all combinations - FFTW_MEASURE (0) - FFTW_DESTROY_INPUT (1) - FFTW_UNALIGNED (2) - FFTW_CONSERVE_MEMORY (4) - FFTW_EXHAUSTIVE (8) - FFTW_PRESERVE_INPUT (16) - FFTW_PATIENT (32) - FFTW_ESTIMATE (64) - FFTW_WISDOM_ONLY (2097152) normalization : float, optional Normalization factor """ cdef void *_plan cdef np.ndarray _input_array cdef np.ndarray _output_array cdef fftw_real _M cdef int kind cdef tuple input_shape cdef tuple output_shape cdef tuple input_strides cdef tuple output_strides def __cinit__(self, input_array, output_array, axes=(-1,), kind=FFTW_FORWARD, int threads=1, flags=FFTW_MEASURE, fftw_real normalization=1.0): cdef int ndims = len(input_array.shape) cdef int naxes = len(axes) cdef int flag, i cdef unsigned allflags cdef int *sz_in = malloc(ndims * sizeof(int)) cdef int *sz_out = malloc(ndims * sizeof(int)) cdef int 
*axs = malloc(naxes * sizeof(int)) cdef int *knd = malloc(naxes * sizeof(int)) cdef void *_in = np.PyArray_DATA(input_array) cdef void *_out = np.PyArray_DATA(output_array) self.input_shape = input_array.shape self.output_shape = output_array.shape self.input_strides = input_array.strides self.output_strides = output_array.strides fftw_plan_with_nthreads(threads) flags = [flags] if isinstance(flags, int) else flags kind = [kind] if isinstance(kind, int) else kind self.kind = kind[0] axes = list(axes) for i in range(naxes): if axes[i] < 0: axes[i] = axes[i] + ndims allflags = flags[0] for flag in flags[1:]: allflags |= flag self._input_array = input_array self._output_array = output_array self._M = normalization for i in range(ndims): sz_in[i] = input_array.shape[i] sz_out[i] = output_array.shape[i] for i in range(naxes): axs[i] = axes[i] for i in range(len(kind)): knd[i] = kind[i] self._plan = fftw_planxfftn(ndims, sz_in, _in, sz_out, _out, naxes, axs, knd, allflags) if self._plan == NULL: raise RuntimeError("Failure creating FFTW plan") free(sz_in) free(sz_out) free(axs) free(knd) def __dealloc__(self): self.destroy() def destroy(self): fftw_destroy_plan(self._plan) @property def input_array(self): return self._input_array @property def output_array(self): return self._output_array def print_plan(self): assert self._plan != NULL fftw_print_plan(self._plan) def update_arrays(self, input_array, output_array): assert self.input_shape == input_array.shape assert self.input_strides == input_array.strides assert self._input_array.dtype == input_array.dtype assert (np.PyArray_DATA(input_array) % get_alignment(self._input_array) == 0) assert self.output_shape == output_array.shape assert self.output_strides == output_array.strides assert self._output_array.dtype == output_array.dtype assert (np.PyArray_DATA(output_array) % get_alignment(self._output_array) == 0) self._input_array = input_array self._output_array = output_array def get_normalization(self): """Return the 
internally set normalization factor""" return self._M def __call__(self, input_array=None, output_array=None, implicit=True, normalize=False, **kw): """ Signature:: __call__(input_array=None, output_array=None, implicit=True, normalize=False, **kw) Compute transform and return output array Parameters ---------- input_array : array, optional If not provided, then use internally stored array output_array : array, optional If not provided, then use internally stored array implicit : bool, optional If True, then use an implicit method that acts by applying the plan directly on the given input array. If False, then use an explicit method that first copies the given input_array into the internal _input_array. The explicit method is generally safer, because it always preserves the provided input_array. The implicit method can be faster because it may be done without any copying. However, the contents of the input_array may be destroyed during computation. So use with care! normalize : bool, optional If True, normalize transform with internally stored normalization factor. The internally set normalization factor is possible to obtain through :func:`FFT.get_normalization` kw : dict, optional Note ---- If the transform has been planned with FFTW_PRESERVE_INPUT, then both the two methods (implicit=True/False) will preserve the provided input_array. If not planned with this flag, then the implicit=True method may cause the input_array to be overwritten during computation. """ if implicit: return self._apply_implicit(input_array, output_array, normalize, **kw) return self._apply_explicit(input_array, output_array, normalize, **kw) def _apply_explicit(self, input_array, output_array, normalize, **kw): """Apply plan with explicit (and safe) update of work arrays""" if input_array is not None: self._input_array[...] = input_array assert self._plan != NULL with nogil: fftw_execute(self._plan) if normalize: self._output_array *= self._M if output_array is not None: output_array[...] 
= self._output_array return output_array return self._output_array def _apply_implicit(self, input_array, output_array, normalize, **kw): """Apply plan with direct use of work arrays if possible This version of apply will use the provided input and output arrays instead of the original (self._input_array, self._output_array) that were used to plan the transform. Since planning takes the alignment of arrays into consideration, we need to make sure that the alignment of the new arrays match the originals. Other than that we also make sure that the new arrays have the correct shape, strides and type. """ cdef void *_in cdef void *_out cdef generic_function apply_plan = _get_execute_function(self.kind) if input_array is not None: try: assert self.input_shape == input_array.shape assert self.input_strides == input_array.strides assert self._input_array.dtype == input_array.dtype assert (np.PyArray_DATA(input_array) % get_alignment(self._input_array) == 0) except AssertionError: self._input_array[...] 
= input_array input_array = self._input_array else: input_array = self._input_array if output_array is not None: assert self.output_shape == output_array.shape assert self.output_strides == output_array.strides assert self._output_array.dtype == output_array.dtype assert (np.PyArray_DATA(output_array) % get_alignment(self._output_array) == 0), \ "output_array has wrong alignment" else: output_array = self._output_array _in = np.PyArray_DATA(input_array) _out = np.PyArray_DATA(output_array) assert self._plan != NULL with nogil: apply_plan(self._plan, _in, _out) if normalize: output_array *= self._M return output_array mpi4py-fft-2.0.6/mpi4py_fft/fftw/utilities.pyx000066400000000000000000000046351462034230000213240ustar00rootroot00000000000000#cython: language_level=3str cimport numpy as np import numpy as np from libc.stdint cimport intptr_t cpdef enum: FFTW_FORWARD = -1 FFTW_R2HC = 0 FFTW_BACKWARD = 1 FFTW_HC2R = 1 FFTW_DHT = 2 FFTW_REDFT00 = 3 FFTW_REDFT01 = 4 FFTW_REDFT10 = 5 FFTW_REDFT11 = 6 FFTW_RODFT00 = 7 FFTW_RODFT01 = 8 FFTW_RODFT10 = 9 FFTW_RODFT11 = 10 cpdef enum: C2C_FORWARD = -1 C2C_BACKWARD = 1 R2C = -2 C2R = 2 cpdef enum: FFTW_MEASURE = 0 FFTW_DESTROY_INPUT = 1 FFTW_UNALIGNED = 2 FFTW_CONSERVE_MEMORY = 4 FFTW_EXHAUSTIVE = 8 FFTW_PRESERVE_INPUT = 16 FFTW_PATIENT = 32 FFTW_ESTIMATE = 64 FFTW_WISDOM_ONLY = 2097152 cpdef int get_alignment(array): """Return alignment assuming highest allowed is 32 Parameters ---------- array : array """ cdef int i, n cdef intptr_t addr = np.PyArray_DATA(array) for i in range(5, -1, -1): n = 1 << i if addr % n == 0: break return n cpdef aligned(shape, n=32, dtype=np.dtype('d'), fill=None): """Returned array with byte-alignment according to n Parameters ---------- shape : sequence of ints The shape of the array to be created n : int, optional The chosen byte-alignment dtype : np.dtype, optional The type of the returned array fill : None or number, optional If number then fill returned array with this number, otherwise return 
empty array Returns ------- array byte-aligned array """ dtype = np.dtype(dtype) M = np.prod(shape)*dtype.itemsize a = np.empty(M+n, dtype=np.dtype('uint8')) offset = a.ctypes.data % n offset = 0 if offset == 0 else (n - offset) b = np.frombuffer(a[offset:(offset+M)].data, dtype=dtype).reshape(shape) if fill is not None: assert isinstance(fill, int) b[...] = fill return b cpdef aligned_like(z, fill=None): """Return array with byte-alignment, shape and type like array z Parameters ---------- z : array An array with shape and type we want to recreate fill : None or number, optional If number then fill returned array with this number, otherwise return empty array Returns ------- array byte-aligned array """ n = get_alignment(z) return aligned(z.shape, n=n, dtype=z.dtype, fill=fill) mpi4py-fft-2.0.6/mpi4py_fft/fftw/xfftn.py000066400000000000000000000663711462034230000202530ustar00rootroot00000000000000#pylint: disable=no-name-in-module,unused-import import numpy as np from .factory import get_planned_FFT from .utilities import FFTW_FORWARD, FFTW_BACKWARD, FFTW_REDFT00, FFTW_REDFT01, \ FFTW_REDFT10, FFTW_REDFT11, FFTW_RODFT00, FFTW_RODFT01, FFTW_RODFT10, \ FFTW_RODFT11, FFTW_MEASURE, FFTW_DESTROY_INPUT, FFTW_UNALIGNED, \ FFTW_CONSERVE_MEMORY, FFTW_EXHAUSTIVE, FFTW_PRESERVE_INPUT, FFTW_PATIENT, \ FFTW_ESTIMATE, FFTW_WISDOM_ONLY, C2C_FORWARD, C2C_BACKWARD, R2C, C2R, \ FFTW_R2HC, FFTW_HC2R, FFTW_DHT, get_alignment, aligned, aligned_like flag_dict = {key: val for key, val in locals().items() if key.startswith('FFTW_')} dct_type = { 1: FFTW_REDFT00, 2: FFTW_REDFT10, 3: FFTW_REDFT01, 4: FFTW_REDFT11} idct_type = { 1: FFTW_REDFT00, 2: FFTW_REDFT01, 3: FFTW_REDFT10, 4: FFTW_REDFT11} dst_type = { 1: FFTW_RODFT00, 2: FFTW_RODFT10, 3: FFTW_RODFT01, 4: FFTW_RODFT11} idst_type = { 1: FFTW_RODFT00, 2: FFTW_RODFT01, 3: FFTW_RODFT10, 4: FFTW_RODFT11} def fftn(input_array, s=None, axes=(-1,), threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return complex-to-complex forward 
transform object Parameters ---------- input_array : complex array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, optional Axes over which to compute the FFT. threads : int, optional Number of threads used in computing FFT. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : complex array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for complex-to-complex transforms Note ---- This routine does not compute the fftn, it merely returns an instance of a class that can do it. The contents of the input_array may be overwritten during planning. Make sure to keep a copy if needed. Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import fftn as plan_fftn >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='D') >>> fftn = plan_fftn(A, flags=(FFTW_ESTIMATE,)) >>> A[:] = 1, 2, 3, 4 >>> B = fftn() >>> print(B) [10.+0.j -2.+2.j -2.+0.j -2.-2.j] >>> assert id(A) == id(fftn.input_array) >>> assert id(B) == id(fftn.output_array) """ kind = FFTW_FORWARD assert input_array.dtype.char in 'FDG' if output_array is None: n = get_alignment(input_array) output_array = aligned(input_array.shape, n, input_array.dtype.char.upper()) else: assert input_array.shape == output_array.shape assert output_array.dtype.char == input_array.dtype.char.upper() M = np.prod(np.take(input_array.shape, axes)) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, 1.0/M) def ifftn(input_array, s=None, axes=(-1,), threads=1, flags=(FFTW_MEASURE,), output_array=None): """ Return complex-to-complex inverse transform object Parameters ---------- 
input_array : array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, optional Axes over which to compute the inverse FFT. threads : int, optional Number of threads used in computing FFT. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for complex-to-complex inverse transforms Note ---- This routine does not compute the ifftn, it merely returns an instance of a class that can do it. The contents of the input_array may be overwritten during planning. Make sure that you keep a copy if needed. Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import ifftn as plan_ifftn >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, FFTW_PRESERVE_INPUT, aligned >>> A = aligned(4, dtype='D') >>> ifftn = plan_ifftn(A, flags=(FFTW_ESTIMATE, FFTW_PRESERVE_INPUT)) >>> A[:] = 1, 2, 3, 4 >>> B = ifftn() >>> print(B) [10.+0.j -2.-2.j -2.+0.j -2.+2.j] >>> assert id(B) == id(ifftn.output_array) >>> assert id(A) == id(ifftn.input_array) """ kind = FFTW_BACKWARD assert input_array.dtype.char in 'FDG' if output_array is None: output_array = aligned_like(input_array) else: assert input_array.shape == output_array.shape M = np.prod(np.take(input_array.shape, axes)) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, 1.0/M) def rfftn(input_array, s=None, axes=(-1,), threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return real-to-complex transform object Parameters ---------- input_array : real array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, 
optional Axes over which to compute the real to complex FFT. threads : int, optional Number of threads used in computing FFT. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for real-to-complex transforms Note ---- This routine does not compute the rfftn, it merely returns an instance of a class that can do it. The contents of the input_array may be overwritten during planning. Make sure that you keep a copy if needed. Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import rfftn as plan_rfftn >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='d') >>> rfftn = plan_rfftn(A, flags=(FFTW_ESTIMATE,)) >>> A[:] = 1, 2, 3, 4 >>> B = rfftn() >>> print(B) [10.+0.j -2.+2.j -2.+0.j] >>> assert id(A) == id(rfftn.input_array) >>> assert id(B) == id(rfftn.output_array) """ kind = R2C assert input_array.dtype.char in 'fdg' if output_array is None: sz = list(input_array.shape) sz[axes[-1]] = input_array.shape[axes[-1]]//2+1 dtype = input_array.dtype.char n = get_alignment(input_array) output_array = aligned(sz, n=n, dtype=np.dtype(dtype.upper())) else: assert input_array.shape[axes[-1]]//2+1 == output_array.shape[axes[-1]] M = np.prod(np.take(input_array.shape, axes)) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, 1.0/M) def irfftn(input_array, s=None, axes=(-1,), threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return inverse complex-to-real transform object Parameters ---------- input_array : array s : sequence of ints, optional Shape of output array along each of the transformed axes. 
Must be same length as axes (len(s) == len(axes)). If not given it is assumed that the shape of the output along the first transformed axis (i.e., axes[-1]) is an even number. It is not possible to determine exactly, because for a real transform the output of a real array of length N is N//2+1. However, both N=4 and N=5 gives 4//2+1=3 and 5//2+1=3, so it is not possible to determine whether 4 or 5 is correct. Hence it must be given. axes : sequence of ints, optional Axes over which to compute the real to complex FFT. threads : int, optional Number of threads used in computing FFT. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for complex-to-real transforms Note ---- This routine does not compute the irfftn, it merely returns an instance of a class that can do it. The irfftn is not possible to use with the FFTW_PRESERVE_INPUT flag. Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import irfftn as plan_irfftn >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='D') >>> irfftn = plan_irfftn(A, flags=(FFTW_ESTIMATE,)) # no shape given for output >>> A[:] = 1, 2, 3, 4 >>> B = irfftn() >>> print(B) [15. -4. 0. -1. 0. -4.] >>> irfftn = plan_irfftn(A, s=(7,), flags=(FFTW_ESTIMATE,)) # output shape given >>> B = irfftn() >>> print(B) [19. 
-5.04891734 -0.30797853 -0.64310413 -0.64310413 -0.30797853 -5.04891734] >>> assert id(B) == id(irfftn.output_array) >>> assert id(A) == id(irfftn.input_array) """ kind = C2R assert input_array.dtype.char in 'FDG' assert FFTW_PRESERVE_INPUT not in flags sz = list(input_array.shape) if s is not None: assert len(axes) == len(s) for q, axis in zip(s, axes): sz[axis] = q else: sz[axes[-1]] = 2*sz[axes[-1]]-2 if output_array is None: dtype = input_array.dtype.char n = get_alignment(input_array) output_array = aligned(sz, n=n, dtype=np.dtype(dtype.lower())) else: assert list(output_array.shape) == sz assert sz[axes[-1]]//2+1 == input_array.shape[axes[-1]] M = np.prod(np.take(output_array.shape, axes)) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, 1.0/M) def dctn(input_array, s=None, axes=(-1,), type=2, threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return discrete cosine transform object Parameters ---------- input_array : array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, optional Axes over which to compute the real-to-real dct. type : int, optional Type of `dct `_ - 1 - FFTW_REDFT00 - 2 - FFTW_REDFT10, - 3 - FFTW_REDFT01, - 4 - FFTW_REDFT11 threads : int, optional Number of threads used in computing dct. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for real-to-real dct transforms of given type Note ---- This routine does not compute the dct, it merely returns an instance of a class that can do it. 
Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import dctn as plan_dct >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='d') >>> dct = plan_dct(A, flags=(FFTW_ESTIMATE,)) >>> A[:] = 1, 2, 3, 4 >>> B = dct() >>> print(B) [20. -6.30864406 0. -0.44834153] >>> assert id(A) == id(dct.input_array) >>> assert id(B) == id(dct.output_array) """ assert input_array.dtype.char in 'fdg' if output_array is None: output_array = aligned_like(input_array) else: assert input_array.shape == output_array.shape kind = dct_type[type] kind = [kind]*len(axes) M = get_normalization(kind, input_array.shape, axes) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, M) def idctn(input_array, s=None, axes=(-1,), type=2, threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return inverse discrete cosine transform object Parameters ---------- input_array : array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, optional Axes over which to compute the real-to-real idct. type : int, optional Type of `idct `_ - 1 - FFTW_REDFT00 - 2 - FFTW_REDFT01 - 3 - FFTW_REDFT10 - 4 - FFTW_REDFT11 threads : int, optional Number of threads used in computing idct. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for real-to-real idct transforms of given type Note ---- This routine does not compute the idct, it merely returns an instance of a class that can do it. 
Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import idctn as plan_idct >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='d') >>> idct = plan_idct(A, flags=(FFTW_ESTIMATE,)) >>> A[:] = 1, 2, 3, 4 >>> B = idct() >>> print(B) [11.99962628 -9.10294322 2.61766184 -1.5143449 ] >>> assert id(A) == id(idct.input_array) >>> assert id(B) == id(idct.output_array) """ assert input_array.dtype.char in 'fdg' if output_array is None: output_array = aligned_like(input_array) else: assert input_array.shape == output_array.shape kind = idct_type[type] kind = [kind]*len(axes) M = get_normalization(kind, input_array.shape, axes) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, M) def dstn(input_array, s=None, axes=(-1,), type=2, threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return discrete sine transform object Parameters ---------- input_array : array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, optional Axes over which to compute the real-to-real dst. type : int, optional Type of `dst `_ - 1 - FFTW_RODFT00 - 2 - FFTW_RODFT10 - 3 - FFTW_RODFT01 - 4 - FFTW_RODFT11 threads : int, optional Number of threads used in computing dst. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for real-to-real dst transforms of given type Note ---- This routine does not compute the dst, it merely returns an instance of a class that can do it. 
Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import dstn as plan_dst >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='d') >>> dst = plan_dst(A, flags=(FFTW_ESTIMATE,)) >>> A[:] = 1, 2, 3, 4 >>> B = dst() >>> print(B) [13.06562965 -5.65685425 5.411961 -4. ] >>> assert id(A) == id(dst.input_array) >>> assert id(B) == id(dst.output_array) """ assert input_array.dtype.char in 'fdg' if output_array is None: output_array = aligned_like(input_array) else: assert input_array.shape == output_array.shape kind = dst_type[type] kind = [kind]*len(axes) M = get_normalization(kind, input_array.shape, axes) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, M) def idstn(input_array, s=None, axes=(-1,), type=2, threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return inverse discrete sine transform object Parameters ---------- input_array : array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, optional Axes over which to compute the real-to-real inverse dst. type : int, optional Type of `idst `_ - 1 - FFTW_RODFT00 - 2 - FFTW_RODFT01 - 3 - FFTW_RODFT10 - 4 - FFTW_RODFT11 threads : int, optional Number of threads used in computing inverse dst. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for real-to-real idst transforms of given type Note ---- This routine does not compute the idst, it merely returns an instance of a class that can do it. 
Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import idstn as plan_idst >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='d') >>> idst = plan_idst(A, flags=(FFTW_ESTIMATE,)) >>> A[:] = 1, 2, 3, 4 >>> B = idst() >>> print(B) [13.13707118 -1.6199144 0.72323135 -0.51978306] >>> assert id(A) == id(idst.input_array) >>> assert id(B) == id(idst.output_array) """ assert input_array.dtype.char in 'fdg' if output_array is None: output_array = aligned_like(input_array) else: assert input_array.shape == output_array.shape kind = idst_type[type] kind = [kind]*len(axes) M = get_normalization(kind, input_array.shape, axes) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, M) def ihfftn(input_array, s=None, axes=(-1,), threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return inverse transform object for an array with Hermitian symmetry Parameters ---------- input_array : array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, optional Axes over which to compute the ihfftn. threads : int, optional Number of threads used in computing ihfftn. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for real-to-complex ihfftn transforms Note ---- This routine does not compute the ihfttn, it merely returns an instance of a class that can do it. 
Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import ihfftn as plan_ihfftn >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='d') >>> ihfftn = plan_ihfftn(A, flags=(FFTW_ESTIMATE,)) >>> A[:] = 1, 2, 3, 4 >>> B = ihfftn() >>> print(B) [10.+0.j -2.+2.j -2.+0.j] >>> assert id(A) == id(ihfftn.input_array) >>> assert id(B) == id(ihfftn.output_array) """ kind = R2C assert input_array.dtype.char in 'fdg' if output_array is None: dtype = input_array.dtype.char sz = list(input_array.shape) sz[axes[-1]] = input_array.shape[axes[-1]]//2+1 n = get_alignment(input_array) output_array = aligned(sz, n=n, dtype=np.dtype(dtype.upper())) else: assert input_array.shape[axes[-1]]//2+1 == output_array.shape[axes[-1]] M = get_normalization(kind, input_array.shape, axes) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, M) def hfftn(input_array, s=None, axes=(-1,), threads=1, flags=(FFTW_MEASURE,), output_array=None): """Return transform object for an array with Hermitian symmetry Parameters ---------- input_array : array s : sequence of ints, optional Not used - included for compatibility with Numpy axes : sequence of ints, optional Axes over which to compute the hfftn. threads : int, optional Number of threads used in computing hfftn. flags : sequence of ints, optional Flags from - FFTW_MEASURE - FFTW_EXHAUSTIVE - FFTW_PATIENT - FFTW_DESTROY_INPUT - FFTW_PRESERVE_INPUT - FFTW_UNALIGNED - FFTW_CONSERVE_MEMORY - FFTW_ESTIMATE output_array : array, optional Array to be used as output array. Must be of correct shape, type, strides and alignment Returns ------- :class:`.fftwf_xfftn.FFT`, :class:`.fftw_xfftn.FFT` or :class:`.fftwl_xfftn.FFT` An instance of the return type configured for complex-to-real hfftn transforms Note ---- This routine does not compute the hfttn, it merely returns an instance of a class that can do it. 
Examples -------- >>> import numpy as np >>> from mpi4py_fft.fftw import hfftn as plan_hfftn >>> from mpi4py_fft.fftw import FFTW_ESTIMATE, aligned >>> A = aligned(4, dtype='D') >>> hfftn = plan_hfftn(A, flags=(FFTW_ESTIMATE,)) # no shape given for output >>> A[:] = 1, 2, 3, 4 >>> B = hfftn() >>> print(B) [15. -4. 0. -1. 0. -4.] >>> hfftn = plan_hfftn(A, s=(7,), flags=(FFTW_ESTIMATE,)) # output shape given >>> B = hfftn() >>> print(B) [19. -5.04891734 -0.30797853 -0.64310413 -0.64310413 -0.30797853 -5.04891734] >>> assert id(B) == id(hfftn.output_array) >>> assert id(A) == id(hfftn.input_array) """ kind = C2R assert input_array.dtype.char in 'FDG' sz = list(input_array.shape) if s is not None: assert len(axes) == len(s) for q, axis in zip(s, axes): sz[axis] = q else: sz[axes[-1]] = 2*sz[axes[-1]]-2 if output_array is None: dtype = input_array.dtype.char n = get_alignment(input_array) output_array = aligned(sz, n=n, dtype=np.dtype(dtype.lower())) else: assert list(output_array.shape) == sz assert sz[axes[-1]]//2+1 == input_array.shape[axes[-1]] M = get_normalization(kind, sz, axes) return get_planned_FFT(input_array, output_array, axes, kind, threads, flags, M) def get_normalization(kind, shape, axes): """Return normalization factor for multidimensional transform The normalization factor is, for Fourier transforms:: 1./np.prod(np.take(shape, axes)) where shape is the global shape of the array that is input to the forward transform, and axes are the axes transformed over. For real-to-real transforms the normalization factor for each axis is - REDFT00 - 2(N-1) - REDFT01 - 2N - REDFT10 - 2N - REDFT11 - 2N - RODFT00 - 2(N+1) - RODFT01 - 2N - RODFT10 - 2N - RODFT11 - 2N where N is the length of the input array along that axis. 
Parameters ---------- kind : sequence of ints The kind of transform along each axis shape : sequence of ints The shape of the global transformed array (input to the forward transform) axes : sequence of ints The axes transformed over Note ---- The returned normalization factor is the *inverse* of the product of the normalization factors for the axes it is transformed over. """ kind = [kind]*len(axes) if isinstance(kind, int) else kind assert len(kind) == len(axes) M = 1 for knd, axis in zip(kind, axes): N = shape[axis] if knd == FFTW_RODFT00: M *= 2*(N+1) elif knd == FFTW_REDFT00: M *= 2*(N-1) elif knd in (FFTW_RODFT01, FFTW_RODFT10, FFTW_RODFT11, FFTW_REDFT01, FFTW_REDFT10, FFTW_REDFT11): M *= 2*N else: M *= N return 1./M inverse = { FFTW_RODFT11: FFTW_RODFT11, FFTW_REDFT11: FFTW_REDFT11, FFTW_RODFT01: FFTW_RODFT10, FFTW_RODFT10: FFTW_RODFT01, FFTW_REDFT01: FFTW_REDFT10, FFTW_REDFT10: FFTW_REDFT01, FFTW_RODFT00: FFTW_RODFT00, FFTW_REDFT00: FFTW_REDFT00, rfftn: irfftn, irfftn: rfftn, fftn: ifftn, ifftn: fftn, dctn: idctn, idctn: dctn, dstn: idstn, idstn: dstn, hfftn: ihfftn, ihfftn: hfftn } mpi4py-fft-2.0.6/mpi4py_fft/io/000077500000000000000000000000001462034230000162005ustar00rootroot00000000000000mpi4py-fft-2.0.6/mpi4py_fft/io/__init__.py000066400000000000000000000001621462034230000203100ustar00rootroot00000000000000from .h5py_file import * from .nc_file import * from .file_base import * from .generate_xdmf import generate_xdmf mpi4py-fft-2.0.6/mpi4py_fft/io/file_base.py000066400000000000000000000111411462034230000204610ustar00rootroot00000000000000from mpi4py import MPI import numpy as np __all__ = ('FileBase',) comm = MPI.COMM_WORLD class FileBase(object): """Base class for reading/writing distributed arrays Parameters ---------- filename : str, optional Name of backend file used to store data domain : sequence, optional An optional spatial mesh or domain to go with the data. 
Sequence of either - 2-tuples, where each 2-tuple contains the (origin, length) of each dimension, e.g., (0, 2*pi). - Arrays of coordinates, e.g., np.linspace(0, 2*pi, N). One array per dimension. """ def __init__(self, filename=None, domain=None): self.f = None self.filename = filename self.domain = domain def _check_domain(self, group, field): """Check dimensions and store (if missing) self.domain""" raise NotImplementedError def write(self, step, fields, **kw): """Write snapshot ``step`` of ``fields`` to file Parameters ---------- step : int Index of snapshot. fields : dict The fields to be dumped to file. (key, value) pairs are group name and either arrays or 2-tuples, respectively. The arrays are complete arrays to be stored, whereas 2-tuples are arrays with associated *global* slices. as_scalar : boolean, optional Whether to store rank > 0 arrays as scalars. Default is False. """ as_scalar = kw.get("as_scalar", False) def _write(group, u, sl, step, kw, k=None): if sl is None: self._write_group(group, u, step, **kw) else: self._write_slice_step(group, step, sl, u, **kw) for group, list_of_fields in fields.items(): assert isinstance(list_of_fields, (tuple, list)) assert isinstance(group, str) for field in list_of_fields: u = field[0] if isinstance(field, (tuple, list)) else field sl = field[1] if isinstance(field, (tuple, list)) else None if as_scalar is False or u.rank == 0: self._check_domain(group, u) _write(group, u, sl, step, kw) else: # as_scalar is True and u.rank > 0 if u.rank == 1: for k in range(u.shape[0]): g = group + str(k) self._check_domain(g, u[k]) _write(g, u[k], sl, step, kw) elif u.rank == 2: for k in range(u.shape[0]): for l in range(u.shape[1]): g = group + str(k) + str(l) self._check_domain(g, u[k, l]) _write(g, u[k, l], sl, step, kw) def read(self, u, name, **kw): """Read field ``name`` into distributed array ``u`` Parameters ---------- u : array The :class:`.DistArray` to read into. name : str Name of field to be read. 
step : int, optional Index of field to be read. Default is 0. """ raise NotImplementedError def close(self): """Close the self.filename file""" self.f.close() def open(self, mode='r+'): """Open the self.filename file for reading or writing Parameters ---------- mode : str Open file in this mode. Default is 'r+'. """ raise NotImplementedError @staticmethod def backend(): """Return which backend is used to store data""" raise NotImplementedError def _write_slice_step(self, name, step, slices, field, **kwargs): raise NotImplementedError def _write_group(self, name, u, step, **kwargs): raise NotImplementedError @staticmethod def _get_slice_name(slices): sl = list(slices) slname = '' for ss in sl: if isinstance(ss, slice): slname += 'slice_' else: slname += str(ss)+'_' return slname[:-1] @staticmethod def _get_local_slices(slices, s): # Check if data is on this processor and make slices local inside = 1 si = np.nonzero([isinstance(x, int) and not z == slice(None) for x, z in zip(slices, s)])[0] for i in si: if slices[i] >= s[i].start and slices[i] < s[i].stop: slices[i] -= s[i].start else: inside = 0 return slices, inside mpi4py-fft-2.0.6/mpi4py_fft/io/generate_xdmf.py000066400000000000000000000270201462034230000213630ustar00rootroot00000000000000# pylint: disable=line-too-long import copy import re from numpy import dtype, array, invert, take __all__ = ('generate_xdmf',) xdmffile = """ {2} """ def get_grid(geometry, topology, attrs): return """ {0} {1} {2} """.format(geometry, topology, attrs) def get_geometry(kind=0, dim=2): assert kind in (0, 1) assert dim in (2, 3) if dim == 2: if kind == 0: return """ {0} {1} {2} {3} """ return """ {3}:{6}/mesh/{4} {3}:{6}/mesh/{5} 0 """ if dim == 3: if kind == 0: return """ {0} {1} {2} {3} {4} {5} """ return """ {4}:{8}/mesh/{5} {4}:{8}/mesh/{6} {4}:{8}/mesh/{7} """ def get_topology(dims, kind=0): assert len(dims) in (2, 3) co = 'Co' if kind == 0 else '' if len(dims) == 2: return """""".format(dims[0], dims[1], co) if len(dims) == 
3: return """""".format(dims[0], dims[1], dims[2], co) def get_attribute(attr, h5filename, dims, prec): name = attr.split("/")[0] assert len(dims) in (2, 3) if len(dims) == 2: return """ {3}:/{4} """.format(name, dims[0], dims[1], h5filename, attr, prec) return """ {4}:/{5} """.format(name, dims[0], dims[1], dims[2], h5filename, attr, prec) def generate_xdmf(h5filename, periodic=True, order='paraview'): """Generate XDMF-files Parameters ---------- h5filename : str Name of hdf5-file that we want to decorate with xdmf periodic : bool or dim-sequence of bools, optional If true along axis i, assume data is periodic. Only affects the calculation of the domain size and only if the domain is given as 2-tuple of origin+dx. order : str ``paraview`` or ``visit`` For some reason Paraview and Visit requires the mesh stored in opposite order in the XDMF-file for 2D slices. Make choice of order here. """ import h5py f = h5py.File(h5filename, 'a') keys = [] f.visit(keys.append) assert order.lower() in ('paraview', 'visit') # Find unique scalar groups of 2D and 3D datasets datasets = {2:{}, 3:{}} for key in keys: if f[key.split('/')[0]].attrs['rank'] > 0: continue if isinstance(f[key], h5py.Dataset): if not ('mesh' in key or 'domain' in key or 'Vector' in key): tstep = int(key.split("/")[-1]) ndim = int(key.split("/")[1][0]) if ndim in (2, 3): ds = datasets[ndim] if tstep in ds: ds[tstep] += [key] else: ds[tstep] = [key] if periodic is True: periodic = [0]*5 elif periodic is False: periodic = [1]*5 else: assert isinstance(periodic, (tuple, list)) periodic = list(array(invert(periodic), int)) coor = ['x0', 'x1', 'x2', 'x3', 'x4'] for ndim, dsets in datasets.items(): timesteps = list(dsets.keys()) per = copy.copy(periodic) if not timesteps: continue timesteps.sort(key=int) tt = "" for i in timesteps: tt += "%s " %i datatype = f[dsets[timesteps[0]][0]].dtype assert datatype.char not in 'FDG', "Cannot use generate_xdmf to visualize complex data." 
prec = 4 if datatype is dtype('float32') else 8 xff = {} geometry = {} topology = {} attrs = {} grid = {} NN = {} for name in dsets[timesteps[0]]: group = name.split('/')[0] if 'slice' in name: slices = name.split("/")[2] else: slices = 'whole' cc = copy.copy(coor) if slices not in xff: xff[slices] = copy.copy(xdmffile) N = list(f[name].shape) kk = 0 sl = 0 if 'slice' in slices: ss = slices.split("_") ii = [] for i, sx in enumerate(ss): if 'slice' in sx: ii.append(i) else: if len(f[group].attrs.get('shape')) == 3: # 2D slice in 3D domain kk = i sl = int(sx) N.insert(i, 1) cc = take(coor, ii) else: ii = list(range(ndim)) NN[slices] = N if 'domain' in f[group].keys(): if ndim == 2 and ('slice' not in slices or len(f[group].attrs.get('shape')) > 3): geo = get_geometry(kind=0, dim=2) assert len(ii) == 2 i, j = ii if order.lower() == 'paraview': data = [f[group+'/domain/{}'.format(coor[i])][0], f[group+'/domain/{}'.format(coor[j])][0], f[group+'/domain/{}'.format(coor[i])][1]/(N[0]-per[i]), f[group+'/domain/{}'.format(coor[j])][1]/(N[1]-per[j])] geometry[slices] = geo.format(*data) else: data = [f[group+'/domain/{}'.format(coor[j])][0], f[group+'/domain/{}'.format(coor[i])][0], f[group+'/domain/{}'.format(coor[j])][1]/(N[0]-per[j]), f[group+'/domain/{}'.format(coor[i])][1]/(N[1]-per[i])] geometry[slices] = geo.format(*data) else: if ndim == 2: ii.insert(kk, kk) per[kk] = 0 i, j, k = ii geo = get_geometry(kind=0, dim=3) data = [f[group+'/domain/{}'.format(coor[i])][0], f[group+'/domain/{}'.format(coor[j])][0], f[group+'/domain/{}'.format(coor[k])][0], f[group+'/domain/{}'.format(coor[i])][1]/(N[0]-per[i]), f[group+'/domain/{}'.format(coor[j])][1]/(N[1]-per[j]), f[group+'/domain/{}'.format(coor[k])][1]/(N[2]-per[k])] if ndim == 2: origin, dx = f[group+'/domain/x{}'.format(kk)] M = f[group].attrs.get('shape') pos = origin+dx/(M[kk]-per[kk])*sl data[kk] = pos data[kk+3] = pos geometry[slices] = geo.format(*data) topology[slices] = get_topology(N, kind=0) elif 'mesh' in 
f[group].keys(): if ndim == 2 and ('slice' not in slices or len(f[group].attrs.get('shape')) > 3): geo = get_geometry(kind=1, dim=2) else: geo = get_geometry(kind=1, dim=3) if ndim == 2 and ('slice' not in slices or len(f[group].attrs.get('shape')) > 3): if order.lower() == 'paraview': sig = (prec, N[0], N[1], h5filename, cc[0], cc[1], group) else: sig = (prec, N[1], N[0], h5filename, cc[1], cc[0], group) else: if ndim == 2: # 2D slice in 3D domain pos = f[group+"/mesh/x{}".format(kk)][sl] z = re.findall(r'', geo, re.DOTALL) geo = geo.replace(z[2-kk], ' Format="XML" NumberType="Float" Precision="{0}" Dimensions="{%d}">\n {%d}\n '%(1+kk, 7-kk)) cc = list(cc) cc.insert(kk, pos) sig = (prec, N[0], N[1], N[2], h5filename, cc[2], cc[1], cc[0], group) geometry[slices] = geo.format(*sig) topology[slices] = get_topology(N, kind=1) grid[slices] = '' # if slice of data, need to know along which axes # Since there may be many different slices, we need to create # one xdmf-file for each composition of slices attrs = {} for tstep in timesteps: d = dsets[tstep] slx = set() for i, x in enumerate(d): slices = x.split("/")[2] if not 'slice' in slices: slices = 'whole' N = NN[slices] if slices not in attrs: attrs[slices] = '' attrs[slices] += get_attribute(x, h5filename, N, prec) slx.add(slices) for slices in slx: grid[slices] += get_grid(geometry[slices], topology[slices], attrs[slices].rstrip()) attrs[slices] = '' for slices, ff in xff.items(): if 'slice' in slices: fname = h5filename[:-3]+"_"+slices+".xdmf" else: fname = h5filename[:-3]+".xdmf" xfl = open(fname, "w") h = ff.format(tt, len(timesteps), grid[slices].rstrip()) xfl.write(h) xfl.close() f.close() if __name__ == "__main__": import sys generate_xdmf(sys.argv[-1]) mpi4py-fft-2.0.6/mpi4py_fft/io/h5py_file.py000066400000000000000000000134011462034230000204350ustar00rootroot00000000000000import numpy as np from mpi4py import MPI from .file_base import FileBase __all__ = ('HDF5File',) comm = MPI.COMM_WORLD class 
HDF5File(FileBase): """Class for reading/writing data to HDF5 format Parameters ---------- h5name : str Name of hdf5 file to be created. domain : sequence, optional An optional spatial mesh or domain to go with the data. Sequence of either - 2-tuples, where each 2-tuple contains the (origin, length) of each dimension, e.g., (0, 2*pi). - Arrays of coordinates, e.g., np.linspace(0, 2*pi, N). One array per dimension. mode : str, optional ``r``, ``w`` or ``a`` for read, write or append. Default is ``a``. kw : dict, optional Optional additional keyword arguments used when creating the file used to store data. """ def __init__(self, h5name, domain=None, mode='a', **kw): FileBase.__init__(self, h5name, domain=domain) import h5py self.f = h5py.File(h5name, mode, driver="mpio", comm=comm, **kw) self.close() def _check_domain(self, group, field): if self.domain is None: self.domain = ((0, 2*np.pi),)*field.dimensions assert len(self.domain) == field.dimensions self.f.require_group(group) if not "shape" in self.f[group].attrs: self.f[group].attrs.create("shape", field.pencil.shape) if not "rank" in self.f[group].attrs: self.f[group].attrs.create("rank", field.rank) assert field.rank == self.f[group].attrs["rank"] assert np.all(field.pencil.shape == self.f[group].attrs["shape"]) if isinstance(self.domain[0], np.ndarray): self.f[group].require_group("mesh") else: self.f[group].require_group("domain") for i in range(field.dimensions): d = self.domain[i] if isinstance(d, np.ndarray): d0 = np.squeeze(d) self.f[group]["mesh"].require_dataset("x{}".format(i), shape=d0.shape, dtype=d0.dtype, data=d0) else: d0 = np.array([d[0], d[1]]) self.f[group]["domain"].require_dataset("x{}".format(i), shape=d0.shape, dtype=d0.dtype, data=d0) @staticmethod def backend(): return 'hdf5' def open(self, mode='r+'): import h5py self.f = h5py.File(self.filename, mode, driver="mpio", comm=comm) def write(self, step, fields, **kw): """Write snapshot ``step`` of ``fields`` to HDF5 file Parameters 
---------- step : int Index of snapshot. fields : dict The fields to be dumped to file. (key, value) pairs are group name and either arrays or 2-tuples, respectively. The arrays are complete arrays to be stored, whereas 2-tuples are arrays with associated *global* slices. as_scalar : boolean, optional Whether to store rank > 0 arrays as scalars. Default is False. Example ------- >>> from mpi4py import MPI >>> from mpi4py_fft import PFFT, HDF5File, newDistArray >>> comm = MPI.COMM_WORLD >>> T = PFFT(comm, (15, 16, 17)) >>> u = newDistArray(T, forward_output=False, val=1) >>> v = newDistArray(T, forward_output=False, val=2) >>> f = HDF5File('h5filename.h5', mode='w') >>> f.write(0, {'u': [u, (u, [slice(None), 4, slice(None)])], ... 'v': [v, (v, [slice(None), 5, 5])]}) >>> f.write(1, {'u': [u, (u, [slice(None), 4, slice(None)])], ... 'v': [v, (v, [slice(None), 5, 5])]}) This stores data within two main groups ``u`` and ``v``. The HDF5 file will in the end contain groups:: /u/3D/{0, 1} /u/2D/slice_4_slice/{0, 1} /v/3D/{0, 1} /v/1D/slice_5_5/{0, 1} Note ---- The list of slices used in storing only parts of the arrays are views of the *global* arrays. 
""" self.open() FileBase.write(self, step, fields, **kw) self.close() def read(self, u, name, **kw): step = kw.get('step', 0) self.open() s = u.local_slice() dset = "/".join((name, "{}D".format(u.dimensions), str(step))) u[:] = self.f[dset][s] self.close() def _write_slice_step(self, name, step, slices, field, **kw): rank = field.rank slices = (slice(None),)*rank + tuple(slices) slices = list(slices) ndims = slices[rank:].count(slice(None)) slname = self._get_slice_name(slices[rank:]) s = field.local_slice() slices, inside = self._get_local_slices(slices, s) sp = np.nonzero([isinstance(x, slice) for x in slices])[0] sf = tuple(np.take(s, sp)) sl = tuple(slices) group = "/".join((name, "{}D".format(ndims), slname)) self.f.require_group(group) N = field.global_shape self.f[group].require_dataset(str(step), shape=tuple(np.take(N, sp)), dtype=field.dtype) if inside == 1: self.f["/".join((group, str(step)))][sf] = field[sl] def _write_group(self, name, u, step, **kw): s = u.local_slice() group = "/".join((name, "{}D".format(u.dimensions))) self.f.require_group(group) self.f[group].require_dataset(str(step), shape=u.global_shape, dtype=u.dtype) self.f["/".join((group, str(step)))][s] = u mpi4py-fft-2.0.6/mpi4py_fft/io/nc_file.py000066400000000000000000000167731462034230000201670ustar00rootroot00000000000000import os import numpy as np from mpi4py import MPI from .file_base import FileBase # https://github.com/Unidata/netcdf4-python/blob/master/examples/mpi_example.py # Note. Not using groups because Visit does not understand it __all__ = ('NCFile',) comm = MPI.COMM_WORLD class NCFile(FileBase): """Class for writing data to NetCDF4 format Parameters ---------- ncname : str Name of netcdf file to be created domain : Sequence, optional An optional spatial mesh or domain to go with the data. Sequence of either - 2-tuples, where each 2-tuple contains the (origin, length) of each dimension, e.g., (0, 2*pi). - Arrays of coordinates, e.g., np.linspace(0, 2*pi, N). 
One array per dimension. mode : str ``r``, ``w`` or ``a`` for read, write or append. Default is ``a``. clobber : bool, optional If True (default), opening a file with mode='w' will clobber an existing file with the same name. If False, an exception will be raised if a file with the same name already exists. kw : dict, optional Optional additional keyword arguments used when creating the file used to store data. Note ---- Each class instance creates one unique NetCDF4-file, with one step-counter. It is possible to store multiple fields in each file, but all snapshots of the fields must be taken at the same time. If you want one field stored every 10th timestep and another every 20th timestep, then use two different class instances with two different filenames ``ncname``. """ def __init__(self, ncname, domain=None, mode='a', clobber=True, **kw): FileBase.__init__(self, ncname, domain=domain) from netCDF4 import Dataset # netCDF4 does not seem to handle 'a' if the file does not already exist if mode == 'a' and not os.path.exists(ncname): mode = 'w' self.f = Dataset(ncname, mode=mode, clobber=clobber, parallel=True, comm=comm, **kw) self.dims = None if 'time' not in self.f.variables: self.f.createDimension('time', None) self.f.createVariable('time', float, ('time')) self.close() def _check_domain(self, group, field): N = field.global_shape[field.rank:] if self.domain is None: self.domain = [] for i in range(field.dimensions): self.domain.append(np.linspace(0, 2*np.pi, N[i])) assert len(self.domain) == field.dimensions if len(self.domain[0]) == 2: d = self.domain self.domain = [] for i in range(field.dimensions): self.domain.append(np.linspace(d[i][0], d[i][1], N[i])) self.dims = ['time'] for i in range(field.rank): ind = 'ijk'[i] self.dims.append(ind) if not ind in self.f.variables: self.f.createDimension(ind, field.dimensions) n = self.f.createVariable(ind, float, (ind)) n[:] = np.arange(field.dimensions) for i in range(field.dimensions): xyz = 'xyzrst'[i] 
self.dims.append(xyz) if not xyz in self.f.variables: self.f.createDimension(xyz, N[i]) nc_xyz = self.f.createVariable(xyz, float, (xyz)) nc_xyz[:] = self.domain[i] self.f.sync() @staticmethod def backend(): return 'netcdf4' def open(self, mode='r+'): from netCDF4 import Dataset self.f = Dataset(self.filename, mode=mode, parallel=True, comm=comm) def write(self, step, fields, **kw): """Write snapshot ``step`` of ``fields`` to NetCDF4 file Parameters ---------- step : int Index of snapshot. fields : dict The fields to be dumped to file. (key, value) pairs are group name and either arrays or 2-tuples, respectively. The arrays are complete arrays to be stored, whereas 2-tuples are arrays with associated *global* slices. as_scalar : boolean, optional Whether to store rank > 0 arrays as scalars. Default is False. Example ------- >>> from mpi4py import MPI >>> from mpi4py_fft import PFFT, NCFile, newDistArray >>> comm = MPI.COMM_WORLD >>> T = PFFT(comm, (15, 16, 17)) >>> u = newDistArray(T, forward_output=False, val=1) >>> v = newDistArray(T, forward_output=False, val=2) >>> f = NCFile('ncfilename.nc', mode='w') >>> f.write(0, {'u': [u, (u, [slice(None), 4, slice(None)])], ... 'v': [v, (v, [slice(None), 5, 5])]}) >>> f.write(1, {'u': [u, (u, [slice(None), 4, slice(None)])], ... 'v': [v, (v, [slice(None), 5, 5])]}) This stores the following datasets to the file ``ncfilename.nc``. 
Using in a terminal 'ncdump -h ncfilename.nc', one gets:: netcdf ncfilename { dimensions: time = UNLIMITED ; // (2 currently) x = 15 ; y = 16 ; z = 17 ; variables: double time(time) ; double x(x) ; double y(y) ; double z(z) ; double u(time, x, y, z) ; double u_slice_4_slice(time, x, z) ; double v(time, x, y, z) ; double v_slice_5_5(time, x) ; } """ self.open() nc_t = self.f.variables.get('time') nc_t.set_collective(True) it = nc_t.size if step in nc_t.__array__(): # If already stored at this step previously it = np.argwhere(nc_t.__array__() == step)[0][0] else: nc_t[it] = step FileBase.write(self, it, fields, **kw) self.close() def read(self, u, name, **kw): step = kw.get('step', 0) self.open() s = u.local_slice() s = (step,) + s u[:] = self.f[name][s] self.close() def _write_slice_step(self, name, step, slices, field, **kw): assert name not in self.dims # Crashes if user tries to name fields x, y, z, . rank = field.rank slices = list((slice(None),)*rank + tuple(slices)) slname = self._get_slice_name(slices[rank:]) s = field.local_slice() slices, inside = self._get_local_slices(slices, s) sp = np.nonzero([isinstance(x, slice) for x in slices])[0] sf = np.take(s, sp) sdims = ['time'] + list(np.take(self.dims, np.array(sp)+1)) fname = "_".join((name, slname)) if fname not in self.f.variables: h = self.f.createVariable(fname, field.dtype, sdims) else: h = self.f.variables[fname] h.set_collective(True) h[step] = 0 # collectively create dataset h.set_collective(False) sf = tuple([step] + list(sf)) sl = tuple(slices) if inside: h[sf] = field[sl] h.set_collective(True) self.f.sync() def _write_group(self, name, u, step, **kw): assert name not in self.dims # Crashes if user tries to name fields x, y, z, . 
s = u.local_slice() if name not in self.f.variables: h = self.f.createVariable(name, u.dtype, self.dims) else: h = self.f.variables[name] h.set_collective(True) s = (step,) + s h[s] = u self.f.sync() mpi4py-fft-2.0.6/mpi4py_fft/libfft.py000066400000000000000000000365451462034230000174260ustar00rootroot00000000000000import functools import numpy as np from . import fftw def _Xfftn_plan_pyfftw(shape, axes, dtype, transforms, options): import pyfftw opts = dict( avoid_copy=True, overwrite_input=True, auto_align_input=True, auto_contiguous=True, threads=1, ) opts.update(options) transforms = {} if transforms is None else transforms if tuple(axes) in transforms: plan_fwd, plan_bck = transforms[tuple(axes)] else: if np.issubdtype(dtype, np.floating): plan_fwd = pyfftw.builders.rfftn plan_bck = pyfftw.builders.irfftn else: plan_fwd = pyfftw.builders.fftn plan_bck = pyfftw.builders.ifftn s = tuple(np.take(shape, axes)) U = pyfftw.empty_aligned(shape, dtype=dtype) xfftn_fwd = plan_fwd(U, s=s, axes=axes, **opts) U.fill(0) if np.issubdtype(dtype, np.floating): del opts['overwrite_input'] V = xfftn_fwd.output_array xfftn_bck = plan_bck(V, s=s, axes=axes, **opts) V.fill(0) xfftn_fwd.update_arrays(U, V) xfftn_bck.update_arrays(V, U) wrapped_xfftn_bck = functools.partial(xfftn_bck, normalise_idft=False) functools.update_wrapper(wrapped_xfftn_bck, xfftn_bck, assigned=['input_array', 'output_array', '__doc__']) return (xfftn_fwd, wrapped_xfftn_bck) def _Xfftn_plan_fftw(shape, axes, dtype, transforms, options): opts = dict( overwrite_input='FFTW_DESTROY_INPUT', planner_effort='FFTW_MEASURE', threads=1, ) opts.update(options) flags = (fftw.flag_dict[opts['planner_effort']], fftw.flag_dict[opts['overwrite_input']]) threads = opts['threads'] transforms = {} if transforms is None else transforms if tuple(axes) in transforms: plan_fwd, plan_bck = transforms[tuple(axes)] else: if np.issubdtype(dtype, np.floating): plan_fwd = fftw.rfftn plan_bck = fftw.irfftn else: plan_fwd = fftw.fftn 
plan_bck = fftw.ifftn s = tuple(np.take(shape, axes)) U = fftw.aligned(shape, dtype=dtype) xfftn_fwd = plan_fwd(U, s=s, axes=axes, threads=threads, flags=flags) U.fill(0) V = xfftn_fwd.output_array if np.issubdtype(dtype, np.floating): flags = (fftw.flag_dict[opts['planner_effort']],) xfftn_bck = plan_bck(V, s=s, axes=axes, threads=threads, flags=flags, output_array=U) return (xfftn_fwd, xfftn_bck) def _Xfftn_plan_numpy(shape, axes, dtype, transforms, options): transforms = {} if transforms is None else transforms if tuple(axes) in transforms: plan_fwd, plan_bck = transforms[tuple(axes)] else: if np.issubdtype(dtype, np.floating): plan_fwd = np.fft.rfftn plan_bck = np.fft.irfftn else: plan_fwd = np.fft.fftn plan_bck = np.fft.ifftn s = tuple(np.take(shape, axes)) U = fftw.aligned(shape, dtype=dtype) V = plan_fwd(U, s=s, axes=axes).astype(dtype.char.upper()) # Numpy returns complex double if input single precision V = fftw.aligned_like(V) M = np.prod(s) # Numpy has forward transform unscaled and backward scaled with 1/N return (_Yfftn_wrap(plan_fwd, U, V, 1, {'s': s, 'axes': axes}), _Yfftn_wrap(plan_bck, V, U, M, {'s': s, 'axes': axes})) def _Xfftn_plan_mkl(shape, axes, dtype, transforms, options): #pragma: no cover transforms = {} if transforms is None else transforms if tuple(axes) in transforms: plan_fwd, plan_bck = transforms[tuple(axes)] else: if np.issubdtype(dtype, np.floating): from mkl_fft._numpy_fft import rfftn, irfftn plan_fwd = rfftn plan_bck = irfftn else: from mkl_fft._numpy_fft import fftn, ifftn plan_fwd = fftn plan_bck = ifftn s = tuple(np.take(shape, axes)) U = fftw.aligned(shape, dtype=dtype) V = plan_fwd(U, s=s, axes=axes) V = fftw.aligned_like(V) M = np.prod(s) return (_Yfftn_wrap(plan_fwd, U, V, 1, {'s': s, 'axes': axes}), _Yfftn_wrap(plan_bck, V, U, M, {'s': s, 'axes': axes})) def _Xfftn_plan_scipy(shape, axes, dtype, transforms, options): transforms = {} if transforms is None else transforms if tuple(axes) in transforms: plan_fwd, plan_bck = 
transforms[tuple(axes)] else: from scipy.fftpack import fftn, ifftn # No rfftn/irfftn methods plan_fwd = fftn plan_bck = ifftn s = tuple(np.take(shape, axes)) U = fftw.aligned(shape, dtype=dtype) V = plan_fwd(U, shape=s, axes=axes) V = fftw.aligned_like(V) M = np.prod(s) return (_Yfftn_wrap(plan_fwd, U, V, 1, {'shape': s, 'axes': axes}), _Yfftn_wrap(plan_bck, V, U, M, {'shape': s, 'axes': axes})) class _Yfftn_wrap(object): #Wraps numpy/scipy/mkl transforms to FFTW style # pylint: disable=too-few-public-methods __slots__ = ('_xfftn', '_M', '_opt', '__doc__', '_input_array', '_output_array') def __init__(self, xfftn_obj, input_array, output_array, M, opt): object.__setattr__(self, '_xfftn', xfftn_obj) object.__setattr__(self, '_opt', opt) object.__setattr__(self, '_M', M) object.__setattr__(self, '_input_array', input_array) object.__setattr__(self, '_output_array', output_array) object.__setattr__(self, '__doc__', xfftn_obj.__doc__) @property def input_array(self): return object.__getattribute__(self, '_input_array') @property def output_array(self): return object.__getattribute__(self, '_output_array') @property def xfftn(self): return object.__getattribute__(self, '_xfftn') @property def opt(self): return object.__getattribute__(self, '_opt') @property def M(self): return object.__getattribute__(self, '_M') def __call__(self, *args, **kwargs): self.opt.update(kwargs) self.output_array[...] 
= self.xfftn(self.input_array, **self.opt) if abs(self.M-1) > 1e-8: self._output_array *= self.M return self.output_array class _Xfftn_wrap(object): #Common interface for all serial transforms # pylint: disable=too-few-public-methods __slots__ = ('_xfftn', '__doc__', '_input_array', '_output_array') def __init__(self, xfftn_obj, input_array, output_array): object.__setattr__(self, '_xfftn', xfftn_obj) object.__setattr__(self, '_input_array', input_array) object.__setattr__(self, '_output_array', output_array) object.__setattr__(self, '__doc__', xfftn_obj.__doc__) @property def input_array(self): return object.__getattribute__(self, '_input_array') @property def output_array(self): return object.__getattribute__(self, '_output_array') @property def xfftn(self): return object.__getattribute__(self, '_xfftn') def __call__(self, input_array=None, output_array=None, **options): if input_array is not None: self.input_array[...] = input_array self.xfftn(**options) if output_array is not None: output_array[...] = self.output_array return output_array else: return self.output_array class FFTBase(object): """Base class for serial FFT transforms Parameters ---------- shape : list or tuple of ints shape of input array planned for axes : None, int or tuple of ints, optional axes to transform over. If None transform over all axes dtype : np.dtype, optional Type of input array padding : bool, number or list of numbers If False, then no padding. If number, then apply this number as padding factor for all axes. If list of numbers, then each number gives the padding for each axis. Must be same length as axes. 
""" def __init__(self, shape, axes=None, dtype=float, padding=False): shape = list(shape) if np.ndim(shape) else [shape] assert len(shape) > 0 assert min(shape) > 0 if axes is not None: axes = list(axes) if np.ndim(axes) else [axes] for i, axis in enumerate(axes): if axis < 0: axes[i] = axis + len(shape) else: axes = list(range(len(shape))) assert min(axes) >= 0 assert max(axes) < len(shape) assert 0 < len(axes) <= len(shape) assert sorted(axes) == sorted(set(axes)) dtype = np.dtype(dtype) assert dtype.char in 'fdgFDG' self.shape = shape self.axes = axes self.dtype = dtype self.padding = padding self.real_transform = np.issubdtype(dtype, np.floating) self.padding_factor = 1 def _truncation_forward(self, padded_array, trunc_array): axis = self.axes[-1] if self.padding_factor > 1.0+1e-8: trunc_array.fill(0) N0 = self.forward.output_array.shape[axis] if self.real_transform: N = trunc_array.shape[axis] s = [slice(None)]*trunc_array.ndim s[axis] = slice(0, N) trunc_array[:] = padded_array[tuple(s)] if N0 % 2 == 0: s[axis] = N-1 s = tuple(s) trunc_array[s] = trunc_array[s].real trunc_array[s] *= 2 else: N = trunc_array.shape[axis] su = [slice(None)]*trunc_array.ndim su[axis] = slice(0, N//2+1) trunc_array[tuple(su)] = padded_array[tuple(su)] su[axis] = slice(-(N//2), None) trunc_array[tuple(su)] += padded_array[tuple(su)] def _padding_backward(self, trunc_array, padded_array): axis = self.axes[-1] if self.padding_factor > 1.0+1e-8: padded_array.fill(0) N0 = self.forward.output_array.shape[axis] if self.real_transform: s = [slice(0, n) for n in trunc_array.shape] padded_array[tuple(s)] = trunc_array[:] N = trunc_array.shape[axis] if N0 % 2 == 0: # Symmetric Fourier interpolator s[axis] = N-1 s = tuple(s) padded_array[s] = padded_array[s].real padded_array[s] *= 0.5 else: N = trunc_array.shape[axis] su = [slice(None)]*trunc_array.ndim su[axis] = slice(0, N//2+1) padded_array[tuple(su)] = trunc_array[tuple(su)] su[axis] = slice(-(N//2), None) padded_array[tuple(su)] = 
trunc_array[tuple(su)] if N0 % 2 == 0: # Use symmetric Fourier interpolator su[axis] = N//2 padded_array[tuple(su)] *= 0.5 su[axis] = -(N//2) padded_array[tuple(su)] *= 0.5 class FFT(FFTBase): """Class for serial FFT transforms Parameters ---------- shape : list or tuple of ints shape of input array planned for axes : None, int or tuple of ints, optional axes to transform over. If None transform over all axes dtype : np.dtype, optional Type of input array padding : bool, number or list of numbers If False, then no padding. If number, then apply this number as padding factor for all axes. If list of numbers, then each number gives the padding for each axis. Must be same length as axes. backend : str, optional Choose backend for serial transforms (``fftw``, ``pyfftw``, ``numpy``, ``scipy``, ``mkl_fft``). Default is ``fftw`` transforms : None or dict, optional Dictionary of axes to serial transforms (forward and backward) along those axes. For example:: {(0, 1): (dctn, idctn), (2, 3): (dstn, idstn)} If missing the default is to use rfftn/irfftn for real input arrays and fftn/ifftn for complex input arrays. Real-to-real transforms can be configured using this dictionary and real-to-real transforms from the :mod:`.fftw.xfftn` module. 
kw : dict Parameters passed to serial transform object Methods ------- forward(input_array=None, output_array=None, **options) Generic serial forward transform Parameters ---------- input_array : array, optional output_array : array, optional options : dict parameters to serial transforms Returns ------- output_array : array backward(input_array=None, output_array=None, **options) Generic serial backward transform Parameters ---------- input_array : array, optional output_array : array, optional options : dict parameters to serial transforms Returns ------- output_array : array """ def __init__(self, shape, axes=None, dtype=float, padding=False, backend='fftw', transforms=None, **kw): FFTBase.__init__(self, shape, axes, dtype, padding) plan = { 'pyfftw': _Xfftn_plan_pyfftw, 'fftw': _Xfftn_plan_fftw, 'numpy': _Xfftn_plan_numpy, 'mkl_fft': _Xfftn_plan_mkl, 'scipy': _Xfftn_plan_scipy, }[backend] self.backend = backend self.fwd, self.bck = plan(self.shape, self.axes, self.dtype, transforms, kw) U, V = self.fwd.input_array, self.fwd.output_array self.M = 1 if backend != 'fftw': self.M = 1./np.prod(np.take(self.shape, self.axes)) else: self.M = self.fwd.get_normalization() if backend == 'scipy': self.real_transform = False # No rfftn/irfftn methods self.padding_factor = 1.0 if padding is not False: self.padding_factor = padding[self.axes[-1]] if np.ndim(padding) else padding if abs(self.padding_factor-1.0) > 1e-8: assert len(self.axes) == 1 trunc_array = self._get_truncarray(shape, V.dtype) self.forward = _Xfftn_wrap(self._forward, U, trunc_array) self.backward = _Xfftn_wrap(self._backward, trunc_array, U) else: self.forward = _Xfftn_wrap(self._forward, U, V) self.backward = _Xfftn_wrap(self._backward, V, U) def _forward(self, **kw): normalize = kw.pop('normalize', True) self.fwd(None, None, **kw) self._truncation_forward(self.fwd.output_array, self.forward.output_array) if normalize: self.forward._output_array *= self.M return self.forward.output_array def 
_backward(self, **kw): normalize = kw.pop('normalize', False) self._padding_backward(self.backward.input_array, self.bck.input_array) self.bck(None, None, **kw) if normalize: self.backward._output_array *= self.M return self.backward.output_array def _get_truncarray(self, shape, dtype): axis = self.axes[-1] if not self.real_transform: shape = list(shape) shape[axis] = int(np.round(shape[axis] / self.padding_factor)) return fftw.aligned(shape, dtype=dtype) shape = list(shape) shape[axis] = int(np.round(shape[axis] / self.padding_factor)) shape[axis] = shape[axis]//2 + 1 return fftw.aligned(shape, dtype=dtype) mpi4py-fft-2.0.6/mpi4py_fft/mpifft.py000066400000000000000000000365141462034230000174410ustar00rootroot00000000000000import numpy as np from .libfft import FFT from .pencil import Pencil from .pencil import Subcomm class Transform(object): """Class for performing any parallel transform, forward or backward Parameters ---------- xfftn : list of serial transform objects transfer : list of global redistribution objects pencil : list of two pencil objects The two pencils represent the input and final output configuration of the distributed global arrays """ def __init__(self, xfftn, transfer, pencil): assert len(xfftn) == len(transfer) + 1 and len(pencil) == 2 self._xfftn = tuple(xfftn) self._transfer = tuple(transfer) self._pencil = tuple(pencil) @property def input_array(self): """Return input array of Transform""" return self._xfftn[0].input_array @property def output_array(self): """Return output array of Transform""" return self._xfftn[-1].output_array @property def input_pencil(self): """Return input pencil of Transform""" return self._pencil[0] @property def output_pencil(self): """Return output pencil of Transform""" return self._pencil[1] def __call__(self, input_array=None, output_array=None, **kw): """Compute transform Parameters ---------- input_array : array, optional output_array : array, optional kw : dict parameters to serial transforms Note in 
particular that the keyword 'normalize'=True/False can be used to turn normalization on or off. Default is to enable normalization for forward transforms and disable it for backward. Note ---- If input_array/output_array are not given, then use predefined arrays as planned with serial transform object _xfftn. """ if input_array is not None: self.input_array[...] = input_array for i in range(len(self._transfer)): self._xfftn[i](**kw) arrayA = self._xfftn[i].output_array arrayB = self._xfftn[i+1].input_array self._transfer[i](arrayA, arrayB) self._xfftn[-1](**kw) if output_array is not None: output_array[...] = self.output_array return output_array else: return self.output_array class PFFT(object): """Base class for parallel FFT transforms Parameters ---------- comm : MPI communicator shape : sequence of ints, optional shape of input array planned for axes : None, int, sequence of ints or sequence of sequence of ints, optional axes to transform over. - None -> All axes are transformed - int -> Just one axis to transform over - sequence of ints -> e.g., (0, 1, 2) or (0, 2, 1) - sequence of sequence of ints -> e.g., ((0,), (1,)) or ((0,), (1, 2)) For seq. of seq. of ints all but the last transformed sequence may be longer than 1. This corresponds to collapsing axes, where serial FFTs are performed for all collapsed axes in one single call dtype : np.dtype, optional Type of input array grid : sequence of ints, optional Define processor grid sizes. Non positive values act as wildcards to allow MPI compute optimal decompositions. The sequence is padded with ones to match the global transform dimension. Use ``(-1,)`` to get a slab decomposition on the first axis. Use ``(1, -1)`` to get a slab decomposition on the second axis. Use ``(P, Q)`` or ``(P, Q, 1)`` to get a 3D transform with 2D-pencil decomposition on a PxQ processor grid with the last axis non distributed. 
Use ``(P, 1, Q)`` to get a 3D transform with 2D-pencil decomposition on a PxQ processor grid with the second to last axis non distributed. padding : bool, number or sequence of numbers, optional If False, then no padding. If number, then apply this number as padding factor for all axes. If sequence of numbers, then each number gives the padding for each axis. Must be same length as axes. collapse : bool, optional If True try to collapse several serial transforms into one backend : str, optional Choose backend for serial transforms (``fftw``, ``pyfftw``, ``numpy``, ``scipy``, ``mkl_fft``). Default is ``fftw`` transforms : None or dict, optional Dictionary of axes to serial transforms (forward and backward) along those axes. For example:: {(0, 1): (dctn, idctn), (2, 3): (dstn, idstn)} If missing the default is to use rfftn/irfftn for real input arrays and fftn/ifftn for complex input arrays. Real-to-real transforms can be configured using this dictionary and real-to-real transforms from the :mod:`.fftw.xfftn` module. See Examples. Other Parameters ---------------- darray : DistArray object, optional Create PFFT using information contained in ``darray``, neglecting most optional Parameters above slab : bool or int, optional DEPRECATED. If True then distribute only one axis of the global array. Methods ------- forward(input_array=None, output_array=None, **kw) Parallel forward transform. The method is an instance of the :class:`.Transform` class. See :meth:`.Transform.__call__` Parameters ---------- input_array : array, optional output_array : array, optional kw : dict parameters to serial transforms Returns ------- output_array : array backward(input_array=None, output_array=None, **kw) Parallel backward transform. The method is an instance of the :class:`.Transform` class. 
See :meth:`.Transform.__call__` Parameters ---------- input_array : array, optional output_array : array, optional kw : dict parameters to serial transforms Returns ------- output_array : array Examples -------- >>> import numpy as np >>> from mpi4py import MPI >>> from mpi4py_fft import PFFT, newDistArray >>> N = np.array([12, 14, 15], dtype=int) >>> fft = PFFT(MPI.COMM_WORLD, N, axes=(0, 1, 2)) >>> u = newDistArray(fft, False) >>> u[:] = np.random.random(u.shape).astype(u.dtype) >>> u_hat = fft.forward(u) >>> uj = np.zeros_like(u) >>> uj = fft.backward(u_hat, uj) >>> assert np.allclose(uj, u) Now configure with real-to-real discrete cosine transform type 3 >>> from mpi4py_fft.fftw import rfftn, irfftn, dctn, idctn >>> import functools >>> dct = functools.partial(dctn, type=3) >>> idct = functools.partial(idctn, type=3) >>> transforms = {(1, 2): (dct, idct)} >>> r2c = PFFT(MPI.COMM_WORLD, N, axes=((0,), (1, 2)), transforms=transforms) >>> u = newDistArray(r2c, False) >>> u[:] = np.random.random(u.shape).astype(u.dtype) >>> u_hat = r2c.forward(u) >>> uj = np.zeros_like(u) >>> uj = r2c.backward(u_hat, uj) >>> assert np.allclose(uj, u) """ def __init__(self, comm, shape=None, axes=None, dtype=float, grid=None, padding=False, collapse=False, backend='fftw', transforms=None, darray=None, **kw): # pylint: disable=too-many-locals # pylint: disable=too-many-branches # pylint: disable=too-many-statements if shape is None: assert darray is not None shape = darray.pencil.shape if axes is not None: axes = list(axes) if not isinstance(axes, int) else [axes] else: axes = list(range(len(shape))) if darray is not None: # Make sure aligned axis of darray is transformed first axes = list(np.roll(axes, len(shape)-1-darray.alignment)) for i, ax in enumerate(axes): if isinstance(ax, (int, np.integer)): if ax < 0: ax += len(shape) axes[i] = (ax,) else: assert isinstance(ax, (tuple, list)) ax = list(ax) for j, a in enumerate(ax): assert isinstance(a, int) if a < 0: a += len(shape) ax[j] 
= a axes[i] = ax assert min(axes[i]) >= 0 assert max(axes[i]) < len(shape) assert 0 < len(axes[i]) <= len(shape) assert sorted(axes[i]) == sorted(set(axes[i])) self.axes = axes shape = list(shape) if darray is None: dtype = np.dtype(dtype) assert dtype.char in 'fdgFDG' if padding is not False: assert len(padding) == len(shape) for ax in axes: if len(ax) == 1 and padding[ax[0]] > 1.0+1e-6: old = float(shape[ax[0]]) shape[ax[0]] = int(np.floor(shape[ax[0]]*padding[ax[0]])) padding[ax[0]] = shape[ax[0]] / old self._input_shape = tuple(shape) assert len(shape) > 0 assert min(shape) > 0 slab = kw.pop('slab', False) if grid is not None: assert not isinstance(comm, Subcomm) assert slab is False grid = tuple(grid) assert len(grid) <= len(shape) dims = list(grid) + [1] * (len(shape) - len(grid)) comm = Subcomm(comm, dims) if isinstance(comm, Subcomm): assert slab is False assert len(comm) == len(shape) assert np.all([comm[ax].Get_size() == 1 for ax in axes[-1]]) self.subcomm = comm else: if slab is False or slab is None: dims = [0] * len(shape) for ax in axes[-1]: dims[ax] = 1 else: #pragma: no cover if slab is True: axis = (axes[-1][-1] + 1) % len(shape) else: axis = slab if axis < 0: axis = axis + len(shape) assert 0 <= axis < len(shape) dims = [1] * len(shape) dims[axis] = comm.Get_size() self.subcomm = Subcomm(comm, dims) else: dtype = darray.dtype self.subcomm = darray.subcomm self._input_shape = tuple(shape) commsizes = darray.commsizes assert np.all([commsizes[ax] == 1 for ax in axes[-1]]), "Set keyword axes such that axes to transform first are aligned" self.collapse = collapse if collapse is True: groups = [[]] for ax in reversed(axes): if np.all([self.subcomm[axis].Get_size() == 1 for axis in ax]): [groups[0].insert(0, axis) for axis in reversed(ax)] else: groups.insert(0, ax) axes = groups self.axes = tuple(map(tuple, axes)) self.xfftn = [] self.transfer = [] self.pencil = [None, None] axes = self.axes[-1] pencil = Pencil(self.subcomm, shape, axes[-1]) xfftn = 
FFT(pencil.subshape, axes, dtype, padding, backend=backend, transforms=transforms, **kw) self.xfftn.append(xfftn) self.pencil[0] = pencilA = pencil if not shape[axes[-1]] == xfftn.forward.output_array.shape[axes[-1]]: dtype = xfftn.forward.output_array.dtype shape[axes[-1]] = xfftn.forward.output_array.shape[axes[-1]] pencilA = Pencil(self.subcomm, shape, axes[-1]) for axes in reversed(self.axes[:-1]): pencilB = pencilA.pencil(axes[-1]) transAB = pencilA.transfer(pencilB, dtype) xfftn = FFT(pencilB.subshape, axes, dtype, padding, backend=backend, transforms=transforms, **kw) self.xfftn.append(xfftn) self.transfer.append(transAB) pencilA = pencilB if not shape[axes[-1]] == xfftn.forward.output_array.shape[axes[-1]]: dtype = xfftn.forward.output_array.dtype shape[axes[-1]] = xfftn.forward.output_array.shape[axes[-1]] pencilA = Pencil(pencilB.subcomm, shape, axes[-1]) self.pencil[1] = pencilA self._output_shape = tuple(shape) self.forward = Transform( [o.forward for o in self.xfftn], [o.forward for o in self.transfer], self.pencil) self.backward = Transform( [o.backward for o in self.xfftn[::-1]], [o.backward for o in self.transfer[::-1]], self.pencil[::-1]) def destroy(self): if isinstance(self.subcomm, Subcomm): self.subcomm.destroy() for trans in self.transfer: trans.destroy() def shape(self, forward_output=True): """The local (to each processor) shape of data Parameters ---------- forward_output : bool, optional Return shape of output array (spectral space) if True, else return shape of input array (physical space) """ if forward_output is not True: return self.forward.input_pencil.subshape return self.forward.output_array.shape def local_slice(self, forward_output=True): """The local view into the global data Parameters ---------- forward_output : bool, optional Return local slices of output array (spectral space) if True, else return local slices of input array (physical space) """ if forward_output is not True: ip = self.forward.input_pencil s = [slice(start, 
start+shape) for start, shape in zip(ip.substart, ip.subshape)] else: ip = self.backward.input_pencil s = [slice(start, start+shape) for start, shape in zip(ip.substart, ip.subshape)] return tuple(s) def global_shape(self, forward_output=False): """Return global shape of associated tensors Parameters ---------- forward_output : bool, optional If True then return global shape of spectral space, i.e., the input to a backward transfer. If False then return shape of physical space, i.e., the input to a forward transfer. """ if forward_output: return self._output_shape return self._input_shape @property def dimensions(self): """The number of dimensions for transformed arrays""" return len(self.forward.input_array.shape) def dtype(self, forward_output=False): """The type of transformed arrays Parameters ---------- forward_output : bool, optional If True then return dtype of an array that is the result of a forward transform. Otherwise, return the dtype of an array that is input to a forward transform. 
""" if forward_output: return self.forward.output_array.dtype return self.forward.input_array.dtype mpi4py-fft-2.0.6/mpi4py_fft/pencil.py000066400000000000000000000270401462034230000174200ustar00rootroot00000000000000import numpy as np from mpi4py import MPI def _blockdist(N, size, rank): q, r = divmod(N, size) n = q + (1 if r > rank else 0) s = rank * q + min(rank, r) return (n, s) def _subarraytypes(comm, shape, axis, subshape, dtype): # pylint: disable=too-many-locals # pylint: disable=protected-access N = shape[axis] p = comm.Get_size() datatype = MPI._typedict[dtype.char] sizes = list(subshape) subsizes = sizes[:] substarts = [0] * len(sizes) datatypes = [] for i in range(p): n, s = _blockdist(N, p, i) subsizes[axis] = n substarts[axis] = s newtype = datatype.Create_subarray( sizes, subsizes, substarts).Commit() datatypes.append(newtype) return tuple(datatypes) class Subcomm(tuple): r"""Class returning a tuple of subcommunicators of any dimensionality Parameters ---------- comm : A communicator or group of communicators dims : None, int or sequence of ints dims = [0, 0, 1] will give communicators distributed in the two first indices, whereas the third will not be distributed Examples -------- >>> import subprocess >>> fx = open('subcomm_script.py', 'w') >>> h = fx.write(''' ... from mpi4py import MPI ... comm = MPI.COMM_WORLD ... from mpi4py_fft.pencil import Subcomm ... subcomms = Subcomm(comm, [0, 0, 1]) ... if comm.Get_rank() == 0: ... for subcomm in subcomms: ... 
print(subcomm.Get_size())''') >>> fx.close() >>> print(subprocess.getoutput('mpirun -np 4 python subcomm_script.py')) 2 2 1 >>> print(subprocess.getoutput('mpirun -np 6 python subcomm_script.py')) 3 2 1 """ def __new__(cls, comm, dims=None, reorder=True): assert not comm.Is_inter() if comm.Get_topology() == MPI.CART: assert comm.Get_dim() > 0 assert dims is None cartcomm = comm else: if dims is None: dims = [0] elif np.ndim(dims) > 0: assert len(dims) > 0 dims = [max(0, d) for d in dims] else: assert dims > 0 dims = [0] * dims dims = MPI.Compute_dims(comm.Get_size(), dims) cartcomm = comm.Create_cart(dims, reorder=reorder) dim = cartcomm.Get_dim() subcomm = [None] * dim remdims = [False] * dim for i in range(dim): remdims[i] = True subcomm[i] = cartcomm.Sub(remdims) remdims[i] = False if cartcomm != comm: cartcomm.Free() return super(Subcomm, cls).__new__(cls, subcomm) def destroy(self): for comm in self: if comm: comm.Free() class Transfer(object): """Class for performing global redistributions Parameters ---------- comm : MPI communicator shape : sequence of ints shape of input array planned for dtype : np.dtype, optional Type of input array subshapeA : sequence of ints Shape of input pencil axisA : int Input array aligned in this direction subshapeB : sequence of ints Shape of output pencil axisB : int Output array aligned in this direction Examples -------- Create two pencils for a 4-dimensional array of shape (8, 8, 8, 8) using 4 processors in total. The input pencil will be distributed in the first two axes, whereas the output pencil will be distributed in axes 1 and 2. Create a random array of shape according to the input pencil and transfer its values to an array of the output shape. >>> import subprocess >>> fx = open('transfer_script.py', 'w') >>> h = fx.write(''' ... import numpy as np ... from mpi4py import MPI ... from mpi4py_fft.pencil import Subcomm, Pencil ... comm = MPI.COMM_WORLD ... N = (8, 8, 8, 8) ... subcomms = Subcomm(comm, [0, 0, 1, 0]) ... 
axis = 2 ... p0 = Pencil(subcomms, N, axis) ... p1 = p0.pencil(0) ... transfer = p0.transfer(p1, float) ... a0 = np.zeros(p0.subshape, dtype=float) ... a1 = np.zeros(p1.subshape) ... a0[:] = np.random.random(a0.shape) ... transfer.forward(a0, a1) ... s0 = comm.reduce(np.sum(a0**2)) ... s1 = comm.reduce(np.sum(a1**2)) ... if comm.Get_rank() == 0: ... assert np.allclose(s0, s1)''') >>> fx.close() >>> h=subprocess.getoutput('mpirun -np 4 python transfer_script.py') """ def __init__(self, comm, shape, dtype, subshapeA, axisA, subshapeB, axisB): self.comm = comm self.shape = tuple(shape) self.dtype = dtype = np.dtype(dtype) self.subshapeA, self.axisA = tuple(subshapeA), axisA self.subshapeB, self.axisB = tuple(subshapeB), axisB self._subtypesA = _subarraytypes(comm, shape, axisA, subshapeA, dtype) self._subtypesB = _subarraytypes(comm, shape, axisB, subshapeB, dtype) size = comm.Get_size() self._counts_displs = ([1] * size, [0] * size) # XXX (None, None) def forward(self, arrayA, arrayB): """Global redistribution from arrayA to arrayB Parameters ---------- arrayA : array Array of shape subshapeA, containing data to be redistributed arrayB : array Array of shape subshapeB, for receiving data """ assert self.subshapeA == arrayA.shape assert self.subshapeB == arrayB.shape assert self.dtype == arrayA.dtype assert self.dtype == arrayB.dtype self.comm.Alltoallw([arrayA, self._counts_displs, self._subtypesA], [arrayB, self._counts_displs, self._subtypesB]) def backward(self, arrayB, arrayA): """Global redistribution from arrayB to arrayA Parameters ---------- arrayB : array Array of shape subshapeB, containing data to be redistributed arrayA : array Array of shape subshapeA, for receiving data """ assert self.subshapeA == arrayA.shape assert self.subshapeB == arrayB.shape assert self.dtype == arrayA.dtype assert self.dtype == arrayB.dtype self.comm.Alltoallw([arrayB, self._counts_displs, self._subtypesB], [arrayA, self._counts_displs, self._subtypesA]) def destroy(self): for 
datatype in self._subtypesA: if datatype: datatype.Free() for datatype in self._subtypesB: if datatype: datatype.Free() class Pencil(object): """Class to represent a distributed array (pencil) Parameters ---------- subcomm : MPI communicator shape : sequence of ints Shape of global array axis : int, optional Pencil is aligned in this direction Examples -------- Create two pencils for a 4-dimensional array of shape (8, 8, 8, 8) using 4 processors in total. The input pencil will be distributed in the first two axes, whereas the output pencil will be distributed in axes 1 and 2. Note that the Subcomm instance below may distribute any axis where an entry 0 is found, whereas an entry of 1 means that this axis should not be distributed. >>> import subprocess >>> fx = open('pencil_script.py', 'w') >>> h = fx.write(''' ... import numpy as np ... from mpi4py import MPI ... from mpi4py_fft.pencil import Subcomm, Pencil ... comm = MPI.COMM_WORLD ... N = (8, 8, 8, 8) ... subcomms = Subcomm(comm, [0, 0, 1, 0]) ... axis = 2 ... p0 = Pencil(subcomms, N, axis) ... p1 = p0.pencil(0) ... shape0 = comm.gather(p0.subshape) ... shape1 = comm.gather(p1.subshape) ... if comm.Get_rank() == 0: ... print('Subshapes all 4 processors pencil p0:') ... print(np.array(shape0)) ... print('Subshapes all 4 processors pencil p1:') ... print(np.array(shape1))''') >>> fx.close() >>> print(subprocess.getoutput('mpirun -np 4 python pencil_script.py')) Subshapes all 4 processors pencil p0: [[4 4 8 8] [4 4 8 8] [4 4 8 8] [4 4 8 8]] Subshapes all 4 processors pencil p1: [[8 4 4 8] [8 4 4 8] [8 4 4 8] [8 4 4 8]] Two index sets of the global data of shape (8, 8, 8, 8) are distributed. This means that the current distribution is using two groups of processors, with 2 processors in each group (4 in total). One group shares axis 0 and the other axis 1 on the input arrays. On the output, one group shares axis 1, whereas the other shares axis 2. 
Note that the call ``p1 = p0.pencil(0)`` creates a new pencil (p1) that is non-distributed in axes 0. It is, in other words, aligned in axis 0. Hence the first 8 in the lists with [8 4 4 8] above. The alignment is configurable, and ``p1 = p0.pencil(1)`` would lead to an output pencil aligned in axis 1. """ def __init__(self, subcomm, shape, axis=-1): assert len(shape) >= 2 assert min(shape) >= 1 assert -len(shape) <= axis < len(shape) assert 1 <= len(subcomm) <= len(shape) if axis < 0: axis += len(shape) if len(subcomm) < len(shape): subcomm = list(subcomm) while len(subcomm) < len(shape) - 1: subcomm.append(MPI.COMM_SELF) subcomm.insert(axis, MPI.COMM_SELF) assert len(subcomm) == len(shape) assert subcomm[axis].Get_size() == 1 subshape = [None] * len(shape) substart = [None] * len(shape) for i, comm in enumerate(subcomm): size = comm.Get_size() rank = comm.Get_rank() assert shape[i] >= size n, s = _blockdist(shape[i], size, rank) subshape[i] = n substart[i] = s self.shape = tuple(shape) self.axis = axis self.subcomm = tuple(subcomm) self.subshape = tuple(subshape) self.substart = tuple(substart) def pencil(self, axis): """Return a Pencil aligned with axis Parameters ---------- axis : int The axis along which the pencil is aligned """ assert -len(self.shape) <= axis < len(self.shape) if axis < 0: axis += len(self.shape) i, j = self.axis, axis subcomm = list(self.subcomm) subcomm[j], subcomm[i] = subcomm[i], subcomm[j] return Pencil(subcomm, self.shape, axis) def transfer(self, pencil, dtype): """Return an appropriate instance of the :class:`.Transfer` class The returned :class:`.Transfer` class is used for global redistribution from this pencil's instance to the pencil instance provided. 
Parameters ---------- pencil : :class:`.Pencil` The receiving pencil of a forward transform dtype : dtype The type of the sending pencil """ penA, penB = self, pencil assert penA.shape == penB.shape assert penA.axis != penB.axis for i in range(len(penA.shape)): if i != penA.axis and i != penB.axis: assert penA.subcomm[i] == penB.subcomm[i] assert penA.subshape[i] == penB.subshape[i] assert penA.subcomm[penB.axis] == penB.subcomm[penA.axis] axis = penB.axis comm = penA.subcomm[axis] shape = list(penA.subshape) shape[axis] = penA.shape[axis] return Transfer(comm, shape, dtype, penA.subshape, penA.axis, penB.subshape, penB.axis) mpi4py-fft-2.0.6/pyproject.toml000066400000000000000000000001651462034230000164260ustar00rootroot00000000000000[build-system] requires = ["setuptools >= 42", "numpy", "cython >= 0.29.32"] build-backend = "setuptools.build_meta" mpi4py-fft-2.0.6/requirements.txt000066400000000000000000000000241462034230000167700ustar00rootroot00000000000000numpy cython mpi4py mpi4py-fft-2.0.6/setup.py000066400000000000000000000175601462034230000152330ustar00rootroot00000000000000#!/usr/bin/env python """mpi4py-fft -- Parallel Fast Fourier Transforms (FFTs) using MPI for Python""" import os import sys import re import platform import sysconfig from distutils import ccompiler from distutils.errors import DistutilsPlatformError from setuptools import setup from setuptools.dist import Distribution from setuptools.extension import Extension import numpy cwd = os.path.abspath(os.path.dirname(__file__)) fftwdir = os.path.join(cwd, 'mpi4py_fft', 'fftw') prec_map = {'float': 'f', 'double': '', 'long double': 'l'} triplet = sysconfig.get_config_var('MULTIARCH') or '' bits = platform.architecture()[0][:-3] def append(dirlist, *args): entry = os.path.join(*args) entry = os.path.normpath(entry) if os.path.isdir(entry): if entry not in dirlist: dirlist.append(entry) def get_prefix_dirs(): dirs = [] for envvar in ('FFTW_ROOT', 'FFTW_DIR'): if envvar in os.environ: prefix = 
os.environ[envvar] append(dirs, prefix) append(dirs, sys.prefix) if 'CONDA_BUILD' not in os.environ: append(dirs, '/usr') append(dirs, '/usr/local') append(dirs, '/opt/homebrew') return dirs def get_include_dirs(): dirs = [] if 'FFTW_INCLUDE_DIR' in os.environ: entry = os.environ['FFTW_INCLUDE_DIR'] append(dirs, entry) for prefix in get_prefix_dirs(): append(dirs, prefix, 'include', triplet) append(dirs, prefix, 'include') dirs.append(numpy.get_include()) return dirs def get_library_dirs(): dirs = [] if 'FFTW_LIBRARY_DIR' in os.environ: entry = os.environ['FFTW_LIBRARY_DIR'] append(dirs, entry) for prefix in get_prefix_dirs(): append(dirs, prefix, 'lib' + bits) append(dirs, prefix, 'lib', triplet) append(dirs, prefix, 'lib') return dirs def get_fftw_libs(): """Return FFTW libraries""" compiler = ccompiler.new_compiler() library_dirs = get_library_dirs() libs = {} for d in ('float', 'double', 'long double'): lib = 'fftw3'+prec_map[d] tlib = lib+'_threads' if compiler.find_library_file(library_dirs, lib): libs[d] = [lib] if compiler.find_library_file(library_dirs, tlib): libs[d].append(tlib) if os.name == 'posix': libs[d].append('m') if not libs: message = "No FFTW libraries found in {}".format(library_dirs) raise DistutilsPlatformError(message) return libs def generate_extensions(fftwlibs, force=True): """Generate files with float and long double""" try: from setuptools.modified import newer_group except ImportError: try: from setuptools.dep_util import newer_group except ImportError: from distutils.dep_util import newer_group for d in fftwlibs: if d == 'double': continue p = 'fftw'+prec_map[d]+'_' for fname in ( 'fftw_planxfftn.h', 'fftw_planxfftn.c', 'fftw_xfftn.pyx', 'fftw_xfftn.pxd', ): src = os.path.join(fftwdir, fname) dst = os.path.join(fftwdir, fname.replace('fftw_', p)) if force or newer_group([src], dst, 'newer'): with open(src, 'r') as fin: code = fin.read() code = re.sub('fftw_', p, code) code = re.sub('double', d, code) with open(dst, 'w') as fout: 
fout.write(code) def remove_extensions(fftwlibs): """Remove generated files""" for fname in ( 'utilities.c', 'fftw_xfftn.c', 'fftwf_xfftn.c', 'fftwl_xfftn.c', ): dst = os.path.join(fftwdir, fname) try: os.remove(dst) except OSError: pass for d in fftwlibs: if d == 'double': continue p = 'fftw'+prec_map[d]+'_' for fname in ( 'fftw_planxfftn.h', 'fftw_planxfftn.c', 'fftw_xfftn.pyx', 'fftw_xfftn.pxd', ): dst = os.path.join(fftwdir, fname.replace('fftw_', p)) try: os.remove(dst) except OSError: pass def get_extensions(): """Return list of extension modules""" include_dirs = get_include_dirs() library_dirs = get_library_dirs() ext = [ Extension( "mpi4py_fft.fftw.utilities", sources=[os.path.join(fftwdir, "utilities.pyx")], define_macros=[('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION')], include_dirs=include_dirs, ), ] sdist = 'sdist' in sys.argv egg_info = 'egg_info' in sys.argv fftwlibs = ( get_fftw_libs() if not (sdist or egg_info) else {d: [] for d in ('float', 'double', 'long double')} ) for d, libs in fftwlibs.items(): p = 'fftw' + prec_map[d] + '_' ext.append( Extension( "mpi4py_fft.fftw.{}xfftn".format(p), sources=[ os.path.join(fftwdir, "{}xfftn.pyx".format(p)), os.path.join(fftwdir, "{}planxfftn.c".format(p)), ], define_macros=[('NPY_NO_DEPRECATED_API', 'NPY_1_7_API_VERSION')], libraries=libs, include_dirs=include_dirs, library_dirs=library_dirs, ) ) return ext class Dist(Distribution): def get_command_class(self, command): get_command_class = Distribution.get_command_class if 'build_ext' not in self.cmdclass: _build_ext = get_command_class(self, 'build_ext') class build_ext(_build_ext): def run(self): fftw_libs = get_fftw_libs() generate_extensions(fftw_libs, self.force) _build_ext.run(self) self.cmdclass['build_ext'] = build_ext if 'clean' not in self.cmdclass: _clean = get_command_class(self, 'clean') class clean(_clean): def run(self): fftw_libs = get_fftw_libs() remove_extensions(fftw_libs) _clean.run(self) self.cmdclass['clean'] = clean return 
get_command_class(self, command) def version(): srcdir = os.path.join(cwd, 'mpi4py_fft') with open(os.path.join(srcdir, '__init__.py')) as f: m = re.search(r"__version__\s*=\s*'(.*)'", f.read()) return m.groups()[0] with open("README.rst", "r") as fh: long_description = fh.read() if __name__ == '__main__': setup(name="mpi4py-fft", version=version(), description=__doc__.strip(), long_description=long_description, long_description_content_type='text/x-rst', author="Lisandro Dalcin and Mikael Mortensen", url="https://github.com/mpi4py/mpi4py-fft", packages=[ "mpi4py_fft", "mpi4py_fft.fftw", "mpi4py_fft.io", ], package_dir={ "mpi4py_fft": "mpi4py_fft", }, classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', 'License :: OSI Approved :: BSD License', 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Software Development :: Libraries :: Python Modules', ], keywords=['Python', 'FFTW', 'FFT', 'DCT', 'DST', 'MPI'], distclass=Dist, ext_modules=get_extensions(), install_requires=["mpi4py", "numpy"], ) mpi4py-fft-2.0.6/tests/000077500000000000000000000000001462034230000146525ustar00rootroot00000000000000mpi4py-fft-2.0.6/tests/.coveragerc000066400000000000000000000003261462034230000167740ustar00rootroot00000000000000[run] source=mpi4py_fft parallel=True omit = */utilities/nc_file.py [report] exclude_lines = pragma: no cover raise NotImplementedError except: ImportError if 0: if __name__ == .__main__.: mpi4py-fft-2.0.6/tests/runtests.sh000077500000000000000000000022021462034230000170740ustar00rootroot00000000000000#!/bin/sh set -e export OMPI_MCA_plm_ssh_agent=false export OMPI_MCA_pml=ob1 export OMPI_MCA_btl=tcp,self export OMPI_MCA_mpi_yield_when_idle=true export OMPI_MCA_btl_base_warn_component_unused=false export OMPI_MCA_rmaps_base_oversubscribe=true export 
PRTE_MCA_rmaps_default_mapping_policy=:oversubscribe set -x python -m coverage erase python -m coverage run -m test_fftw python -m coverage run -m test_libfft python -m coverage run -m test_io python -m coverage run -m test_darray mpiexec -n 2 python -m coverage run -m test_pencil mpiexec -n 4 python -m coverage run -m test_pencil #mpiexec -n 8 python -m coverage test_pencil.py mpiexec -n 2 python -m coverage run -m test_mpifft mpiexec -n 4 python -m coverage run -m test_mpifft #mpiexec -n 8 python -m coverage test_mpifft.py #mpiexec -n 12 python -m coverage test_mpifft.py mpiexec -n 2 python -m coverage run -m test_io mpiexec -n 4 python -m coverage run -m test_io mpiexec -n 2 python -m coverage run -m test_darray mpiexec -n 4 python -m coverage run -m test_darray mpiexec -n 2 python -m coverage run spectral_dns_solver.py python -m coverage combine python -m coverage xml mpi4py-fft-2.0.6/tests/spectral_dns_solver.py000077700000000000000000000000001462034230000277502../examples/spectral_dns_solver.pyustar00rootroot00000000000000mpi4py-fft-2.0.6/tests/test_darray.py000066400000000000000000000116101462034230000175440ustar00rootroot00000000000000import numpy as np from mpi4py import MPI from mpi4py_fft import DistArray, newDistArray, PFFT from mpi4py_fft.pencil import Subcomm comm = MPI.COMM_WORLD def test_1Darray(): N = (8,) z = DistArray(N, val=2) assert z[0] == 2 assert z.shape == N def test_2Darray(): N = (8, 8) for subcomm in ((0, 1), (1, 0), None, Subcomm(comm, (0, 1))): for rank in (0, 1, 2): M = (2,)*rank + N alignment = None if subcomm is None and rank == 1: alignment = 1 a = DistArray(M, subcomm=subcomm, val=1, rank=rank, alignment=alignment) assert a.rank == rank assert a.global_shape == M _ = a.substart c = a.subcomm z = a.commsizes _ = a.pencil assert np.prod(np.array(z)) == comm.Get_size() if rank > 0: a0 = a[0] assert isinstance(a0, DistArray) assert a0.rank == rank-1 aa = a.v assert isinstance(aa, np.ndarray) try: k = a.get((0,)*rank+(0, slice(None))) 
if comm.Get_rank() == 0: assert len(k) == N[1] assert np.sum(k) == N[1] k = a.get((0,)*rank+(slice(None), 0)) if comm.Get_rank() == 0: assert len(k) == N[0] assert np.sum(k) == N[0] except ModuleNotFoundError: pass _ = a.local_slice() newaxis = (a.alignment+1)%2 _ = a.get_pencil_and_transfer(newaxis) a[:] = MPI.COMM_WORLD.Get_rank() b = a.redistribute(newaxis) a = b.redistribute(out=a) a = b.redistribute(a.alignment, out=a) s0 = MPI.COMM_WORLD.reduce(np.linalg.norm(a)**2) s1 = MPI.COMM_WORLD.reduce(np.linalg.norm(b)**2) if MPI.COMM_WORLD.Get_rank() == 0: assert abs(s0-s1) < 1e-1 c = a.redistribute(a.alignment) assert c is a def test_3Darray(): N = (8, 8, 8) for subcomm in ((0, 0, 1), (0, 1, 0), (1, 0, 0), (0, 1, 1), (1, 0, 1), (1, 1, 0), None, Subcomm(comm, (0, 0, 1))): for rank in (0, 1, 2): M = (3,)*rank + N alignment = None if subcomm is None and rank == 1: alignment = 2 a = DistArray(M, subcomm=subcomm, val=1, rank=rank, alignment=alignment) assert a.rank == rank assert a.global_shape == M _ = a.substart _ = a.subcomm z = a.commsizes _ = a.pencil assert np.prod(np.array(z)) == comm.Get_size() if rank > 0: a0 = a[0] assert isinstance(a0, DistArray) assert a0.rank == rank-1 if rank == 2: a0 = a[0, 1] assert isinstance(a0, DistArray) assert a0.rank == 0 aa = a.v assert isinstance(aa, np.ndarray) try: k = a.get((0,)*rank+(0, 0, slice(None))) if comm.Get_rank() == 0: assert len(k) == N[2] assert np.sum(k) == N[2] k = a.get((0,)*rank+(slice(None), 0, 0)) if comm.Get_rank() == 0: assert len(k) == N[0] assert np.sum(k) == N[0] except ModuleNotFoundError: pass _ = a.local_slice() newaxis = (a.alignment+1)%3 _ = a.get_pencil_and_transfer(newaxis) a[:] = MPI.COMM_WORLD.Get_rank() b = a.redistribute(newaxis) a = b.redistribute(out=a) s0 = MPI.COMM_WORLD.reduce(np.linalg.norm(a)**2) s1 = MPI.COMM_WORLD.reduce(np.linalg.norm(b)**2) if MPI.COMM_WORLD.Get_rank() == 0: assert abs(s0-s1) < 1e-1 def test_newDistArray(): N = (8, 8, 8) pfft = PFFT(MPI.COMM_WORLD, N) for 
forward_output in (True, False): for view in (True, False): for rank in (0, 1, 2): a = newDistArray(pfft, forward_output=forward_output, rank=rank, view=view) if view is False: assert isinstance(a, DistArray) assert a.rank == rank if rank == 0: qfft = PFFT(MPI.COMM_WORLD, darray=a) elif rank == 1: qfft = PFFT(MPI.COMM_WORLD, darray=a[0]) else: qfft = PFFT(MPI.COMM_WORLD, darray=a[0, 0]) qfft.destroy() else: assert isinstance(a, np.ndarray) assert a.base.rank == rank pfft.destroy() if __name__ == '__main__': test_1Darray() test_2Darray() test_3Darray() test_newDistArray() mpi4py-fft-2.0.6/tests/test_fftw.py000066400000000000000000000202471462034230000172360ustar00rootroot00000000000000from __future__ import print_function from time import time import numpy as np from scipy.fftpack import dctn as scipy_dctn from scipy.fftpack import dstn as scipy_dstn import scipy.fftpack # pylint: disable=unused-import from mpi4py_fft import fftw has_pyfftw = True try: import pyfftw except ImportError: has_pyfftw = False abstol = dict(f=5e-4, d=1e-12, g=1e-14) kinds = {'dst4': fftw.FFTW_RODFT11, # no scipy to compare with 'dct4': fftw.FFTW_REDFT11, # no scipy to compare with 'dst3': fftw.FFTW_RODFT01, 'dct3': fftw.FFTW_REDFT01, 'dct2': fftw.FFTW_REDFT10, 'dst2': fftw.FFTW_RODFT10, 'dct1': fftw.FFTW_REDFT00, 'dst1': fftw.FFTW_RODFT00} rkinds = {val: key for key, val in kinds.items()} def allclose(a, b): atol = abstol[a.dtype.char.lower()] return np.allclose(a, b, rtol=0, atol=atol) def test_fftw(): from itertools import product dims = (1, 2, 3) sizes = (7, 8, 10) types = '' for t in 'fdg': if fftw.get_fftw_lib(t): types += t fflags = (fftw.FFTW_ESTIMATE, fftw.FFTW_DESTROY_INPUT) iflags = (fftw.FFTW_ESTIMATE, fftw.FFTW_DESTROY_INPUT) for threads in (1, 2): for typecode in types: for dim in dims: for shape in product(*([sizes]*dim)): allaxes = tuple(reversed(range(dim))) for i in range(dim): for j in range(i+1, dim): axes = allaxes[i:j] #print(shape, axes, typecode, threads) # r2c - 
c2r input_array = fftw.aligned(shape, dtype=typecode) outshape = list(shape) outshape[axes[-1]] = shape[axes[-1]]//2+1 output_array = fftw.aligned(outshape, dtype=typecode.upper()) oa = output_array if typecode == 'd' else None # Test for both types of signature rfftn = fftw.rfftn(input_array, None, axes, threads, fflags, output_array=oa) A = np.random.random(shape).astype(typecode) input_array[:] = A B = rfftn() assert id(B) == id(rfftn.output_array) if has_pyfftw: B2 = pyfftw.interfaces.numpy_fft.rfftn(input_array, axes=axes) assert allclose(B, B2), np.linalg.norm(B-B2) ia = input_array if typecode == 'd' else None sa = np.take(input_array.shape, axes) if shape[axes[-1]] % 2 == 1 else None irfftn = fftw.irfftn(output_array, sa, axes, threads, iflags, output_array=ia) irfftn.input_array[...] = B A2 = irfftn(normalize=True) assert allclose(A, A2), np.linalg.norm(A-A2) hfftn = fftw.hfftn(output_array, sa, axes, threads, fflags, output_array=ia) hfftn.input_array[...] = B AC = hfftn().copy() ihfftn = fftw.ihfftn(input_array, None, axes, threads, iflags, output_array=oa) A2 = ihfftn(AC, implicit=False, normalize=True) assert allclose(A2, B), print(np.linalg.norm(A2-B)) # c2c input_array = fftw.aligned(shape, dtype=typecode.upper()) output_array = fftw.aligned_like(input_array) oa = output_array if typecode=='d' else None fftn = fftw.fftn(input_array, None, axes, threads, fflags, output_array=oa) C = np.random.random(shape).astype(typecode.upper()) fftn.input_array[...] = C D = fftn().copy() ifftn = fftw.ifftn(input_array, None, axes, threads, iflags, output_array=oa) ifftn.input_array[...] 
= D C2 = ifftn(normalize=True) assert allclose(C, C2), np.linalg.norm(C-C2) if has_pyfftw: D2 = pyfftw.interfaces.numpy_fft.fftn(C, axes=axes) assert allclose(D, D2), np.linalg.norm(D-D2) # r2r input_array = fftw.aligned(shape, dtype=typecode) output_array = fftw.aligned_like(input_array) oa = output_array if typecode =='d' else None for type in (1, 2, 3, 4): dct = fftw.dctn(input_array, None, axes, type, threads, fflags, output_array=oa) B = dct(A).copy() idct = fftw.idctn(input_array, None, axes, type, threads, iflags, output_array=oa) A2 = idct(B, implicit=True, normalize=True) assert allclose(A, A2), np.linalg.norm(A-A2) if typecode != 'g' and type != 4: B2 = scipy_dctn(A, axes=axes, type=type) assert allclose(B, B2), np.linalg.norm(B-B2) dst = fftw.dstn(input_array, None, axes, type, threads, fflags, output_array=oa) B = dst(A).copy() idst = fftw.idstn(input_array, None, axes, type, threads, iflags, output_array=oa) A2 = idst(B, implicit=True, normalize=True) assert allclose(A, A2), np.linalg.norm(A-A2) if typecode != 'g' and type != 4: B2 = scipy_dstn(A, axes=axes, type=type) assert allclose(B, B2), np.linalg.norm(B-B2) # Different r2r transforms along all axes. 
Just pick # any naxes transforms and compare with scipy naxes = len(axes) kds = np.random.randint(3, 11, size=naxes) # get naxes random transforms tsf = [rkinds[k] for k in kds] T = fftw.get_planned_FFT(input_array, input_array.copy(), axes=axes, kind=kds, threads=threads, flags=fflags) C = T(A) TI = fftw.get_planned_FFT(input_array.copy(), input_array.copy(), axes=axes, kind=list([fftw.inverse[kd] for kd in kds]), threads=threads, flags=iflags) C2 = TI(C) M = fftw.get_normalization(kds, input_array.shape, axes) assert allclose(C2*M, A) # Test vs scipy for transforms available in scipy if typecode != 'g' and not any(f in kds for f in (fftw.FFTW_RODFT11, fftw.FFTW_REDFT11)): for m, ts in enumerate(tsf): A = eval('scipy.fftpack.'+ts[:-1])(A, axis=axes[m], type=int(ts[-1])) assert allclose(C, A), np.linalg.norm(C-A) def test_wisdom(): # Test a simple export/import call fftw.export_wisdom('newwisdom.dat') fftw.import_wisdom('newwisdom.dat') fftw.forget_wisdom() def test_timelimit(): limit = 0.01 input_array = fftw.aligned((128, 128), dtype='d') t0 = time() fftw.rfftn(input_array, flags=fftw.FFTW_PATIENT) t1 = time()-t0 fftw.forget_wisdom() fftw.set_timelimit(limit) t0 = time() fftw.rfftn(input_array, flags=fftw.FFTW_PATIENT) t2 = time()-t0 assert t1 > t2 assert abs(t2-limit) < limit, print(abs(t2-limit), limit) fftw.cleanup() if __name__ == '__main__': test_fftw() test_wisdom() test_timelimit() mpi4py-fft-2.0.6/tests/test_io.py000066400000000000000000000176341462034230000167050ustar00rootroot00000000000000import functools import os from mpi4py import MPI import numpy as np from mpi4py_fft import PFFT, HDF5File, NCFile, newDistArray, generate_xdmf N = (12, 13, 14, 15) comm = MPI.COMM_WORLD ex = {True: 'c', False: 'r'} writer = {'hdf5': functools.partial(HDF5File, mode='w'), 'netcdf4': functools.partial(NCFile, mode='w')} reader = {'hdf5': functools.partial(HDF5File, mode='r'), 'netcdf4': functools.partial(NCFile, mode='r')} ending = {'hdf5': '.h5', 'netcdf4': '.nc'} def 
remove_if_exists(filename): try: os.remove(filename) except OSError: pass def cleanup(): import glob files = glob.glob('*.h5')+glob.glob('*.xdmf')+glob.glob('*.nc') for f in files: remove_if_exists(f) def test_2D(backend, forward_output): if backend == 'netcdf4': assert forward_output is False T = PFFT(comm, (N[0], N[1])) for i, domain in enumerate([None, ((0, np.pi), (0, 2*np.pi)), (np.arange(N[0], dtype=float)*1*np.pi/N[0], np.arange(N[1], dtype=float)*2*np.pi/N[1])]): for rank in range(3): filename = "".join(('test2D_{}{}{}'.format(ex[i == 0], ex[forward_output], rank), ending[backend])) if backend == 'netcdf4': remove_if_exists(filename) u = newDistArray(T, forward_output=forward_output, val=1, rank=rank) hfile = writer[backend](filename, domain=domain) assert hfile.backend() == backend hfile.write(0, {'u': [u]}) hfile.write(1, {'u': [u]}) u.write(hfile, 'u', 2) if rank > 0: hfile.write(0, {'u': [u]}, as_scalar=True) hfile.write(1, {'u': [u]}, as_scalar=True) u.write(hfile, 'u', 2, as_scalar=True) u.write('t'+filename, 'u', 0) u.write('t'+filename, 'u', 0, [slice(None), 3]) if not forward_output and backend == 'hdf5' and comm.Get_rank() == 0: generate_xdmf(filename) generate_xdmf(filename, order='visit') u0 = newDistArray(T, forward_output=forward_output, rank=rank) read = reader[backend](filename) read.read(u0, 'u', step=0) u0.read(filename, 'u', 2) u0.read(read, 'u', 2) assert np.allclose(u0, u) if backend == 'netcdf4': # Test opening file in mode 'a' when not existing remove_if_exists('nctesta.nc') _ = NCFile('nctesta.nc', domain=domain, mode='a') T.destroy() def test_3D(backend, forward_output): if backend == 'netcdf4': assert forward_output is False T = PFFT(comm, (N[0], N[1], N[2])) d0 = ((0, np.pi), (0, 2*np.pi), (0, 3*np.pi)) d1 = (np.arange(N[0], dtype=float)*1*np.pi/N[0], np.arange(N[1], dtype=float)*2*np.pi/N[1], np.arange(N[2], dtype=float)*3*np.pi/N[2]) for i, domain in enumerate([None, d0, d1]): for rank in range(3): filename = 
''.join(('test_{}{}{}'.format(ex[i == 0], ex[forward_output], rank), ending[backend])) if backend == 'netcdf4': remove_if_exists('uv'+filename) remove_if_exists('v'+filename) u = newDistArray(T, forward_output=forward_output, rank=rank) v = newDistArray(T, forward_output=forward_output, rank=rank) h0file = writer[backend]('uv'+filename, domain=domain) h1file = writer[backend]('v'+filename, domain=domain) u[:] = np.random.random(u.shape) v[:] = 2 for k in range(3): h0file.write(k, {'u': [u, (u, [slice(None), slice(None), 4]), (u, [5, 5, slice(None)])], 'v': [v, (v, [slice(None), 6, slice(None)])]}) h1file.write(k, {'v': [v, (v, [slice(None), 6, slice(None)]), (v, [6, 6, slice(None)])]}) # One more time with same k h0file.write(k, {'u': [u, (u, [slice(None), slice(None), 4]), (u, [5, 5, slice(None)])], 'v': [v, (v, [slice(None), 6, slice(None)])]}) h1file.write(k, {'v': [v, (v, [slice(None), 6, slice(None)]), (v, [6, 6, slice(None)])]}) if rank > 0: for k in range(3): u.write('uv'+filename, 'u', k, as_scalar=True) u.write('uv'+filename, 'u', k, [slice(None), slice(None), 4], as_scalar=True) u.write('uv'+filename, 'u', k, [5, 5, slice(None)], as_scalar=True) v.write('uv'+filename, 'v', k, as_scalar=True) v.write('uv'+filename, 'v', k, [slice(None), 6, slice(None)], as_scalar=True) if not forward_output and backend == 'hdf5' and comm.Get_rank() == 0: generate_xdmf('uv'+filename) generate_xdmf('v'+filename, periodic=False) generate_xdmf('v'+filename, periodic=(True, True, True)) generate_xdmf('v'+filename, order='visit') u0 = newDistArray(T, forward_output=forward_output, rank=rank) read = reader[backend]('uv'+filename) read.read(u0, 'u', step=0) assert np.allclose(u0, u) read.read(u0, 'v', step=0) assert np.allclose(u0, v) T.destroy() def test_4D(backend, forward_output): if backend == 'netcdf4': assert forward_output is False T = PFFT(comm, (N[0], N[1], N[2], N[3])) d0 = ((0, np.pi), (0, 2*np.pi), (0, 3*np.pi), (0, 4*np.pi)) d1 = (np.arange(N[0], 
dtype=float)*1*np.pi/N[0], np.arange(N[1], dtype=float)*2*np.pi/N[1], np.arange(N[2], dtype=float)*3*np.pi/N[2], np.arange(N[3], dtype=float)*4*np.pi/N[3] ) for i, domain in enumerate([None, d0, d1]): for rank in range(3): filename = "".join(('h5test4_{}{}{}'.format(ex[i == 0], ex[forward_output], rank), ending[backend])) if backend == 'netcdf4': remove_if_exists('uv'+filename) u = newDistArray(T, forward_output=forward_output, rank=rank) v = newDistArray(T, forward_output=forward_output, rank=rank) h0file = writer[backend]('uv'+filename, domain=domain) u[:] = np.random.random(u.shape) v[:] = 2 for k in range(3): h0file.write(k, {'u': [u, (u, [slice(None), 4, slice(None), slice(None)])], 'v': [v, (v, [slice(None), slice(None), 5, 6])]}) if not forward_output and backend == 'hdf5' and comm.Get_rank() == 0: generate_xdmf('uv'+filename) u0 = newDistArray(T, forward_output=forward_output, rank=rank) read = reader[backend]('uv'+filename) read.read(u0, 'u', step=0) assert np.allclose(u0, u) read.read(u0, 'v', step=0) assert np.allclose(u0, v) T.destroy() if __name__ == '__main__': #pylint: disable=unused-import cleanup() skip = {'hdf5': False, 'netcdf4': False} try: import h5py except ImportError: skip['hdf5'] = True try: import netCDF4 except ImportError: skip['netcdf4'] = True for bnd in ('hdf5', 'netcdf4'): if not skip[bnd]: forw_output = [False] if bnd == 'hdf5': forw_output.append(True) for kind in forw_output: test_3D(bnd, kind) test_2D(bnd, kind) if bnd == 'hdf5': test_4D(bnd, kind) cleanup() mpi4py-fft-2.0.6/tests/test_libfft.py000066400000000000000000000124121462034230000175310ustar00rootroot00000000000000from __future__ import print_function from time import time import importlib import functools import numpy as np from mpi4py_fft import fftw from mpi4py_fft.libfft import FFT has_backend = {'fftw': True} for backend in ('pyfftw', 'mkl_fft', 'scipy', 'numpy'): has_backend[backend] = True try: importlib.import_module(backend) except ImportError: 
has_backend[backend] = False abstol = dict(f=5e-5, d=1e-14, g=1e-14) def allclose(a, b): atol = abstol[a.dtype.char.lower()] return np.allclose(a, b, rtol=0, atol=atol) def test_libfft(): from itertools import product dims = (1, 2, 3) sizes = (7, 8, 9) types = '' for t in 'fd': if fftw.get_fftw_lib(t): types += t+t.upper() for backend in ('pyfftw', 'mkl_fft', 'scipy', 'numpy', 'fftw'): if has_backend[backend] is False: continue t0 = 0 for typecode in types: for dim in dims: for shape in product(*([sizes]*dim)): allaxes = tuple(reversed(range(dim))) for i in range(dim): for j in range(i+1, dim): for axes in (None, allaxes[i:j]): #print(shape, axes, typecode) fft = FFT(shape, axes, dtype=typecode, backend=backend, planner_effort='FFTW_ESTIMATE') A = fft.forward.input_array B = fft.forward.output_array A[...] = np.random.random(A.shape).astype(typecode) X = A.copy() B.fill(0) t0 -= time() B = fft.forward(A, B) t0 += time() A.fill(0) t0 -= time() A = fft.backward(B, A) t0 += time() assert allclose(A, X) print('backend: ', backend, t0) # Padding is different because the physical space is padded and as such # difficult to initialize. We solve this problem by making one extra # transform for backend in ('pyfftw', 'mkl_fft', 'scipy', 'numpy', 'fftw'): if has_backend[backend] is False: continue for padding in (1.5, 2.0): for typecode in types: for dim in dims: for shape in product(*([sizes]*dim)): allaxes = tuple(reversed(range(dim))) for i in range(dim): axis = allaxes[i] axis -= len(shape) shape = list(shape) shape[axis] = int(shape[axis]*padding) #print(shape, axis, typecode, backend) fft = FFT(shape, axis, dtype=typecode, backend=backend, padding=padding, planner_effort='FFTW_ESTIMATE') A = fft.forward.input_array B = fft.forward.output_array A[...] 
= np.random.random(A.shape).astype(typecode) B.fill(0) B = fft.forward(A, B) X = B.copy() A.fill(0) A = fft.backward(B, A) B.fill(0) B = fft.forward(A, B) assert allclose(B, X), np.linalg.norm(B-X) for backend in ('pyfftw', 'mkl_fft', 'scipy', 'numpy', 'fftw'): if has_backend[backend] is False: continue if backend == 'fftw': dctn = functools.partial(fftw.dctn, type=3) idctn = functools.partial(fftw.idctn, type=3) transforms = {(1,): (dctn, idctn), (0, 1): (dctn, idctn)} elif backend == 'pyfftw': import pyfftw transforms = {(1,): (pyfftw.builders.rfftn, pyfftw.builders.irfftn), (0, 1): (pyfftw.builders.rfftn, pyfftw.builders.irfftn)} elif backend == 'numpy': transforms = {(1,): (np.fft.rfftn, np.fft.irfftn), (0, 1): (np.fft.rfftn, np.fft.irfftn)} elif backend == 'mkl_fft': import mkl_fft transforms = {(1,): (mkl_fft._numpy_fft.rfftn, mkl_fft._numpy_fft.irfftn), (0, 1): (mkl_fft._numpy_fft.rfftn, mkl_fft._numpy_fft.irfftn)} elif backend == 'scipy': from scipy.fftpack import fftn, ifftn transforms = {(1,): (fftn, ifftn), (0, 1): (fftn, ifftn)} for axis in ((1,), (0, 1)): fft = FFT(shape, axis, backend=backend, transforms=transforms) A = fft.forward.input_array B = fft.forward.output_array A[...] 
= np.random.random(A.shape) X = A.copy() B.fill(0) B = fft.forward(A, B) A.fill(0) A = fft.backward(B, A) assert allclose(A, X) if __name__ == '__main__': test_libfft() mpi4py-fft-2.0.6/tests/test_mpifft.py000066400000000000000000000310501462034230000175470ustar00rootroot00000000000000from __future__ import print_function import functools import numpy as np from mpi4py import MPI from mpi4py_fft.mpifft import PFFT from mpi4py_fft.pencil import Subcomm from mpi4py_fft.distarray import newDistArray from mpi4py_fft import fftw backends = ['fftw'] try: import pyfftw backends.append('pyfftw') except ImportError: pass abstol = dict(f=0.1, d=2e-10, g=1e-10) def allclose(a, b): atol = abstol[a.dtype.char.lower()] return np.allclose(a, b, rtol=0, atol=atol) def random_like(array): shape = array.shape dtype = array.dtype return np.random.random(shape).astype(dtype) def random_true_or_false(comm): r = 0 if comm.rank == 0: r = np.random.randint(2) r = comm.bcast(r) return r def test_r2r(): N = (5, 6, 7, 8, 9) assert MPI.COMM_WORLD.Get_size() < 6 dctn = functools.partial(fftw.dctn, type=3) idctn = functools.partial(fftw.idctn, type=3) dstn = functools.partial(fftw.dstn, type=3) idstn = functools.partial(fftw.idstn, type=3) fft = PFFT(MPI.COMM_WORLD, N, axes=((0,), (1, 2), (3, 4)), grid=(-1,), transforms={(1, 2): (dctn, idctn), (3, 4): (dstn, idstn)}) A = newDistArray(fft, forward_output=False) A[:] = np.random.random(A.shape) C = fftw.aligned_like(A) B = fft.forward(A) C = fft.backward(B, C) assert np.allclose(A, C) def test_mpifft(): from itertools import product comm = MPI.COMM_WORLD dims = (2, 3, 4,) sizes = (12, 13) assert MPI.COMM_WORLD.Get_size() < 8, "due to sizes" types = '' for t in 'fdg': if fftw.get_fftw_lib(t): types += t+t.upper() grids = {2: (None,), 3: ((-1,), None), 4: ((-1,), None)} for typecode in types: for dim in dims: for shape in product(*([sizes]*dim)): if dim < 3: n = min(shape) if typecode in 'fdg': n //= 2 n += 1 if n < comm.size: continue for grid in 
grids[dim]: padding = False for collapse in (True, False): for backend in backends: transforms = None if dim < 3: allaxes = [None, (-1,), (-2,), (-1, -2,), (-2, -1), (-1, 0), (0, -1), ((0,), (1,))] elif dim < 4: allaxes = [None, ((0,), (1, 2)), ((0,), (-2, -1))] elif dim > 3: allaxes = [None, ((0,), (1,), (2,), (3,)), ((0,), (1, 2, 3)), ((0,), (1,), (2, 3))] dctn = functools.partial(fftw.dctn, type=3) idctn = functools.partial(fftw.idctn, type=3) if not typecode in 'FDG': if backend == 'pyfftw': transforms = {(3,): (pyfftw.builders.rfftn, pyfftw.builders.irfftn), (2, 3): (pyfftw.builders.rfftn, pyfftw.builders.irfftn), (1, 2, 3): (pyfftw.builders.rfftn, pyfftw.builders.irfftn), (0, 1, 2, 3): (pyfftw.builders.rfftn, pyfftw.builders.irfftn)} else: transforms = {(3,): (dctn, idctn), (2, 3): (dctn, idctn), (1, 2, 3): (dctn, idctn), (0, 1, 2, 3): (dctn, idctn)} for axes in allaxes: # Test also the slab is number interface _grid = grid if grid is not None: ax = -1 if axes is not None: ax = axes[-1] if isinstance(axes[-1], int) else axes[-1][-1] _slab = (ax+1) % len(shape) _grid = [1]*(_slab+1) _grid[_slab] = 0 _comm = comm # Test also the comm is Subcomm interfaces # For PFFT the Subcomm needs to be as long as shape if len(shape) > 2 and axes is None and grid is None: _dims = [0] * len(shape) _dims[-1] = 1 # distribute all but last axis (axes is None) _comm = comm if random_true_or_false(comm) == 1: # then test Subcomm with a MPI.CART argument _dims = MPI.Compute_dims(comm.Get_size(), _dims) _comm = comm.Create_cart(_dims) _dims = None _comm = Subcomm(_comm, _dims) #print(typecode, shape, axes, collapse, _grid) fft = PFFT(_comm, shape, axes=axes, dtype=typecode, padding=padding, grid=_grid, collapse=collapse, backend=backend, transforms=transforms) #if comm.rank == 0: # grid_ = [c.size for c in fft.subcomm] # print('grid:{} shape:{} typecode:{} backend:{} axes:{}' # .format(grid_, shape, typecode, backend, axes)) assert fft.dtype(True) == fft.forward.output_array.dtype 
assert fft.dtype(False) == fft.forward.input_array.dtype assert len(fft.axes) == len(fft.xfftn) assert len(fft.axes) == len(fft.transfer) + 1 assert (fft.forward.input_pencil.subshape == fft.forward.input_array.shape) assert (fft.forward.output_pencil.subshape == fft.forward.output_array.shape) assert (fft.backward.input_pencil.subshape == fft.backward.input_array.shape) assert (fft.backward.output_pencil.subshape == fft.backward.output_array.shape) assert np.all(np.array(fft.global_shape(True)) == np.array(fft.forward.output_pencil.shape)) assert np.all(np.array(fft.global_shape(False)) == np.array(fft.forward.input_pencil.shape)) ax = -1 if axes is None else axes[-1] if isinstance(axes[-1], int) else axes[-1][-1] assert fft.forward.input_pencil.substart[ax] == 0 assert fft.backward.output_pencil.substart[ax] == 0 ax = 0 if axes is None else axes[0] if isinstance(axes[0], int) else axes[0][0] assert fft.forward.output_pencil.substart[ax] == 0 assert fft.backward.input_pencil.substart[ax] == 0 assert fft.dimensions == len(shape) U = random_like(fft.forward.input_array) if random_true_or_false(comm) == 1: F = fft.forward(U) V = fft.backward(F) assert allclose(V, U) else: fft.forward.input_array[...] 
= U fft.forward() fft.backward() V = fft.backward.output_array assert allclose(V, U) fft.destroy() padding = [1.5]*len(shape) for backend in backends: if dim < 3: allaxes = [None, (-1,), (-2,), (-1, -2,), (-2, -1), (-1, 0), (0, -1), ((0,), (1,))] elif dim < 4: allaxes = [None, ((0,), (1,), (2,)), ((0,), (-2,), (-1,))] elif dim > 3: allaxes = [None, (0, 1, -2, -1), ((0,), (1,), (2,), (3,))] for axes in allaxes: _grid = grid if grid is not None: ax = -1 if axes is not None: ax = axes[-1] if isinstance(axes[-1], int) else axes[-1][-1] _slab = (ax+1) % len(shape) _grid = [1]*(_slab+1) _grid[_slab] = 0 fft = PFFT(comm, shape, axes=axes, dtype=typecode, padding=padding, grid=_grid, backend=backend) #if comm.rank == 0: # grid = [c.size for c in fft.subcomm] # print('grid:{} shape:{} typecode:{} backend:{} axes:{}' # .format(grid, shape, typecode, backend, axes)) assert len(fft.axes) == len(fft.xfftn) assert len(fft.axes) == len(fft.transfer) + 1 assert (fft.forward.input_pencil.subshape == fft.forward.input_array.shape) assert (fft.forward.output_pencil.subshape == fft.forward.output_array.shape) assert (fft.backward.input_pencil.subshape == fft.backward.input_array.shape) assert (fft.backward.output_pencil.subshape == fft.backward.output_array.shape) ax = -1 if axes is None else axes[-1] if isinstance(axes[-1], int) else axes[-1][-1] assert fft.forward.input_pencil.substart[ax] == 0 assert fft.backward.output_pencil.substart[ax] == 0 ax = 0 if axes is None else axes[0] if isinstance(axes[0], int) else axes[0][0] assert fft.forward.output_pencil.substart[ax] == 0 assert fft.backward.input_pencil.substart[ax] == 0 U = random_like(fft.forward.input_array) F = fft.forward(U) if random_true_or_false(comm) == 1: Fc = F.copy() V = fft.backward(F) F = fft.forward(V) assert allclose(F, Fc) else: fft.backward.input_array[...] 
= F fft.backward() fft.forward() V = fft.forward.output_array assert allclose(F, V) # Test normalization on backward transform instead of default fft.backward.input_array[...] = F fft.backward(normalize=True) fft.forward(normalize=False) V = fft.forward.output_array assert allclose(F, V) fft.destroy() if __name__ == '__main__': test_mpifft() test_r2r() mpi4py-fft-2.0.6/tests/test_pencil.py000066400000000000000000000041001462034230000175300ustar00rootroot00000000000000from __future__ import print_function import numpy as np from mpi4py import MPI from mpi4py_fft.pencil import Subcomm, Pencil def test_pencil(): from itertools import product comm = MPI.COMM_WORLD dims = (2, 3) sizes = (7, 8, 9) types = 'fdFD' #'hilfdgFDG' for typecode in types: for dim in dims: for shape in product(*([sizes]*dim)): axes = list(range(dim)) for axis1, axis2, axis3 in product(axes, axes, axes): if axis1 == axis2: continue if axis2 == axis3: continue axis3 -= len(shape) #if comm.rank == 0: # print(shape, axis1, axis2, axis3, typecode) for pdim in [None] + list(range(1, dim-1)): subcomm = Subcomm(comm, pdim) pencil0 = Pencil(subcomm, shape) pencilA = pencil0.pencil(axis1) pencilB = pencilA.pencil(axis2) pencilC = pencilB.pencil(axis3) trans1 = Pencil.transfer(pencilA, pencilB, typecode) trans2 = Pencil.transfer(pencilB, pencilC, typecode) X = np.random.random(pencilA.subshape).astype(typecode) A = np.empty(pencilA.subshape, dtype=typecode) B = np.empty(pencilB.subshape, dtype=typecode) C = np.empty(pencilC.subshape, dtype=typecode) A[...] 
= X B.fill(0) trans1.forward(A, B) C.fill(0) trans2.forward(B, C) B.fill(0) trans2.backward(C, B) A.fill(0) trans1.backward(B, A) assert np.allclose(A, X) trans1.destroy() trans2.destroy() subcomm.destroy() if __name__ == '__main__': test_pencil() mpi4py-fft-2.0.6/tests/test_speed.py000066400000000000000000000114041462034230000173630ustar00rootroot00000000000000from time import time import numpy as np import pyfftw import scipy.fftpack as sp from mpi4py_fft import fftw import pickle try: #fftw.import_wisdom('wisdom.dat') pyfftw.import_wisdom(pickle.load(open('pyfftw.wisdom', 'rb'))) print('Wisdom imported') except: print('Wisdom not imported') N = (64, 64, 64) loops = 50 axis = 1 threads = 4 implicit = True flags = (fftw.FFTW_PATIENT, fftw.FFTW_DESTROY_INPUT) # Transform complex to complex #A = pyfftw.byte_align(np.random.random(N).astype('D')) #A = np.random.random(N).astype(np.dtype('D')) A = fftw.aligned(N, n=8, dtype=np.dtype('D')) A[:] = np.random.random(N).astype(np.dtype('D')) #print(A.ctypes.data % 32) input_array = fftw.aligned(A.shape, n=32, dtype=A.dtype) output_array = fftw.aligned(A.shape, n=32, dtype=A.dtype) AC = A.copy() ptime = [[], []] ftime = [[], []] stime = [[], []] for axis in ((1, 2), 0, 1, 2): axes = axis if np.ndim(axis) else [axis] # pyfftw fft = pyfftw.builders.fftn(input_array, axes=axes, threads=threads, overwrite_input=True) t0 = time() for i in range(loops): C = fft(A) ptime[0].append(time()-t0) # us fft = fftw.fftn(input_array, None, axes, threads, flags) t0 = time() for i in range(loops): C2 = fft(A, implicit=implicit) ftime[0].append(time()-t0) assert np.allclose(C, C2) # scipy if not A.dtype.char.upper() == 'G': C3 = sp.fftn(A, axes=axes) # scipy is caching, so call once before t0 = time() for i in range(loops): C3 = sp.fftn(A, axes=axes) stime[0].append(time()-t0) else: stime[0].append(0) # pyfftw ifft = pyfftw.builders.ifftn(output_array, axes=axes, threads=threads, overwrite_input=True) CC = C.copy() t0 = time() for i in 
range(loops): B = ifft(C, normalise_idft=True) ptime[1].append(time()-t0) # us ifft = fftw.ifftn(output_array, None, axes, threads, flags) t0 = time() for i in range(loops): B2 = ifft(C, normalize=True, implicit=implicit) ftime[1].append(time()-t0) assert np.allclose(B, B2), np.linalg.norm(B-B2) # scipy if not C.dtype.char.upper() == 'G': B3 = sp.ifftn(C, axes=axes) # scipy is caching, so call once before t0 = time() for i in range(loops): B3 = sp.ifftn(C, axes=axes) stime[1].append(time()-t0) else: stime[1].append(0) print("Timing forward transform axes (1, 2), 0, 1, 2") print("pyfftw {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*ptime[0])) print("mpi4py {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*ftime[0])) print("scipy {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*stime[0])) print("Timing backward transform axes (1, 2), 0, 1, 2") print("pyfftw {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*ptime[1])) print("mpi4py {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*ftime[1])) print("scipy {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*stime[1])) # Transform real to complex # Not scipy because they do not have rfftn #A = pyfftw.byte_align(np.random.random(N).astype('d')) A = np.random.random(N).astype(np.dtype('d', align=True)) input_array = np.zeros_like(A) ptime = [[], []] ftime = [[], []] for axis in ((1, 2), 0, 1, 2): axes = axis if np.ndim(axis) else [axis] # pyfftw rfft = pyfftw.builders.rfftn(input_array, axes=axes, threads=threads) t0 = time() for i in range(loops): C = rfft(A) ptime[0].append(time()-t0) # us rfft = fftw.rfftn(input_array, None, axes, threads, flags) t0 = time() for i in range(loops): C2 = rfft(A, implicit=implicit) ftime[0].append(time()-t0) assert np.allclose(C, C2) # pyfftw irfft = pyfftw.builders.irfftn(C.copy(), s=np.take(input_array.shape, axes), axes=axes, threads=threads) t0 = time() for i in range(loops): C2[:] = C # Because irfft is overwriting input D = irfft(C2, normalise_idft=True) ptime[1].append(time()-t0) # us irfft = 
fftw.irfftn(C.copy(), np.take(input_array.shape, axes), axes, threads, flags) t0 = time() for i in range(loops): C2[:] = C D2 = irfft(C2, normalize=True, implicit=implicit) ftime[1].append(time()-t0) assert np.allclose(D, D2), np.linalg.norm(D-D2) print("Timing real forward transform axes (1, 2), 0, 1, 2") print("pyfftw {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*ptime[0])) print("mpi4py {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*ftime[0])) print("Timing real backward transform axes (1, 2), 0, 1, 2") print("pyfftw {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*ptime[1])) print("mpi4py {0:2.4e} {1:2.4e} {2:2.4e} {3:2.4e}".format(*ftime[1])) fftw.export_wisdom('wisdom.dat')