pax_global_header00006660000000000000000000000064131776523650014531gustar00rootroot0000000000000052 comment=f34b1ca101ffaf9e04eff996df71215cdebdc9f7 gpyfft-0.7.1/000077500000000000000000000000001317765236500130355ustar00rootroot00000000000000gpyfft-0.7.1/.gitattributes000066400000000000000000000000141317765236500157230ustar00rootroot00000000000000* text=auto gpyfft-0.7.1/.gitignore000066400000000000000000000001161317765236500150230ustar00rootroot00000000000000build *.pyc gpyfft.egg-info dist gpyfftlib.cpp *.so .cache .ipynb_checkpoints gpyfft-0.7.1/LICENSE.txt000066400000000000000000000167431317765236500146730ustar00rootroot00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. "The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". 
The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. 
b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. 
(If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. 
If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. gpyfft-0.7.1/MANIFEST.in000066400000000000000000000001641317765236500145740ustar00rootroot00000000000000include README.md include MANIFEST.in include LICENSE.txt include gpyfft/gpyfftlib.pxd include gpyfft/gpyfftlib.pyx gpyfft-0.7.1/README.md000066400000000000000000000073361317765236500143250ustar00rootroot00000000000000gpyfft ====== A Python wrapper for the OpenCL FFT library clFFT. ## Introduction ### clFFT The open source library [clFFT] implements FFT for running on a GPU via OpenCL. Some highlights are: * batched 1D, 2D, and 3D transforms * supports many transform sizes (any combination of powers of 2,3,5,7,11, and 13) * flexible memory layout * single and double precisions * complex and real-to-complex transforms * supports injecting custom code for data pre- and post-processing ### gpyfft This Python wrapper is designed to tightly integrate with [PyOpenCL]. It consists of a low-level Cython-based wrapper with an interface similar to the underlying C library. On top of that it offers a high-level interface designed to work on data contained in instances of `pyopencl.array.Array`, a numpy work-alike array class for GPU computations. The high-level interface takes some inspiration from [pyFFTW]. For details of the high-level interface see [fft.py]. ## Status The low-level interface is complete (more or less); the high-level interface is not yet settled and is likely to change in the future. 
Features to come (not yet implemented in the high-level interface): ### work done - low level wrapper (mostly) completed - high level wrapper * complex-to-complex transform, in- and out-of-place * real-to-complex transform (out-of-place) * complex-to-real transform (out-of-place) * single precision * double precision * interleaved data * support injecting custom OpenCL code (pre and post callbacks) * accept pyopencl arrays with non-zero offsets (Syam Gadde) ## Basic usage Here we describe a simple example of performing a batch of 2D complex-to-complex FFT transforms on the GPU, using the high-level interface of gpyfft. The full source code of this example is contained in [simple\_example.py], which is the essence of [benchmark.py]. Note, for testing it is recommended to start [simple\_example.py] from the command line, so you have the possibility to interactively choose an OpenCL context (otherwise, e.g. when using IPython, you are not asked and might end up with a CPU device, which is prone to fail). imports: ``` python import numpy as np import pyopencl as cl import pyopencl.array as cla from gpyfft.fft import FFT ``` initialize GPU: ``` python context = cl.create_some_context() queue = cl.CommandQueue(context) ``` initialize memory (on host and GPU). In this example we want to perform in parallel four 2D FFTs for 1024x1024 single precision data. ``` python data_host = np.zeros((4, 1024, 1024), dtype = np.complex64) #data_host[:] = some_useful_data data_gpu = cla.to_device(queue, data_host) ``` create FFT transform plan for batched in-place 2D transform along the last two axes. ``` python transform = FFT(context, queue, data_gpu, axes = (2, 1)) ``` If you want an out-of-place transform, provide the output array as an additional argument after the input data. 
Start the work and wait until it is finished (Note that enqueue() returns a tuple of events) ``` python event, = transform.enqueue() event.wait() ``` Read back the data from the GPU to the host ``` python result_host = data_gpu.get() ``` ## Benchmark A simple benchmark is contained as a submodule; you can run it on the command line by `python -m gpyfft.benchmark`, or from Python ``` python import gpyfft.benchmark gpyfft.benchmark.run() ``` Note, you might want to set the `PYOPENCL_CTX` environment variable to select your OpenCL platform and device. [clFFT]: https://github.com/clMathLibraries/clFFT [pyFFTW]: https://github.com/hgomersall/pyFFTW [PyOpenCL]: https://mathema.tician.de/software/pyopencl [fft.py]: gpyfft/fft.py [pyfft]: http://github.com/Manticore/pyfft [simple\_example.py]: examples/simple_example.py [benchmark.py]: gpyfft/benchmark.py gpyfft-0.7.1/docs/000077500000000000000000000000001317765236500137655ustar00rootroot00000000000000gpyfft-0.7.1/docs/Makefile000066400000000000000000000131761317765236500154350ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = #BUILDDIR = ../../gpyfft-docs BUILDDIR = build #PDFBUILDDIR = /tmp PDFBUILDDIR = build #PDF = ../manual.pdf PDF = manual.pdf # Internal variables. 
PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/gpyfft.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/gpyfft.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/gpyfft" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/gpyfft" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(PDFBUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(PDFBUILDDIR)/latex all-pdf cp $(PDFBUILDDIR)/latex/*.pdf $(PDF) @echo "pdflatex finished; the PDF files are in $(PDFBUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. 
The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." gpyfft-0.7.1/docs/make.bat000066400000000000000000000126671317765236500154060ustar00rootroot00000000000000@echo off REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=..\..\gpyfft-docs set PDFBUILDDIR=\tmp set PDF= ..\manual.pdf set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source set I18NSPHINXOPTS=%SPHINXOPTS% source if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. 
pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. latexpdf to make pdf files echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( echo Don't do that rem for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i rem del /q /s %BUILDDIR%\* goto end ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. 
goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\gpyfft.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\gpyfft.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdf" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %PDFBUILDDIR%/latex chdir /D %PDFBUILDDIR%\latex pdflatex %PDFBUILDDIR%\latex\gpyfft.tex chdir /D %~dp0 copy %PDFBUILDDIR%\latex\gpyfft.pdf %PDF% if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %PDFBUILDDIR%\latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 
goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) :end gpyfft-0.7.1/docs/source/000077500000000000000000000000001317765236500152655ustar00rootroot00000000000000gpyfft-0.7.1/docs/source/building.rst000066400000000000000000000001471317765236500176160ustar00rootroot00000000000000Building gpyfft *************** Here will be detailed instructions for building gpyfft from source. gpyfft-0.7.1/docs/source/code.rst000066400000000000000000000002151317765236500167270ustar00rootroot00000000000000gpyfft class structure ====================== .. automodule:: gpyfft .. toctree:: :maxdepth: 2 gpyfft plan gpyfft_error gpyfft-0.7.1/docs/source/conf.py000066400000000000000000000172701317765236500165730ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # gpyfft documentation build configuration file, created by # sphinx-quickstart on Wed Jun 06 11:48:24 2012. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
#sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc', 'numpydoc', 'sphinx.ext.autosummary'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_parsers = { '.md': 'recommonmark.parser.CommonMarkParser', } source_suffix = ['.rst', '.md'] # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'gpyfft' copyright = u'2012, Gregor Thalhammer' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '0.1' # The full version, including alpha/beta/rc tags. release = '0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). 
#add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. 
#html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'gpyfftdoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'gpyfft.tex', u'gpyfft Documentation', u'Gregor Thalhammer', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. 
#latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'gpyfft', u'gpyfft Documentation', [u'Gregor Thalhammer'], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', 'gpyfft', u'gpyfft Documentation', u'Gregor Thalhammer', 'gpyfft', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' gpyfft-0.7.1/docs/source/gpyfft.rst000066400000000000000000000001261317765236500173150ustar00rootroot00000000000000GpyFFT ****** .. autoclass:: gpyfft.GpyFFT :members: get_version, create_plan gpyfft-0.7.1/docs/source/gpyfft_error.rst000066400000000000000000000001111317765236500205200ustar00rootroot00000000000000GpyFFT_Error ************ .. autoclass:: gpyfft.GpyFFT_Error :members: gpyfft-0.7.1/docs/source/includeme.rst000066400000000000000000000000351317765236500177620ustar00rootroot00000000000000.. include:: ../../README.md gpyfft-0.7.1/docs/source/index.rst000066400000000000000000000006251317765236500171310ustar00rootroot00000000000000.. gpyfft documentation master file, created by sphinx-quickstart on Wed Jun 06 11:48:24 2012. 
You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. gpyfft ====== .. toctree:: :numbered: :maxdepth: 2 includeme building code Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` gpyfft-0.7.1/docs/source/plan.rst000066400000000000000000000003701317765236500167510ustar00rootroot00000000000000Plan ==== .. autoclass:: gpyfft.Plan :members: __init__, precision, scale_forward, scale_backward, batch_size, get_dim, shape, strides_in, strides_out, distances, layouts, inplace, temp_array_size, transpose_result, bake, enqueue_transform gpyfft-0.7.1/examples/000077500000000000000000000000001317765236500146535ustar00rootroot00000000000000gpyfft-0.7.1/examples/simple_example.py000066400000000000000000000006621317765236500202350ustar00rootroot00000000000000import numpy as np import pyopencl as cl import pyopencl.array as cla from gpyfft.fft import FFT context = cl.create_some_context() queue = cl.CommandQueue(context) data_host = np.zeros((4, 1024, 1024), dtype = np.complex64) #data_host[:] = some_useful_data data_gpu = cla.to_device(queue, data_host) transform = FFT(context, queue, data_gpu, axes = (2, 1)) event, = transform.enqueue() event.wait() result_host = data_gpu.get() gpyfft-0.7.1/gpyfft/000077500000000000000000000000001317765236500143345ustar00rootroot00000000000000gpyfft-0.7.1/gpyfft/__init__.py000066400000000000000000000002631317765236500164460ustar00rootroot00000000000000from __future__ import absolute_import import logging logging.basicConfig() from .version import __version__ from .gpyfftlib import GpyFFT, GpyFFT_Error, Plan from .fft import * gpyfft-0.7.1/gpyfft/benchmark.py000066400000000000000000000115351317765236500166450ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function import timeit import numpy as np from numpy.fft import fftn as npfftn from numpy.testing import assert_array_almost_equal, assert_allclose import 
pyopencl as cl import pyopencl.array as cla from gpyfft import FFT from gpyfft.gpyfftlib import GpyFFT_Error #real to complex: (forward) out_array.shape[axes][-1] = in_array.shape[axes][-1]//2 + 1 def run(double_precision=False): context = cl.create_some_context() queue = cl.CommandQueue(context) dtype = np.complex64 if not double_precision else np.complex128 n_run = 100 #set to 1 for testing for correct result if n_run > 1: nd_dataC = np.random.normal(size=(1024, 1024)).astype(dtype) else: nd_dataC = np.ones((1024, 1024), dtype = dtype) #set n_run to 1 nd_dataF = np.asfortranarray(nd_dataC) dataC = cla.to_device(queue, nd_dataC) dataF = cla.to_device(queue, nd_dataF) nd_result = np.zeros_like(nd_dataC, dtype = dtype) resultC = cla.to_device(queue, nd_result) resultF = cla.to_device(queue, np.asfortranarray(nd_result)) result = resultF axes_list = [(-2,-1), (-1,-2), None] #batched 2d transforms if True: print('out of place transforms', dataC.shape, dataC.dtype) print('axes in out') for axes in axes_list: for data in (dataC, dataF): for result in (resultC, resultF): t_ms, gflops = 0, 0 try: transform = FFT(context, queue, data, result, axes = axes) #transform.plan.transpose_result = True #not implemented for some transforms (works e.g. 
for out of place, (2,1) C C) print('%-10s %3s %3s' % ( axes, 'C' if data.flags.c_contiguous else 'F', 'C' if result.flags.c_contiguous else 'F', ), end=' ', ) tic = timeit.default_timer() for i in range(n_run): events = transform.enqueue() #events = transform.enqueue(False) for e in events: e.wait() toc = timeit.default_timer() t_ms = 1e3*(toc-tic)/n_run gflops = 5e-9 * np.log2(np.prod(transform.t_shape))*np.prod(transform.t_shape) * transform.batchsize / (1e-3*t_ms) npfft_result = npfftn(nd_dataC, axes = axes) if transform.plan.transpose_result: npfft_result = np.swapaxes(npfft_result, axes[0], axes[1]) max_error = np.max(abs(result.get() - npfft_result)) print('%8.1e'%max_error, end=' ') assert_allclose(result.get(), npfft_result, atol = 1e-8 if double_precision else 1e-3, rtol = 1e-8 if double_precision else 1e-3) #assert_array_almost_equal(abs(result.get() - npfftn(data.get(), axes = axes)), # 1e-4) except GpyFFT_Error as e: print(e) except AssertionError as e: print(e) except Exception as e: print(e) finally: print('%5.2fms %6.2f Gflops' % (t_ms, gflops) ) print('in place transforms', nd_dataC.shape, nd_dataC.dtype) for axes in axes_list: for nd_data in (nd_dataC, nd_dataF): data = cla.to_device(queue, nd_data) transform = FFT(context, queue, data, axes = axes) #transform.plan.transpose_result = True #not implemented tic = timeit.default_timer() for i in range(n_run): # inplace transform fails for n_run > 1 events = transform.enqueue() for e in events: e.wait() toc = timeit.default_timer() t_ms = 1e3*(toc-tic)/n_run gflops = 5e-9 * np.log2(np.prod(transform.t_shape))*np.prod(transform.t_shape) * transform.batchsize / (1e-3*t_ms) print('%-10s %3s %5.2fms %6.2f Gflops' % ( axes, 'C' if data.flags.c_contiguous else 'F', t_ms, gflops )) #assert_array_almost_equal(data.get(queue=queue), npfftn(nd_data, axes = axes)) #never fails ???? 
if __name__ == '__main__': run() run(double_precision=True) gpyfft-0.7.1/gpyfft/fft.py000066400000000000000000000235411317765236500154720ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function from .gpyfftlib import GpyFFT import gpyfft.gpyfftlib as gfft import pyopencl as cl GFFT = GpyFFT(debug=False) import pyopencl as cl import numpy as np # TODO: class FFT(object): def __init__(self, context, queue, in_array, out_array=None, axes = None, fast_math = False, real=False, callbacks=None, #dict: 'pre', 'post' ): # Callbacks: dict(pre=b'pre source (kernel named pre!)') self.context = context self.queue = queue # if no axes are given, transform all axes, select axes order for good performance depending on memory layout if axes is None: if in_array.flags.c_contiguous: axes = np.arange(in_array.ndim)[::-1] elif in_array.flags.f_contiguous: axes = np.arange(in_array.ndim) else: axes = np.arange(in_array.dim)[::-1] # TODO: find good heuristics for this (rare), e.g. based on strides else: axes = np.asarray(axes) t_strides_in, t_distance_in, t_batchsize_in, t_shape, axes_transform = self.calculate_transform_strides(axes, in_array) if out_array is not None: t_inplace = False t_strides_out, t_distance_out, t_batchsize_out, t_shape_out, axes_transform_out = self.calculate_transform_strides( axes, out_array) if in_array.base_data is out_array.base_data: t_inplace = True #assert t_batchsize_out == t_batchsize_in and t_shape == t_shape_out, 'input and output size does not match' #TODO: fails for real-to-complex assert np.all(axes_transform == axes_transform_out), 'error finding transform axis (consider setting axes argument)' else: t_inplace = True t_strides_out, t_distance_out = t_strides_in, t_distance_in #assert np.issubclass(in_array.dtype, np.complexfloating) and \ # np.issubclass(in_array.dtype, np.complexfloating), \ #precision (+ fast_math!) 
#complex64 <-> complex64 #complex128 <-> complex128 if in_array.dtype in (np.float32, np.complex64): precision = gfft.CLFFT_SINGLE elif in_array.dtype in (np.float64, np.complex128): precision = gfft.CLFFT_DOUBLE #TODO: add assertions that precision match if in_array.dtype in (np.float32, np.float64): layout_in = gfft.CLFFT_REAL layout_out = gfft.CLFFT_HERMITIAN_INTERLEAVED expected_out_shape = list(in_array.shape) expected_out_shape[axes_transform[0]] = expected_out_shape[axes_transform[0]]//2 + 1 assert out_array.shape == tuple(expected_out_shape), \ 'output array shape %s does not match expected shape: %s'%(out_array.shape,expected_out_shape) elif in_array.dtype in (np.complex64, np.complex128): if not real: layout_in = gfft.CLFFT_COMPLEX_INTERLEAVED layout_out = gfft.CLFFT_COMPLEX_INTERLEAVED else: # complex-to-real transform layout_in = gfft.CLFFT_HERMITIAN_INTERLEAVED layout_out = gfft.CLFFT_REAL t_shape = t_shape_out if t_inplace and ((layout_in is gfft.CLFFT_REAL) or (layout_out is gfft.CLFFT_REAL)): assert ((in_array.strides[axes_transform[0]] == in_array.dtype.itemsize) and \ (out_array.strides[axes_transform[0]] == out_array.dtype.itemsize)), \ 'inline real transforms need stride 1 for first transform axis' self.t_shape = t_shape self.batchsize = t_batchsize_in plan = GFFT.create_plan(context, t_shape) plan.inplace = t_inplace plan.strides_in = t_strides_in plan.strides_out = t_strides_out plan.distances = (t_distance_in, t_distance_out) plan.batch_size = self.batchsize plan.precision = precision plan.layouts = (layout_in, layout_out) if callbacks is not None: if callbacks.has_key('pre'): plan.set_callback(b'pre', callbacks['pre'], 'pre') if 'post' in callbacks: plan.set_callback(b'post', callbacks['post'], 'post') if False: print('axes', axes ) print('in_array.shape: ', in_array.shape) print('in_array.strides/itemsize', tuple(s // in_array.dtype.itemsize for s in in_array.strides)) print('shape transform ', t_shape) print('layout_in ', 
str(layout_in).split('.')[1]) print('t_strides ', t_strides_in) print('distance_in ', t_distance_in) print('batchsize ', t_batchsize_in) print('layout_out ', str(layout_out).split('.')[1]) print('t_stride_out ', t_strides_out) print('inplace ', t_inplace) plan.bake(self.queue) temp_size = plan.temp_array_size if temp_size: #print 'temp_size:', plan.temp_array_size self.temp_buffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE, size = temp_size) else: self.temp_buffer = None self.plan = plan self.data = in_array self.result = out_array @classmethod def calculate_transform_strides(cls, axes_transform, array): shape = np.array(array.shape) strides = np.array(array.strides) dtype = array.dtype ddim = len(shape) #dimensionality data tdim = len(axes_transform) #dimensionality transform assert tdim <= ddim # transform negative axis values (e.g. -1 for last axis) to positive axes_transform[axes_transform<0] += ddim # remaining, non-transformed axes axes_notransform = np.lib.arraysetops.setdiff1d(range(ddim), axes_transform) #sort non-transformed axes by strides axes_notransform = axes_notransform[np.argsort(strides[axes_notransform])] #print "axes_notransformed sorted", axes_notransform # -> list of collapsable axes, [ [x,y], [z] ] collapsable_axes_list = [] #result collapsable_axes_candidates = axes_notransform[:1].tolist() #intermediate list of collapsable axes (magic code to get empty list if axes_notransform is empty) for a in axes_notransform[1:]: if strides[a] == strides[collapsable_axes_candidates[-1]] * shape[collapsable_axes_candidates[-1]]: collapsable_axes_candidates.append(a) #add axes to intermediate list of collapsable axes else: #does not fit into current intermediate list of collapsable axes collapsable_axes_list.append(collapsable_axes_candidates) #store away intermediate list collapsable_axes_candidates = [a] #start new intermediate list collapsable_axes_list.append(collapsable_axes_candidates) #append last intermediate list to assert 
len(collapsable_axes_list) == 1, 'data layout not supported (only single non-transformed axis allowd)' #all non-transformed axes collapsed axes_notransform = collapsable_axes_list[0] #all axes collapsable: take single group of collapsable axes t_distances = strides[axes_notransform]//dtype.itemsize if len(t_distances) == 0: t_distance = 0 else: t_distance = t_distances[0] #takes smalles stride (axes_notransform have been sorted by stride size) batchsize = np.prod(shape[axes_notransform]) t_shape = shape[axes_transform] t_strides = strides[axes_transform]//dtype.itemsize return (tuple(t_strides), t_distance, batchsize, tuple(t_shape), tuple(axes_transform)) #, tuple(axes_notransform)) def enqueue(self, forward = True, wait_for_events = None): return self.enqueue_arrays(forward=forward, data=self.data, result=self.result, wait_for_events=wait_for_events) def enqueue_arrays(self, data = None, result = None, forward = True, wait_for_events = None): """enqueue transform""" if data is None: data = self.data else: assert data.shape == self.data.shape assert data.strides == self.data.strides assert data.dtype == self.data.dtype if result is None: result = self.result else: assert result.shape == self.result.shape assert result.strides == self.result.strides assert result.dtype == self.result.dtype # get buffer for data if data.offset != 0: data = data._new_with_changes(data=data.base_data[data.offset:], offset=0) data_buffer = data.base_data if result is not None: # get buffer for result if result.offset != 0: result = result._new_with_changes(data=result.base_data[result.offset:], offset=0) result_buffer = result.base_data events = self.plan.enqueue_transform((self.queue,), (data_buffer,), (result_buffer), direction_forward = forward, temp_buffer = self.temp_buffer, wait_for_events = wait_for_events) else: events = self.plan.enqueue_transform((self.queue,), (data_buffer,), direction_forward = forward, temp_buffer = self.temp_buffer, wait_for_events = wait_for_events) 
return events def update_arrays(self, input_array, output_array): pass gpyfft-0.7.1/gpyfft/gpyfftlib.pxd000066400000000000000000000203451317765236500170430ustar00rootroot00000000000000cdef extern from "clFFT.h": ctypedef int cl_int ctypedef unsigned int cl_uint ctypedef unsigned long int cl_ulong ctypedef float cl_float ctypedef void* cl_context ctypedef void* cl_command_queue ctypedef void* cl_event ctypedef void* cl_mem # cdef struct _cl_context: # pass # ctypedef _cl_context *cl_context # cdef struct _cl_command_queue: # pass # ctypedef _cl_command_queue *cl_command_queue # cdef struct _cl_event: # pass # ctypedef _cl_event *cl_event # cdef struct _cl_mem: # pass # ctypedef _cl_mem *cl_mem enum: CLFFT_DUMP_PROGRAMS ##define constant cpdef enum clfftStatus_: CLFFT_INVALID_GLOBAL_WORK_SIZE CLFFT_INVALID_MIP_LEVEL CLFFT_INVALID_BUFFER_SIZE CLFFT_INVALID_GL_OBJECT CLFFT_INVALID_OPERATION CLFFT_INVALID_EVENT CLFFT_INVALID_EVENT_WAIT_LIST CLFFT_INVALID_GLOBAL_OFFSET CLFFT_INVALID_WORK_ITEM_SIZE CLFFT_INVALID_WORK_GROUP_SIZE CLFFT_INVALID_WORK_DIMENSION CLFFT_INVALID_KERNEL_ARGS CLFFT_INVALID_ARG_SIZE CLFFT_INVALID_ARG_VALUE CLFFT_INVALID_ARG_INDEX CLFFT_INVALID_KERNEL CLFFT_INVALID_KERNEL_DEFINITION CLFFT_INVALID_KERNEL_NAME CLFFT_INVALID_PROGRAM_EXECUTABLE CLFFT_INVALID_PROGRAM CLFFT_INVALID_BUILD_OPTIONS CLFFT_INVALID_BINARY CLFFT_INVALID_SAMPLER CLFFT_INVALID_IMAGE_SIZE CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR CLFFT_INVALID_MEM_OBJECT CLFFT_INVALID_HOST_PTR CLFFT_INVALID_COMMAND_QUEUE CLFFT_INVALID_QUEUE_PROPERTIES CLFFT_INVALID_CONTEXT CLFFT_INVALID_DEVICE CLFFT_INVALID_PLATFORM CLFFT_INVALID_DEVICE_TYPE CLFFT_INVALID_VALUE CLFFT_MAP_FAILURE CLFFT_BUILD_PROGRAM_FAILURE CLFFT_IMAGE_FORMAT_NOT_SUPPORTED CLFFT_IMAGE_FORMAT_MISMATCH CLFFT_MEM_COPY_OVERLAP CLFFT_PROFILING_INFO_NOT_AVAILABLE CLFFT_OUT_OF_HOST_MEMORY CLFFT_OUT_OF_RESOURCES CLFFT_MEM_OBJECT_ALLOCATION_FAILURE CLFFT_COMPILER_NOT_AVAILABLE CLFFT_DEVICE_NOT_AVAILABLE CLFFT_DEVICE_NOT_FOUND CLFFT_SUCCESS 
CLFFT_BUGCHECK CLFFT_NOTIMPLEMENTED CLFFT_TRANSPOSED_NOTIMPLEMENTED CLFFT_FILE_NOT_FOUND CLFFT_FILE_CREATE_FAILURE CLFFT_VERSION_MISMATCH CLFFT_INVALID_PLAN CLFFT_DEVICE_NO_DOUBLE CLFFT_DEVICE_MISMATCH ctypedef clfftStatus_ clfftStatus cpdef enum clfftDim_: CLFFT_1D CLFFT_2D CLFFT_3D ctypedef clfftDim_ clfftDim cpdef enum clfftLayout_: CLFFT_COMPLEX_INTERLEAVED CLFFT_COMPLEX_PLANAR CLFFT_HERMITIAN_INTERLEAVED CLFFT_HERMITIAN_PLANAR CLFFT_REAL ctypedef clfftLayout_ clfftLayout cpdef enum clfftPrecision_: CLFFT_SINGLE CLFFT_DOUBLE CLFFT_SINGLE_FAST CLFFT_DOUBLE_FAST ctypedef clfftPrecision_ clfftPrecision cpdef enum clfftDirection_: CLFFT_FORWARD CLFFT_BACKWARD CLFFT_MINUS CLFFT_PLUS ctypedef clfftDirection_ clfftDirection cpdef enum clfftResultLocation_: CLFFT_INPLACE CLFFT_OUTOFPLACE ctypedef clfftResultLocation_ clfftResultLocation cpdef enum clfftResultTransposed_: CLFFT_NOTRANSPOSE CLFFT_TRANSPOSED ctypedef clfftResultTransposed_ clfftResultTransposed cdef struct clfftSetupData_: cl_uint major cl_uint minor cl_uint patch cl_ulong debugFlags ctypedef clfftSetupData_ clfftSetupData cpdef enum clfftCallbackType_: PRECALLBACK POSTCALLBACK ctypedef clfftCallbackType_ clfftCallbackType ctypedef size_t clfftPlanHandle clfftStatus clfftInitSetupData(clfftSetupData *setupData) clfftStatus clfftSetup(const clfftSetupData *setupData) clfftStatus clfftTeardown() clfftStatus clfftGetVersion(cl_uint *major, cl_uint *minor, cl_uint *patch) clfftStatus clfftCreateDefaultPlan(clfftPlanHandle *plHandle, cl_context context, #const clfftDim dim, clfftDim dim, const size_t *clLengths) clfftStatus clfftCopyPlan(clfftPlanHandle *out_plHandle, cl_context new_context, clfftPlanHandle in_plHandle) clfftStatus clfftBakePlan(clfftPlanHandle plHandle, cl_uint numQueues, cl_command_queue *commQueueFFT, #void (*pfn_notify)(unsigned long, void *), void (*pfn_notify)(clfftPlanHandle plHandle, void *user_data), void *user_data) clfftStatus clfftDestroyPlan(clfftPlanHandle *plHandle) clfftStatus 
clfftGetPlanContext(const clfftPlanHandle plHandle, cl_context *context) clfftStatus clfftGetPlanPrecision(const clfftPlanHandle plHandle, clfftPrecision *precision) clfftStatus clfftSetPlanPrecision(clfftPlanHandle plHandle, clfftPrecision precision) clfftStatus clfftGetPlanScale(const clfftPlanHandle plHandle, clfftDirection dir, cl_float *scale) clfftStatus clfftSetPlanScale(clfftPlanHandle plHandle, clfftDirection dir, cl_float scale) clfftStatus clfftGetPlanBatchSize(const clfftPlanHandle plHandle, size_t *batchSize) clfftStatus clfftSetPlanBatchSize(clfftPlanHandle plHandle, size_t batchSize) clfftStatus clfftGetPlanDim(const clfftPlanHandle plHandle, clfftDim *dim, cl_uint *size) clfftStatus clfftSetPlanDim(clfftPlanHandle plHandle, const clfftDim dim) clfftStatus clfftGetPlanLength(const clfftPlanHandle plHandle, const clfftDim dim, size_t *clLengths) clfftStatus clfftSetPlanLength(clfftPlanHandle plHandle, const clfftDim dim, const size_t *clLengths) clfftStatus clfftGetPlanInStride(const clfftPlanHandle plHandle, const clfftDim dim, size_t *clStrides) clfftStatus clfftSetPlanInStride(clfftPlanHandle plHandle, const clfftDim dim, size_t *clStrides) clfftStatus clfftGetPlanOutStride(const clfftPlanHandle plHandle, const clfftDim dim, size_t *clStrides) clfftStatus clfftSetPlanOutStride(clfftPlanHandle plHandle, const clfftDim dim, size_t *clStrides) clfftStatus clfftGetPlanDistance(const clfftPlanHandle plHandle, size_t *iDist, size_t *oDist) clfftStatus clfftSetPlanDistance(clfftPlanHandle plHandle, size_t iDist, size_t oDist) clfftStatus clfftGetLayout(const clfftPlanHandle plHandle, clfftLayout *iLayout, clfftLayout *oLayout) clfftStatus clfftSetLayout(clfftPlanHandle plHandle, clfftLayout iLayout, clfftLayout oLayout) clfftStatus clfftGetResultLocation(const clfftPlanHandle plHandle, clfftResultLocation *placeness) clfftStatus clfftSetResultLocation(clfftPlanHandle plHandle, clfftResultLocation placeness) clfftStatus clfftGetPlanTransposeResult(const 
clfftPlanHandle plHandle, clfftResultTransposed *transposed) clfftStatus clfftSetPlanTransposeResult(clfftPlanHandle plHandle, clfftResultTransposed transposed) clfftStatus clfftGetTmpBufSize(const clfftPlanHandle plHandle, size_t *buffersize) clfftStatus clfftSetPlanCallback(clfftPlanHandle plHandle, const char* funcName, const char* funcString, int localMemSize, clfftCallbackType callbackType, cl_mem *userdata, int numUserdataBuffers) clfftStatus clfftEnqueueTransform(clfftPlanHandle plHandle, clfftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue *commQueues, cl_uint numWaitEvents, const cl_event *waitEvents, cl_event *outEvents, cl_mem *inputBuffers, cl_mem *outputBuffers, cl_mem tmpBuffer ) gpyfft-0.7.1/gpyfft/gpyfftlib.pyx000066400000000000000000000536161317765236500170770ustar00rootroot00000000000000# -*- coding: latin-1 -*- """ .. module:: gpyfft :platform: Windows, Linux :synopsis: A Python wrapper for the OpenCL FFT library clFFT .. moduleauthor:: Gregor Thalhammer """ cimport cython import pyopencl as cl from libc.stdlib cimport malloc, free import atexit try: from weakref import finalize except ImportError: from backports.weakref import finalize ctypedef size_t voidptr_t DEF MAX_QUEUES = 5 DEF MAX_WAITFOR_EVENTS = 10 error_dict = { CLFFT_SUCCESS: 'no error', CLFFT_BUGCHECK: 'Bugcheck', CLFFT_NOTIMPLEMENTED: 'Functionality is not implemented yet.', CLFFT_TRANSPOSED_NOTIMPLEMENTED: 'Transposed functionality is not implemented for this transformation.', CLFFT_FILE_NOT_FOUND: 'Tried to open an existing file on the host system, but failed.', CLFFT_FILE_CREATE_FAILURE: 'Tried to create a file on the host system, but failed.', CLFFT_VERSION_MISMATCH: 'Version conflict between client and library.', CLFFT_INVALID_PLAN: 'Invalid plan.', CLFFT_DEVICE_NO_DOUBLE: 'Double precision not supported on this device.', CLFFT_DEVICE_MISMATCH: 'Attempt to run on a device using a plan baked for a different device', } class GpyFFT_Error(Exception): """Exception 
wrapper for errors returned from underlying library calls""" def __init__(self, errorcode): self.errorcode = errorcode def __str__(self): error_desc = error_dict.get(self.errorcode) if error_desc is None: try: error_desc = cl.status_code.to_string(self.errorcode) except ValueError: error_desc = "unknown error %d", self.errorcode return repr(error_desc) cdef inline bint errcheck(clfftStatus result) except True: cdef bint is_error = (result != CLFFT_SUCCESS) if is_error: raise GpyFFT_Error(result) return is_error _initialized=False #main class cdef class GpyFFT(object): """The GpyFFT object is the primary interface to the clFFT library""" def __cinit__(self, debug = False): if not _initialized: GpyFFT._initialize(debug) @classmethod @cython.binding(True) def _initialize(cls, debug = False): # print 'initialize clfft' global _initialized if _initialized: raise RuntimeError('GpyFFT is already initialized') cdef clfftSetupData setup_data errcheck(clfftInitSetupData(&setup_data)) if debug: setup_data.debugFlags |= CLFFT_DUMP_PROGRAMS errcheck(clfftSetup(&setup_data)) _initialized=True atexit.register(GpyFFT._teardown) @classmethod @cython.binding(True) def _teardown(cls): # print 'teardown clfft' errcheck(clfftTeardown()) global _initialized _initialized=False def get_version(self): """returns the version of the underlying clFFT library Parameters ---------- None Returns ------- out : tuple the major, minor, and patch level of the clFFT library Raises ------ GpyFFT_Error An error occurred accessing the clfftGetVersion function Notes ----- The underlying clFFT call is 'clfftCreateDefaultPlan' """ cdef cl_uint major, minor, patch errcheck(clfftGetVersion(&major, &minor, &patch)) return (major, minor, patch) def create_plan(self, context, tuple shape): """creates an FFT Plan object based on the requested dimensionality Parameters ---------- context : `pypencl.Context` shape : tuple of int containing from one to three integers, specifying the length of each requested 
dimension of the FFT Returns ------- plan : `Plan` The generated gpyfft.Plan. Raises ------ ValueError when `shape` isn't a tuple of length 1, 2 or 3 TypeError when the context argument is not a `pyopencl.Context` """ return Plan(context, shape, self) cdef _destroy_plan(clfftPlanHandle plan): cdef clfftPlanHandle p=plan #print 'destroy plan', p errcheck(clfftDestroyPlan(&p)) #@cython.internal cdef class Plan(object): """A plan is the collection of (almost) all parameters needed to specify an FFT computation. This includes: * What pyopencl context executes the transform? * Is this a 1D, 2D or 3D transform? * What are the lengths or extents of the data in each dimension? * How many datasets are being transformed? * What is the data precision? * Should a scaling factor be applied to the transformed data? * Does the output transformed data replace the original input data in the same buffer (or buffers), or is the output data written to a different buffer (or buffers). * How is the input data stored in its data buffers? * How is the output data stored in its data buffers? The plan does not include: * The pyopencl handles to the input and output data buffers. * The pyopencl handle to a temporary scratch buffer (if needed). * Whether to execute a forward or reverse transform. These are specified later, when the plan is executed. 
""" cdef object __weakref__ cdef clfftPlanHandle plan cdef object lib #def __dealloc__(self): #if self.plan: # errcheck(clfftDestroyPlan(&self.plan)) #print 'dealloc plan', self.plan def __cinit__(self): self.plan = 0 def __init__(self, context, tuple shape, lib): """Instantiates a Plan object Plan objects are created internally by gpyfft; normally a user does not create these objects Parameters ---------- contex : pyopencl.Context http://documen.tician.de/pyopencl/runtime.html#pyopencl.Context shape : tuple the dimensionality of the transform lib : no idea this is a thing that does lib things Raises ------ ValueError when the shape isn't a tuple of length 1, 2 or 3 TypeError because the context argument isn't a valid pyopencl.Context Notes ----- The underlying clFFT call is 'clfftCreateDefaultPlan' """ self.lib = lib if not isinstance(context, cl.Context): raise TypeError('expected cl.Context as type of first argument') cdef cl_context context_handle = context.int_ptr ndim = len(shape) if ndim not in (1,2,3): raise ValueError('expected shape to be tuple of length 1,2 or 3') cdef size_t lengths[3] cdef int i for i in range(ndim): lengths[i] = shape[i] cdef clfftDim ndim_cl = CLFFT_1D if ndim==1: ndim_cl = CLFFT_1D elif ndim==2: ndim_cl = CLFFT_2D elif ndim==3: ndim_cl = CLFFT_3D clfftCreateDefaultPlan(&self.plan, context_handle, ndim_cl, &lengths[0]) finalize(self, _destroy_plan, self.plan) #print 'init plan', self.plan property precision: """the floating point precision of the FFT data""" def __get__(self): cdef clfftPrecision precision errcheck(clfftGetPlanPrecision(self.plan, &precision)) return clfftPrecision_(precision) def __set__(self, clfftPrecision value): errcheck(clfftSetPlanPrecision(self.plan, value)) property scale_forward: """the scaling factor to be applied to the FFT data for forward transforms""" def __get__(self): cdef cl_float scale errcheck(clfftGetPlanScale(self.plan, CLFFT_FORWARD, &scale)) return scale def __set__(self, cl_float value): 
errcheck(clfftSetPlanScale(self.plan, CLFFT_FORWARD, value)) property scale_backward: """the scaling factor to be applied to the FFT data for backward transforms""" def __get__(self): cdef cl_float scale errcheck(clfftGetPlanScale(self.plan, CLFFT_BACKWARD, &scale)) return scale def __set__(self, cl_float value): errcheck(clfftSetPlanScale(self.plan, CLFFT_BACKWARD, value)) property batch_size: """the number of discrete arrays that this plan can handle concurrently""" def __get__(self): cdef size_t nbatch errcheck(clfftGetPlanBatchSize(self.plan, &nbatch)) return nbatch def __set__(self, nbatch): errcheck(clfftSetPlanBatchSize(self.plan, nbatch)) cdef clfftDim get_dim(self): cdef clfftDim dim cdef cl_uint size errcheck(clfftGetPlanDim(self.plan, &dim, &size)) return dim property shape: """the length of each dimension of the FFT""" def __get__(self): cdef clfftDim dim = self.get_dim() cdef size_t sizes[3] errcheck(clfftGetPlanLength(self.plan, dim, &sizes[0])) if dim == 1: return (sizes[0],) elif dim == 2: return (sizes[0], sizes[1]) elif dim == 3: return (sizes[0], sizes[1], sizes[2]) def __set__(self, tuple shape): assert len(shape) <= 3 cdef clfftDim dim = len(shape) #errcheck(clfftSetPlanDim(self.plan, dim)) cdef size_t sizes[3] cdef int i for i in range(len(shape)): sizes[i] = shape[i] errcheck(clfftSetPlanLength(self.plan, dim, &sizes[0])) property strides_in: """the distance between consecutive elements for input buffers in a dimension""" def __get__(self): cdef clfftDim dim = self.get_dim() cdef size_t strides[3] errcheck(clfftGetPlanInStride(self.plan, dim, strides)) if dim == 1: return (strides[0],) elif dim == 2: return (strides[0], strides[1]) elif dim == 3: return (strides[0], strides[1], strides[2]) def __set__(self, tuple strides): assert len(strides) <= 3 cdef clfftDim dim = len(strides) cdef size_t c_strides[3] cdef int i for i in range(dim): c_strides[i] = strides[i] errcheck(clfftSetPlanInStride(self.plan, dim, &c_strides[0])) property 
strides_out: """the distance between consecutive elements for output buffers in a dimension""" def __get__(self): cdef clfftDim dim = self.get_dim() cdef size_t strides[3] errcheck(clfftGetPlanOutStride(self.plan, dim, strides)) if dim == 1: return (strides[0],) elif dim == 2: return (strides[0], strides[1]) elif dim == 3: return (strides[0], strides[1], strides[2]) def __set__(self, tuple strides): assert len(strides) <= 3 cdef clfftDim dim = len(strides) cdef size_t c_strides[3] cdef int i for i in range(dim): c_strides[i] = strides[i] errcheck(clfftSetPlanOutStride(self.plan, dim, &c_strides[0])) property distances: """the distance between array objects""" def __get__(self): cdef size_t dist_in, dist_out errcheck(clfftGetPlanDistance(self.plan, &dist_in, &dist_out)) return (dist_in, dist_out) def __set__(self, tuple distances): assert len(distances) == 2 errcheck(clfftSetPlanDistance(self.plan, distances[0], distances[1])) property layouts: """the expected layout of the output buffers""" def __get__(self): cdef clfftLayout layout_in, layout_out errcheck(clfftGetLayout(self.plan, &layout_in, &layout_out)) return (layout_in, layout_out) def __set__(self, tuple layouts): assert len(layouts) == 2 errcheck(clfftSetLayout(self.plan, layouts[0], layouts[1])) property inplace: """determines if the input buffers are going to be overwritten with results (True == inplace, False == out of place)""" def __get__(self): cdef clfftResultLocation placeness errcheck(clfftGetResultLocation(self.plan, &placeness)) return placeness == CLFFT_INPLACE def __set__(self, value): cdef clfftResultLocation placeness if value: placeness = CLFFT_INPLACE else: placeness = CLFFT_OUTOFPLACE errcheck(clfftSetResultLocation(self.plan, placeness)) property temp_array_size: """Buffer size (in bytes), which may be needed internally for an intermediate buffer. 
Requires that transform plan is baked before."""
        def __get__(self):
            cdef size_t buffersize
            errcheck(clfftGetTmpBufSize(self.plan, &buffersize))
            return buffersize

    property transpose_result:
        """the final transpose setting of a multi-dimensional FFT

        True: transpose the final result (default)
        False: skip final transpose
        """
        def __get__(self):
            cdef clfftResultTransposed transposed
            errcheck(clfftGetPlanTransposeResult(self.plan, &transposed))
            return transposed == CLFFT_TRANSPOSED
        def __set__(self, transpose):
            cdef clfftResultTransposed transposed
            # any truthy value selects CLFFT_TRANSPOSED
            if transpose:
                transposed = CLFFT_TRANSPOSED
            else:
                transposed = CLFFT_NOTRANSPOSE
            errcheck(clfftSetPlanTransposeResult(self.plan, transposed))

    def bake(self, queues):
        """Prepare the plan for execution.

        Prepares and compiles OpenCL kernels internally used to perform the
        transform. At this point, the clfft runtime applies all implemented
        optimizations, possibly including running kernel experiments on the
        devices in the plan context. This can take a long time to execute.
        If not called, this is performed when the plan is executed for the
        first time.

        Parameters
        ----------
        queues : `pyopencl.CommandQueue` or list of `pyopencl.CommandQueue`
            At most MAX_QUEUES queues are accepted.

        Returns
        -------
        None

        Raises
        ------
        `GpyFFT_Error`
            An error occurred accessing the clfftBakePlan function

        Notes
        -----
        The underlying clFFT call is 'clfftBakePlan'
        """
        # accept a single queue as well as a sequence of queues
        if isinstance(queues, cl.CommandQueue):
            queues = (queues,)
        cdef int n_queues = len(queues)
        assert n_queues <= MAX_QUEUES
        # fixed-size C array receiving the native handle of each queue
        cdef cl_command_queue queues_[MAX_QUEUES]
        cdef int i
        for i in range(n_queues):
            assert isinstance(queues[i], cl.CommandQueue)
            queues_[i] = queues[i].int_ptr
        # the two trailing NULL arguments are passed unconditionally
        errcheck(clfftBakePlan(self.plan, n_queues, queues_, NULL, NULL))

    def set_callback(self, func_name, func_string, callback_type, local_mem_size=0, user_data=None):
        """Register callback.
Parameters
        ----------
        func_name: bytes
            callback function name
        func_string: bytes
            callback function, gets inlined in OpenCL kernel
        callback_type: 'pre' or 'post'
        local_mem_size: int
            size (bytes) of the local memory used by the callback
        user_data: pyopencl.Buffer or iterable of pyopencl.Buffer

        Raises
        ------
        KeyError
            `callback_type` is neither 'pre' nor 'post'
        MemoryError
            the temporary array of buffer handles could not be allocated

        Notes
        -----
        The underlying clFFT call is 'clfftSetPlanCallback'
        """
        typedict = {'pre': PRECALLBACK, 'post': POSTCALLBACK}
        clfft_callback_type = typedict[callback_type]

        # normalize user_data to a (possibly empty) sequence of buffers
        if user_data is None:
            user_data = ()
        if isinstance(user_data, cl.Buffer):
            user_data = (user_data,)
        n_user_data_buffers = len(user_data)

        cdef cl_mem* user_buffers = NULL
        # The whole lifetime of the temporary handle array is inside the
        # try block, so it is released even when validating one of the
        # user buffers fails. free(NULL) is a no-op, which covers the
        # "no user data" case.
        try:
            if n_user_data_buffers:
                user_buffers = malloc(n_user_data_buffers*sizeof(cl_mem))
                if user_buffers == NULL:
                    raise MemoryError()
                for n, user_data_buffer in enumerate(user_data):
                    assert isinstance(user_data_buffer, cl.Buffer)
                    user_buffers[n] = user_data_buffer.int_ptr
            res = clfftSetPlanCallback(self.plan,
                                       func_name,
                                       func_string,
                                       local_mem_size,
                                       clfft_callback_type,
                                       user_buffers,
                                       n_user_data_buffers)
        finally:
            free(user_buffers)
        # raise only after the temporary array has been released
        errcheck(res)

    def enqueue_transform(self,
                          queues,
                          in_buffers,
                          out_buffers = None,
                          direction_forward = True,
                          wait_for_events = None,
                          temp_buffer = None,
                          ):
        """Enqueue an FFT transform operation, and return immediately.

        Parameters
        ----------
        queues : pyopencl.CommandQueue or iterable of pyopencl.CommandQueue
        in_buffers : pyopencl.Buffer or iterable (1 or 2 items) of pyopencl.Buffer
        out_buffers : pyopencl.Buffer or iterable (1 or 2 items) of pyopencl.Buffer, optional
            can be None for inplace transforms

        Other Parameters
        ----------------
        direction_forward : bool, optional
            Perform forward transform (default True).
        wait_for_events : iterable of pyopencl.Event, optional
            Ensures that all events in this list have finished execution
            before transform is performed.
        temp_buffer : pyopencl.Buffer, optional
            For intermediate results a temporary buffer can be provided.
            The size (in bytes) of this buffer is given by the
            `temp_array_size` property.
Returns
        -------
        tuple of `pyopencl.Event`, one event for each command queue in `queues`

        Raises
        ------
        `GpyFFT_Error`
            An error occurred accessing the clfftEnqueueTransform function

        Notes
        -----
        The underlying clFFT call is 'clfftEnqueueTransform'
        """
        cdef int i
        cdef clfftDirection direction
        # any truthy value selects the forward transform
        if direction_forward:
            direction = CLFFT_FORWARD
        else:
            direction = CLFFT_BACKWARD

        # --- command queues: copy the native handles into a C array ---
        cdef cl_command_queue queues_[MAX_QUEUES]
        if isinstance(queues, cl.CommandQueue):
            queues = (queues,)
        n_queues = len(queues)
        assert n_queues <= MAX_QUEUES
        for i, queue in enumerate(queues):
            assert isinstance(queue, cl.CommandQueue)
            queues_[i] = queue.int_ptr

        # --- events to wait for: pointer stays NULL when none are given ---
        cdef cl_event wait_for_events_array[MAX_WAITFOR_EVENTS]
        cdef cl_event* wait_for_events_ = NULL
        # NOTE(review): declared without a C type here - confirm whether a
        # C integer type was intended
        cdef n_waitfor_events = 0
        if wait_for_events is not None and len(wait_for_events) > 0:
            n_waitfor_events = len(wait_for_events)
            assert n_waitfor_events <= MAX_WAITFOR_EVENTS
            for i, event in enumerate(wait_for_events):
                assert isinstance(event, cl.Event)
                wait_for_events_array[i] = event.int_ptr
            wait_for_events_ = &wait_for_events_array[0]

        # --- input buffers (a single buffer or up to two) ---
        cdef cl_mem in_buffers_[2]
        if isinstance(in_buffers, cl.MemoryObjectHolder):
            in_buffers = (in_buffers,)
        n_in_buffers = len(in_buffers)
        # NOTE(review): an empty sequence passes this check, unlike the
        # `in (1,2)` check used for out_buffers below - confirm intended
        assert n_in_buffers <= 2
        for i, in_buffer in enumerate(in_buffers):
            assert isinstance(in_buffer, cl.MemoryObjectHolder)
            in_buffers_[i] = in_buffer.int_ptr

        # --- output buffers: pointer stays NULL when out_buffers is None ---
        cdef cl_mem out_buffers_array[2]
        cdef cl_mem* out_buffers_ = NULL
        if out_buffers is not None:
            if isinstance(out_buffers, cl.MemoryObjectHolder):
                out_buffers = (out_buffers,)
            n_out_buffers = len(out_buffers)
            assert n_out_buffers in (1,2)
            for i, out_buffer in enumerate(out_buffers):
                assert isinstance(out_buffer, cl.MemoryObjectHolder)
                out_buffers_array[i] = out_buffer.int_ptr
            out_buffers_ = &out_buffers_array[0]

        # --- optional temporary buffer: NULL when not supplied ---
        cdef cl_mem tmp_buffer_ = NULL
        if temp_buffer is not None:
            assert isinstance(temp_buffer, cl.MemoryObjectHolder)
            tmp_buffer_ = temp_buffer.int_ptr

        # one output event slot per possible queue
        cdef cl_event out_cl_events[MAX_QUEUES]
        errcheck(clfftEnqueueTransform(self.plan,
direction, n_queues, &queues_[0], n_waitfor_events, &wait_for_events_[0], out_cl_events, &in_buffers_[0], out_buffers_, tmp_buffer_)) return tuple((cl.Event.from_int_ptr(out_cl_events[i], retain=False) for i in range(n_queues))) #gpyfft = GpyFFT() #cdef Plan PlanFactory(): #cdef Plan instance = Plan.__new__(Ref) #instance.plan = None #return instance gpyfft-0.7.1/gpyfft/test/000077500000000000000000000000001317765236500153135ustar00rootroot00000000000000gpyfft-0.7.1/gpyfft/test/__init__.py000066400000000000000000000000001317765236500174120ustar00rootroot00000000000000gpyfft-0.7.1/gpyfft/test/test_batched.py000066400000000000000000000041471317765236500203240ustar00rootroot00000000000000from __future__ import print_function import unittest from nose_parameterized import parameterized import numpy as np import pyopencl as cl import pyopencl.array as cla from gpyfft import FFT from gpyfft.test.util import get_contexts contexts = [(ctx,) for ctx in get_contexts()] class test_fft_batched(unittest.TestCase): @parameterized.expand(contexts) def test_2d_out_of_place(self, ctx): queue = cl.CommandQueue(ctx) L = 4 M = 64 N = 32 axes = (-1, -2) nd_data = np.arange(L*M*N, dtype=np.complex64) nd_data.shape = (L, M, N) cl_data = cla.to_device(queue, nd_data) cl_data_transformed = cla.zeros_like(cl_data) transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes = axes, ) transform.enqueue() print(cl_data_transformed.get) print(np.fft.fft2(nd_data)) assert np.allclose(cl_data_transformed.get(), np.fft.fft2(nd_data, axes=axes), rtol=1e-3, atol=1e-3) @parameterized.expand(contexts) def test_2d_in_4d_out_of_place(self, ctx): queue = cl.CommandQueue(ctx) L1 = 4 L2 = 5 M = 64 N = 32 axes = (-1, -2) #ok #axes = (0,1) #ok #axes = (0,2) #cannot be collapsed nd_data = np.arange(L1*L2*M*N, dtype=np.complex64) nd_data.shape = (L1, L2, M, N) cl_data = cla.to_device(queue, nd_data) cl_data_transformed = cla.zeros_like(cl_data) transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes 
= axes, ) transform.enqueue() print(cl_data_transformed.get) print(np.fft.fft2(nd_data)) assert np.allclose(cl_data_transformed.get(), np.fft.fft2(nd_data, axes=axes), rtol=1e-3, atol=1e-3) gpyfft-0.7.1/gpyfft/test/test_callback.py000066400000000000000000000122471317765236500204660ustar00rootroot00000000000000# coding: utf-8 from __future__ import absolute_import, division, print_function import unittest import os import numpy as np import pyopencl as cl import pyopencl.array as cla from gpyfft.gpyfftlib import * from gpyfft.test.util import get_contexts class TestCallbackPreMul(unittest.TestCase): callback_kernel_src_premul = b""" float2 premul(__global void* in, uint inoffset, __global void* userdata //__local void* localmem ) { float scalar = *((__global float*)userdata + inoffset); float2 ret = *((__global float2*)in + inoffset) * scalar; return ret; } """ def test_callback_pre(self): for ctx in get_contexts(): self.callback_pre(ctx) def callback_pre(self, context): print("context:", context) queue = cl.CommandQueue(context) nd_data = np.array([[1, 2, 3, 4], [5, 6, 5, 2]], dtype=np.complex64) cl_data = cla.to_device(queue, nd_data) cl_data_transformed = cla.empty_like(cl_data) print("cl_data:") print(cl_data) print('nd_data.shape/strides:', nd_data.shape, nd_data.strides) print('cl_data.shape/strides:', cl_data.shape, cl_data.strides) print('cl_data_transformed.shape/strides:', cl_data_transformed.shape, cl_data_transformed.strides) G = GpyFFT(debug=False) plan = G.create_plan(context, cl_data.shape) plan.strides_in = tuple(x // cl_data.dtype.itemsize for x in cl_data.strides) plan.strides_out = tuple(x // cl_data.dtype.itemsize for x in cl_data_transformed.strides) print('plan.strides_in', plan.strides_in) print('plan.strides_out', plan.strides_out) print('plan.distances', plan.distances) print('plan.batch_size', plan.batch_size) plan.inplace = False plan.precision = CLFFT_SINGLE print('plan.precision:', plan.precision) plan.scale_forward = 1. 
print('plan.scale_forward:', plan.scale_forward) #print('plan.transpose_result:', plan.transpose_result) nd_user_data = np.array([[2, 2, 2, 2], [3, 4, 5, 6]], dtype=np.float32) cl_user_data = cla.to_device(queue, nd_user_data) print('cl_user_data') print(cl_user_data) plan.set_callback(b'premul', self.callback_kernel_src_premul, 'pre', user_data=cl_user_data.data) plan.bake(queue) print('plan.temp_array_size:', plan.temp_array_size) plan.enqueue_transform((queue,), (cl_data.data,), (cl_data_transformed.data,) ) queue.finish() print('cl_data_transformed:') print(cl_data_transformed) print('fft(nd_data * nd_user_data):') print(np.fft.fftn(nd_data * nd_user_data)) assert np.allclose(cl_data_transformed.get(), np.fft.fftn(nd_data * nd_user_data)) del plan callback_kernel_src_postset = b""" float2 postset(__global void* output, uint offset, __global void* userdata, float2 fftoutput) { float scalar = *((__global float*)userdata + offset); *((__global float2*)output + offset) = fftoutput * scalar; } """ def test_callback_post(self): for ctx in get_contexts(): self.callback_post(ctx) def callback_post(self, context): print("context:", context) queue = cl.CommandQueue(context) nd_data = np.array([[1, 2, 3, 4], [5, 6, 5, 2]], dtype=np.complex64) nd_user_data = np.array([[2, 2, 2, 2], [3, 4, 5, 6]], dtype=np.float32) cl_data = cla.to_device(queue, nd_data) cl_user_data = cla.to_device(queue, nd_user_data) cl_data_transformed = cla.empty_like(cl_data) G = GpyFFT(debug=False) plan = G.create_plan(context, cl_data.shape) plan.strides_in = tuple(x // cl_data.dtype.itemsize for x in cl_data.strides) plan.strides_out = tuple(x // cl_data.dtype.itemsize for x in cl_data_transformed.strides) plan.inplace = False plan.precision = CLFFT_SINGLE plan.set_callback(b'postset', self.callback_kernel_src_postset, 'post', user_data=cl_user_data.data) plan.bake(queue) plan.enqueue_transform((queue,), (cl_data.data,), (cl_data_transformed.data,) ) queue.finish() print('cl_data_transformed:') 
print(cl_data_transformed) print('fft(nd_data) * nd_user_data') print(np.fft.fftn(nd_data)) assert np.allclose(cl_data_transformed.get(), np.fft.fftn(nd_data) * nd_user_data) del plan #TODO: create TestSuite gpyfft-0.7.1/gpyfft/test/test_gpyfft.py000066400000000000000000000107371317765236500202330ustar00rootroot00000000000000from __future__ import print_function import unittest from nose_parameterized import parameterized import numpy as np import pyopencl as cl import pyopencl.array as cla from gpyfft import FFT from gpyfft.test.util import get_contexts, has_double """ Some basic tests for high-level interface """ #TODO: perform tests for all contexts contexts = [(ctx,) for ctx in get_contexts()] #contexts = [] # see what happens if no OpenCL GPU device available class test_fft(unittest.TestCase): def test_context(self): self.assertTrue(contexts, msg='no OpenCL GPU device available') @parameterized.expand(contexts) def test_1d_inplace(self, ctx): queue = cl.CommandQueue(ctx) nd_data = np.arange(32, dtype=np.complex64) cl_data = cla.to_device(queue, nd_data) #cl_data_transformed = cla.zeros_like(cl_data) transform = FFT(ctx, queue, cl_data) transform.enqueue() #print(cl_data) #print(np.fft.fft(nd_data)) assert np.allclose(cl_data.get(), np.fft.fft(nd_data)) @parameterized.expand(contexts) def test_1d_out_of_place(self, ctx): queue = cl.CommandQueue(ctx) nd_data = np.arange(32, dtype=np.complex64) cl_data = cla.to_device(queue, nd_data) cl_data_transformed = cla.zeros_like(cl_data) transform = FFT(ctx, queue, cl_data, cl_data_transformed ) transform.enqueue() assert np.allclose(cl_data_transformed.get(), np.fft.fft(nd_data)) @parameterized.expand(contexts) def test_1d_inplace_double(self, ctx): if not has_double(ctx): #TODO: find better way to skip test return queue = cl.CommandQueue(ctx) nd_data = np.arange(32, dtype=np.complex128) cl_data = cla.to_device(queue, nd_data) transform = FFT(ctx, queue, cl_data) transform.enqueue() assert np.allclose(cl_data.get(), 
np.fft.fft(nd_data)) @parameterized.expand(contexts) def test_1d_real_to_complex(self, ctx): queue = cl.CommandQueue(ctx) N = 32 nd_data = np.arange(N, dtype=np.float32) cl_data = cla.to_device(queue, nd_data) cl_data_transformed = cla.zeros(queue, (N//2+1,), dtype = np.complex64) transform = FFT(ctx, queue, cl_data, cl_data_transformed, ) transform.enqueue() assert np.allclose(cl_data_transformed.get(), np.fft.rfft(nd_data)) @parameterized.expand(contexts) def test_2d_real_to_complex(self, ctx): queue = cl.CommandQueue(ctx) M = 64 N = 32 nd_data = np.arange(M*N, dtype=np.float32) nd_data.shape = (M, N) cl_data = cla.to_device(queue, nd_data) cl_data_transformed = cla.zeros(queue, (M, N//2+1), dtype = np.complex64) transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes = (1,0), ) transform.enqueue() print(cl_data_transformed.get) print(np.fft.rfft2(nd_data)) assert np.allclose(cl_data_transformed.get(), np.fft.rfft2(nd_data), rtol=1e-3, atol=1e-3) @parameterized.expand(contexts) def test_2d_real_to_complex_double(self, ctx): if not has_double(ctx): #TODO: find better way to skip test return queue = cl.CommandQueue(ctx) M = 64 N = 32 nd_data = np.arange(M*N, dtype=np.float64) nd_data.shape = (M, N) cl_data = cla.to_device(queue, nd_data) cl_data_transformed = cla.zeros(queue, (M, N//2+1), dtype = np.complex128) transform = FFT(ctx, queue, cl_data, cl_data_transformed, axes = (1,0), ) transform.enqueue() print(cl_data_transformed.get) print(np.fft.rfft2(nd_data)) assert np.allclose(cl_data_transformed.get(), np.fft.rfft2(nd_data), rtol=1e-8, atol=1e-8) if __name__ == '__main__': unittest.main() gpyfft-0.7.1/gpyfft/test/test_gpyfftlib.py000066400000000000000000000021501317765236500207100ustar00rootroot00000000000000from __future__ import print_function import unittest import numpy as np import pyopencl as cl import pyopencl.array as cla from gpyfft import gpyfftlib from gpyfft.test.util import get_contexts """ Some basic tests """ class 
test_basic(unittest.TestCase): def test_basic(self): G = gpyfftlib.GpyFFT() print('clFFT version:', G.get_version()) del G #@unittest.skip('segfaults with pytest') def test_create_plan(self): G = gpyfftlib.GpyFFT() ctx = get_contexts()[0] queue = cl.CommandQueue(ctx) nd_data = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.complex64) cl_data = cla.to_device(queue, nd_data) cl_data_transformed = cla.zeros_like(cl_data) plan = G.create_plan(ctx, cl_data.shape) print('plan.strides_in', plan.strides_in) print('plan.strides_out', plan.strides_out) print('plan.distances', plan.distances) print('plan.batch_size', plan.batch_size) del plan del G if __name__ == '__main__': unittest.main() gpyfft-0.7.1/gpyfft/test/util.py000066400000000000000000000007061317765236500166450ustar00rootroot00000000000000import pyopencl as cl def get_contexts(): """ Return list of OpenCL contexts for all (GPU) devices present in the system. """ ALL_DEVICES = [] for platform in cl.get_platforms(): ALL_DEVICES += platform.get_devices(device_type = cl.device_type.GPU) contexts = [ cl.Context([device]) for device in ALL_DEVICES ] return contexts def has_double(ctx): dev = ctx.devices[0] return 'cl_khr_fp64' in dev.extensions gpyfft-0.7.1/gpyfft/version.py000066400000000000000000000000261317765236500163710ustar00rootroot00000000000000__version__ = '0.7.1' gpyfft-0.7.1/setup.cfg000066400000000000000000000001771317765236500146630ustar00rootroot00000000000000[bumpversion] current_version = 0.7.1 commit = True tag = True [build_ext] inplace = 1 [bumpversion:file:gpyfft/version.py] gpyfft-0.7.1/setup.py000066400000000000000000000054621317765236500145560ustar00rootroot00000000000000import os import platform from setuptools import setup, Extension from distutils.util import convert_path from Cython.Build import cythonize system = platform.system() ## paths settings # Linux if 'Linux' in system: CLFFT_DIR = r'/home/gregor/devel/clFFT' CLFFT_LIB_DIR = r'/usr/local/lib64' CLFFT_INCL_DIRS = 
[os.path.join(CLFFT_DIR, 'src', 'include'), ] CL_INCL_DIRS = ['/opt/AMDAPPSDK-3.0/include'] #Windows elif 'Windows' in system: CLFFT_DIR = r'C:\Users\q014gt\Devel\clFFT-Full-2.10.2-Windows-x64' CLFFT_LIB_DIR = os.path.join(CLFFT_DIR, 'bin') CLFFT_INCL_DIRS = [os.path.join(CLFFT_DIR, 'include'), ] CL_DIR = os.getenv('AMDAPPSDKROOT') CL_INCL_DIRS = [os.path.join(CL_DIR, 'include')] # macOS elif 'Darwin' in system: CLFFT_DIR = r'/Users/gregor/Devel/clFFT' CLFFT_LIB_DIR = r'/Users/gregor/Devel/clFFT/src/library' CLFFT_INCL_DIRS = [os.path.join(CLFFT_DIR, 'src', 'include'), ] CL_INCL_DIRS = [] import Cython.Compiler.Options Cython.Compiler.Options.generate_cleanup_code = 2 extensions = [ Extension("gpyfft.gpyfftlib", [os.path.join('gpyfft', 'gpyfftlib.pyx')], include_dirs= CLFFT_INCL_DIRS + CL_INCL_DIRS, extra_compile_args=[], extra_link_args=[], libraries=['clFFT'], library_dirs = [CLFFT_LIB_DIR,], language='c++', ) ] def copy_clfftdll_to_package(): import shutil shutil.copy( os.path.join(CLFFT_LIB_DIR, 'clFFT.dll'), 'gpyfft') shutil.copy( os.path.join(CLFFT_LIB_DIR, 'StatTimer.dll'), 'gpyfft') print("copied clFFT.dll, StatTimer.dll") package_data = {} if 'Windows' in platform.system(): copy_clfftdll_to_package() package_data.update({'gpyfft': ['clFFT.dll', 'StatTimer.dll']},) def get_version(): main_ns = {} version_path = convert_path('gpyfft/version.py') with open(version_path) as version_file: exec(version_file.read(), main_ns) version = main_ns['__version__'] return version def get_readme(): dirname = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(dirname, "README.md"), "r") as fp: long_description = fp.read() return long_description install_requires = ["numpy", "pyopencl"] setup_requires = ["numpy", "cython"] setup( name='gpyfft', version=get_version(), description='A Python wrapper for the OpenCL FFT library clFFT', long_description=get_readme(), url=r"https://github.com/geggo/gpyfft", maintainer='Gregor Thalhammer', 
maintainer_email='gregor.thalhammer@gmail.com', license='LGPL', packages=['gpyfft', "gpyfft.test"], ext_modules=cythonize(extensions), package_data=package_data, install_requires=install_requires, setup_requires=setup_requires, )