pax_global_header 0000666 0000000 0000000 00000000064 13177652365 0014531 g ustar 00root root 0000000 0000000 52 comment=f34b1ca101ffaf9e04eff996df71215cdebdc9f7
gpyfft-0.7.1/ 0000775 0000000 0000000 00000000000 13177652365 0013035 5 ustar 00root root 0000000 0000000 gpyfft-0.7.1/.gitattributes 0000664 0000000 0000000 00000000014 13177652365 0015723 0 ustar 00root root 0000000 0000000 * text=auto
gpyfft-0.7.1/.gitignore 0000664 0000000 0000000 00000000116 13177652365 0015023 0 ustar 00root root 0000000 0000000 build
*.pyc
gpyfft.egg-info
dist
gpyfftlib.cpp
*.so
.cache
.ipynb_checkpoints
gpyfft-0.7.1/LICENSE.txt 0000664 0000000 0000000 00000016743 13177652365 0014673 0 ustar 00root root 0000000 0000000 GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.
gpyfft-0.7.1/MANIFEST.in 0000664 0000000 0000000 00000000164 13177652365 0014574 0 ustar 00root root 0000000 0000000 include README.md
include MANIFEST.in
include LICENSE.txt
include gpyfft/gpyfftlib.pxd
include gpyfft/gpyfftlib.pyx
gpyfft-0.7.1/README.md 0000664 0000000 0000000 00000007336 13177652365 0014325 0 ustar 00root root 0000000 0000000 gpyfft
======
A Python wrapper for the OpenCL FFT library clFFT.
## Introduction
### clFFT
The open source library [clFFT] implements FFT for running on a GPU via OpenCL. Some highlights are:
* batched 1D, 2D, and 3D transforms
* supports many transform sizes (any combination of powers of 2, 3, 5, 7, 11, and 13)
* flexible memory layout
* single and double precisions
* complex and real-to-complex transforms
* supports injecting custom code for data pre- and post-processing
### gpyfft
This python wrapper is designed to tightly integrate with [PyOpenCL]. It consists of a low-level Cython based wrapper with an interface similar to the underlying C library. On top of that it offers a high-level interface designed to work on data contained in instances of `pyopencl.array.Array`, a numpy work-alike array class for GPU computations. The high-level interface takes some inspiration from [pyFFTW]. For details of the high-level interface see [fft.py].
## Status
The low-level interface is complete (more or less); the high-level interface is not yet settled and is likely to change in the future. Features to come (not yet implemented in the high-level interface):
### work done
- low level wrapper (mostly) completed
- high level wrapper
* complex-to-complex transform, in- and out-of-place
* real-to-complex transform (out-of-place)
* complex-to-real transform (out-of-place)
* single precision
* double precision
* interleaved data
* support injecting custom OpenCL code (pre and post callbacks)
* accept pyopencl arrays with non-zero offsets (Syam Gadde)
## Basic usage
Here we describe a simple example of performing a batch of 2D complex-to-complex FFT transforms on the GPU, using the high-level interface of gpyfft. The full source code of this example is contained in [simple\_example.py], which is the essence of [benchmark.py].
Note: for testing, it is recommended to start [simple\_example.py] from the command line, so that you can interactively choose an OpenCL context (otherwise, e.g. when using IPython, you are not asked and might end up with a CPU device, which is prone to fail).
imports:
``` python
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
from gpyfft.fft import FFT
```
initialize GPU:
``` python
context = cl.create_some_context()
queue = cl.CommandQueue(context)
```
initialize memory (on host and GPU). In this example we want to perform in parallel four 2D FFTs for 1024x1024 single precision data.
``` python
data_host = np.zeros((4, 1024, 1024), dtype = np.complex64)
#data_host[:] = some_useful_data
data_gpu = cla.to_device(queue, data_host)
```
Create an FFT transform plan for a batched in-place 2D transform along the last two axes.
``` python
transform = FFT(context, queue, data_gpu, axes = (2, 1))
```
If you want an out-of-place transform, provide the output array as additional argument after the input data.
Start the work and wait until it is finished (note that `enqueue()` returns a tuple of events):
``` python
event, = transform.enqueue()
event.wait()
```
Read back the data from the GPU to the host
``` python
result_host = data_gpu.get()
```
## Benchmark
A simple benchmark is contained as a submodule, you can run it on the command line by `python -m gpyfft.benchmark`, or from Python
``` python
import gpyfft.benchmark
gpyfft.benchmark.run()
```
Note, you might want to set the `PYOPENCL_CTX` environment variable to select your OpenCL platform and device.
[clFFT]: https://github.com/clMathLibraries/clFFT
[pyFFTW]: https://github.com/hgomersall/pyFFTW
[PyOpenCL]: https://mathema.tician.de/software/pyopencl
[fft.py]: gpyfft/fft.py
[pyfft]: http://github.com/Manticore/pyfft
[simple\_example.py]: examples/simple_example.py
[benchmark.py]: gpyfft/benchmark.py
gpyfft-0.7.1/docs/ 0000775 0000000 0000000 00000000000 13177652365 0013765 5 ustar 00root root 0000000 0000000 gpyfft-0.7.1/docs/Makefile 0000664 0000000 0000000 00000013176 13177652365 0015435 0 ustar 00root root 0000000 0000000 # Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
#BUILDDIR = ../../gpyfft-docs
BUILDDIR = build
#PDFBUILDDIR = /tmp
PDFBUILDDIR = build
#PDF = ../manual.pdf
PDF = manual.pdf
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# Every target below is a command name, not a file to be produced; declaring
# all of them phony keeps a stray file or directory of the same name (e.g.
# "info" or "texinfo") from making the target look up to date.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
# help: print the list of documentation targets this Makefile understands.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
-rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/gpyfft.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/gpyfft.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/gpyfft"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/gpyfft"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(PDFBUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(PDFBUILDDIR)/latex all-pdf
cp $(PDFBUILDDIR)/latex/*.pdf $(PDF)
@echo "pdflatex finished; the PDF files are in $(PDFBUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
# info: generate the Texinfo sources, then build Info files from them.
# The sub-make is started through $(MAKE), as the latexpdf rule does, so that
# command-line flags and the jobserver are passed on to the recursive make.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
gpyfft-0.7.1/docs/make.bat 0000664 0000000 0000000 00000012667 13177652365 0015406 0 ustar 00root root 0000000 0000000 @echo off
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set BUILDDIR=..\..\gpyfft-docs
set PDFBUILDDIR=\tmp
set PDF= ..\manual.pdf
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
set I18NSPHINXOPTS=%SPHINXOPTS% source
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
if "%1" == "" goto help
rem Print the list of supported build targets. This block is entered for the
rem explicit help argument and, through the :help label, when no target is given.
rem The angle brackets of the placeholder are escaped with a caret so that
rem cmd.exe does not treat them as redirections.
if "%1" == "help" (
    :help
    echo.Please use `make ^<target^>` where ^<target^> is one of
    echo. html to make standalone HTML files
    echo. dirhtml to make HTML files named index.html in directories
    echo. singlehtml to make a single large HTML file
    echo. pickle to make pickle files
    echo. json to make JSON files
    echo. htmlhelp to make HTML files and a HTML help project
    echo. qthelp to make HTML files and a qthelp project
    echo. devhelp to make HTML files and a Devhelp project
    echo. epub to make an epub
    echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
    echo. latexpdf to make pdf files
    echo. text to make text files
    echo. man to make manual pages
    echo. texinfo to make Texinfo files
    echo. gettext to make PO message catalogs
    echo. changes to make an overview over all changed/added/deprecated items
    echo. linkcheck to check all external links for integrity
    echo. doctest to run all doctests embedded in the documentation if enabled
    goto end
)
if "%1" == "clean" (
echo Don't do that
rem for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
rem del /q /s %BUILDDIR%\*
goto end
)
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
rem Build the Qt help project and print the follow-up commands.
rem The collection file opened by assistant has the extension .qhc,
rem matching the hint printed by the Makefile for the same target.
if "%1" == "qthelp" (
    %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
    echo.^> qcollectiongenerator %BUILDDIR%\qthelp\gpyfft.qhcp
    echo.To view the help file:
    echo.^> assistant -collectionFile %BUILDDIR%\qthelp\gpyfft.qhc
    goto end
)
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
rem Generate the LaTeX sources, run pdflatex on them and copy the PDF to PDF.
rem As in every other target, stop right away when sphinx-build fails, so that
rem pdflatex and copy are not run on missing or stale LaTeX output.
if "%1" == "latexpdf" (
    %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %PDFBUILDDIR%/latex
    if errorlevel 1 exit /b 1
    chdir /D %PDFBUILDDIR%\latex
    pdflatex %PDFBUILDDIR%\latex\gpyfft.tex
    rem Return to the directory of this script before copying the result.
    chdir /D %~dp0
    copy %PDFBUILDDIR%\latex\gpyfft.pdf %PDF%
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished; the LaTeX files are in %PDFBUILDDIR%\latex.
    goto end
)
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
:end
gpyfft-0.7.1/docs/source/ 0000775 0000000 0000000 00000000000 13177652365 0015265 5 ustar 00root root 0000000 0000000 gpyfft-0.7.1/docs/source/building.rst 0000664 0000000 0000000 00000000147 13177652365 0017616 0 ustar 00root root 0000000 0000000 Building gpyfft
***************
Detailed instructions for building gpyfft from source will be added here.
gpyfft-0.7.1/docs/source/code.rst 0000664 0000000 0000000 00000000215 13177652365 0016727 0 ustar 00root root 0000000 0000000 gpyfft class structure
======================
.. automodule:: gpyfft
.. toctree::
:maxdepth: 2
gpyfft
plan
gpyfft_error
gpyfft-0.7.1/docs/source/conf.py 0000664 0000000 0000000 00000017270 13177652365 0016573 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
#
# gpyfft documentation build configuration file, created by
# sphinx-quickstart on Wed Jun 06 11:48:24 2012.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'numpydoc', 'sphinx.ext.autosummary']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# Parser to use for each non-reST source suffix: Markdown ('.md') files are
# handed to the recommonmark CommonMark parser.
source_parsers = {
    '.md': 'recommonmark.parser.CommonMarkParser',
}

# The suffixes of source filenames.
source_suffix = ['.rst', '.md']

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'gpyfft'
copyright = u'2012, Gregor Thalhammer'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents. Keep both values in sync with the packaged release
# (this tree is gpyfft 0.7.1); they were left at the quickstart value '0.1'.
#
# The short X.Y version.
version = '0.7'
# The full version, including alpha/beta/rc tags.
release = '0.7.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# Settings in this section are read by the HTML and HTML Help builders.
# Only three values are set explicitly (html_theme, html_static_path and
# htmlhelp_basename); the commented-out lines show Sphinx's defaults.
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# NOTE(review): newer Sphinx releases appear to call this theme 'classic' --
# confirm against the Sphinx version used to build the docs.
html_theme = 'default'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Output file base name for HTML help builder.
htmlhelp_basename = 'gpyfftdoc'
# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
# The target name 'gpyfft.tex' is the file docs/make.bat passes to pdflatex.
latex_documents = [
    ('index', 'gpyfft.tex', u'gpyfft Documentation',
     u'Gregor Thalhammer', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'gpyfft', u'gpyfft Documentation',
     [u'Gregor Thalhammer'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# The description replaces the sphinx-quickstart placeholder with the
# project's one-line summary from README.md.
texinfo_documents = [
    ('index', 'gpyfft', u'gpyfft Documentation',
     u'Gregor Thalhammer', 'gpyfft',
     'A Python wrapper for the OpenCL FFT library clFFT.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
gpyfft-0.7.1/docs/source/gpyfft.rst 0000664 0000000 0000000 00000000126 13177652365 0017315 0 ustar 00root root 0000000 0000000 GpyFFT
******
.. autoclass:: gpyfft.GpyFFT
   :members: get_version, create_plan
gpyfft-0.7.1/docs/source/gpyfft_error.rst 0000664 0000000 0000000 00000000111 13177652365 0020520 0 ustar 00root root 0000000 0000000 GpyFFT_Error
************
.. autoclass:: gpyfft.GpyFFT_Error
   :members:
gpyfft-0.7.1/docs/source/includeme.rst 0000664 0000000 0000000 00000000035 13177652365 0017762 0 ustar 00root root 0000000 0000000 .. include:: ../../README.md
gpyfft-0.7.1/docs/source/index.rst 0000664 0000000 0000000 00000000625 13177652365 0017131 0 ustar 00root root 0000000 0000000 .. gpyfft documentation master file, created by
   sphinx-quickstart on Wed Jun 06 11:48:24 2012.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

gpyfft
======
.. toctree::
   :numbered:
   :maxdepth: 2

   includeme
   building
   code

Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
gpyfft-0.7.1/docs/source/plan.rst 0000664 0000000 0000000 00000000370 13177652365 0016751 0 ustar 00root root 0000000 0000000 Plan
====
.. autoclass:: gpyfft.Plan
   :members: __init__, precision, scale_forward, scale_backward, batch_size, get_dim, shape, strides_in, strides_out, distances, layouts, inplace, temp_array_size, transpose_result, bake, enqueue_transform
gpyfft-0.7.1/examples/ 0000775 0000000 0000000 00000000000 13177652365 0014653 5 ustar 00root root 0000000 0000000 gpyfft-0.7.1/examples/simple_example.py 0000664 0000000 0000000 00000000662 13177652365 0020235 0 ustar 00root root 0000000 0000000 import numpy as np
import pyopencl as cl
import pyopencl.array as cla
from gpyfft.fft import FFT
# Create an OpenCL context and a command queue on it.
context = cl.create_some_context()
queue = cl.CommandQueue(context)
# Host-side input: a stack of four 1024x1024 single precision complex arrays.
data_host = np.zeros((4, 1024, 1024), dtype = np.complex64)
#data_host[:] = some_useful_data
# Transfer the input to the device.
data_gpu = cla.to_device(queue, data_host)
# No output array is given, so the transform over axes 2 and 1 is done in place.
transform = FFT(context, queue, data_gpu, axes = (2, 1))
# enqueue() returns a sequence of events; exactly one is expected here.
event, = transform.enqueue()
event.wait()
# Copy the transformed data back to host memory.
result_host = data_gpu.get()
gpyfft-0.7.1/gpyfft/ 0000775 0000000 0000000 00000000000 13177652365 0014334 5 ustar 00root root 0000000 0000000 gpyfft-0.7.1/gpyfft/__init__.py 0000664 0000000 0000000 00000000263 13177652365 0016446 0 ustar 00root root 0000000 0000000 from __future__ import absolute_import
import logging
logging.basicConfig()
from .version import __version__
from .gpyfftlib import GpyFFT, GpyFFT_Error, Plan
from .fft import *
gpyfft-0.7.1/gpyfft/benchmark.py 0000664 0000000 0000000 00000011535 13177652365 0016645 0 ustar 00root root 0000000 0000000 from __future__ import absolute_import, division, print_function
import timeit
import numpy as np
from numpy.fft import fftn as npfftn
from numpy.testing import assert_array_almost_equal, assert_allclose
import pyopencl as cl
import pyopencl.array as cla
from gpyfft import FFT
from gpyfft.gpyfftlib import GpyFFT_Error
#real to complex: (forward) out_array.shape[axes][-1] = in_array.shape[axes][-1]//2 + 1
def _time_transform(transform, n_run):
    """Return the mean wall-clock time in ms of `n_run` executions of `transform`.

    All runs are enqueued back to back, then the events of the last run are
    awaited.
    NOTE(review): this assumes the queue executes commands in order, so that
    the last events completing implies all earlier runs completed -- confirm.
    """
    tic = timeit.default_timer()
    for _ in range(n_run):
        events = transform.enqueue()
    for event in events:
        event.wait()
    toc = timeit.default_timer()
    return 1e3 * (toc - tic) / n_run


def _gflops(transform, t_ms):
    """Return the throughput in Gflops of `transform` executed in `t_ms` ms.

    The operation count is estimated as 5*N*log2(N) per transform of N points,
    times the batch size of the transform.
    """
    n_points = np.prod(transform.t_shape)
    return 5e-9 * np.log2(n_points) * n_points * transform.batchsize / (1e-3 * t_ms)


def run(double_precision=False):
    """Benchmark 2d transforms of a 1024x1024 complex array and print the results.

    Out of place transforms are timed for every combination of C and Fortran
    ordered input and output arrays and for each entry of `axes_list`; their
    result is compared against `numpy.fft.fftn`.  In place transforms are only
    timed.  A combination that fails (plan creation, execution or result check)
    is reported on its output line and the benchmark continues.

    Parameters
    ----------
    double_precision : bool
        use complex128 data instead of complex64
    """
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)
    dtype = np.complex128 if double_precision else np.complex64
    n_run = 100  # set to 1 for testing for correct result
    if n_run > 1:
        nd_dataC = np.random.normal(size=(1024, 1024)).astype(dtype)
    else:
        nd_dataC = np.ones((1024, 1024), dtype=dtype)  # set n_run to 1
    nd_dataF = np.asfortranarray(nd_dataC)
    dataC = cla.to_device(queue, nd_dataC)
    dataF = cla.to_device(queue, nd_dataF)
    nd_result = np.zeros_like(nd_dataC, dtype=dtype)
    resultC = cla.to_device(queue, nd_result)
    resultF = cla.to_device(queue, np.asfortranarray(nd_result))
    axes_list = [(-2, -1), (-1, -2), None]  # batched 2d transforms
    tolerance = 1e-8 if double_precision else 1e-3

    print('out of place transforms', dataC.shape, dataC.dtype)
    print('axes in out')
    for axes in axes_list:
        for data in (dataC, dataF):
            for result in (resultC, resultF):
                t_ms, gflops = 0, 0
                try:
                    transform = FFT(context, queue, data, result, axes=axes)
                    #transform.plan.transpose_result = True #not implemented for some transforms (works e.g. for out of place, (2,1) C C)
                    print('%-10s %3s %3s'
                          % (axes,
                             'C' if data.flags.c_contiguous else 'F',
                             'C' if result.flags.c_contiguous else 'F'),
                          end=' ')
                    t_ms = _time_transform(transform, n_run)
                    gflops = _gflops(transform, t_ms)
                    npfft_result = npfftn(nd_dataC, axes=axes)
                    if transform.plan.transpose_result:
                        npfft_result = np.swapaxes(npfft_result, axes[0], axes[1])
                    # copy the result from the device only once
                    result_host = result.get()
                    max_error = np.max(abs(result_host - npfft_result))
                    print('%8.1e' % max_error, end=' ')
                    assert_allclose(result_host, npfft_result,
                                    atol=tolerance, rtol=tolerance)
                except Exception as e:
                    # deliberate best effort: unsupported layouts (GpyFFT_Error)
                    # and wrong results (AssertionError) are reported, then the
                    # remaining combinations are still benchmarked
                    print(e)
                finally:
                    print('%5.2fms %6.2f Gflops' % (t_ms, gflops))

    print('in place transforms', nd_dataC.shape, nd_dataC.dtype)
    for axes in axes_list:
        for nd_data in (nd_dataC, nd_dataF):
            data = cla.to_device(queue, nd_data)
            transform = FFT(context, queue, data, axes=axes)
            #transform.plan.transpose_result = True #not implemented
            # the result is not checked: for n_run > 1 the data has been
            # transformed repeatedly and no longer equals a single FFT
            t_ms = _time_transform(transform, n_run)
            gflops = _gflops(transform, t_ms)
            print('%-10s %3s %5.2fms %6.2f Gflops' % (
                axes,
                'C' if data.flags.c_contiguous else 'F',
                t_ms, gflops
            ))
# When executed as a script: benchmark single precision first, then double precision.
if __name__ == '__main__':
    run()
    run(double_precision=True)
gpyfft-0.7.1/gpyfft/fft.py 0000664 0000000 0000000 00000023541 13177652365 0015472 0 ustar 00root root 0000000 0000000 from __future__ import absolute_import, division, print_function
from .gpyfftlib import GpyFFT
import gpyfft.gpyfftlib as gfft
import pyopencl as cl
GFFT = GpyFFT(debug=False)
import pyopencl as cl
import numpy as np
# TODO:
class FFT(object):
    """Plan and execute a (batched) FFT of an array on an OpenCL device.

    The constructor derives the plan parameters (transform shape, strides,
    batch distance and size, precision and layouts) from the memory layout of
    `in_array` (and of `out_array`, if given), creates the plan through the
    module level `GFFT` object and bakes it for `queue`.  The transform is then
    executed with `enqueue` or `enqueue_arrays`.

    Attributes
    ----------
    context, queue
        the OpenCL context and command queue given to the constructor
    t_shape : tuple
        lengths of the transformed axes, in the order they are transformed
    batchsize
        product of the lengths of the non-transformed axes of `in_array`
    plan
        the baked plan object returned by `GFFT.create_plan`
    temp_buffer
        `cl.Buffer` of `plan.temp_array_size` bytes, or None if that size is 0
    data, result
        the arrays given as `in_array` and `out_array`
    """

    def __init__(self, context, queue, in_array, out_array=None, axes = None,
                 fast_math = False,
                 real=False,
                 callbacks=None, #dict: 'pre', 'post'
                 ):
        """Create and bake the plan.

        Parameters
        ----------
        context, queue
            OpenCL context used to create the plan and command queue used to
            bake and execute it
        in_array
            input array; must provide `shape`, `strides`, `dtype`, `flags`,
            `ndim`, `base_data` and `offset`
        out_array
            output array, or None for an in place transform.  The transform is
            also treated as in place if `out_array` shares its `base_data`
            with `in_array`.
        axes : sequence of int, optional
            axes to transform (negative values count from the last axis).  By
            default all axes are transformed, ordered depending on whether
            `in_array` is C or Fortran contiguous.
        fast_math
            currently unused
        real : bool
            for complex input, request a complex-to-real transform
        callbacks : dict, optional
            OpenCL source of callback functions, keyed by 'pre' and/or 'post';
            the functions have to be named `pre` and `post`, respectively,
            e.g. dict(pre=b'pre source (kernel named pre!)')

        Raises
        ------
        TypeError
            if the dtype of `in_array` is not float32, float64, complex64 or
            complex128
        ValueError
            if a real-to-complex or complex-to-real transform is requested
            without `out_array`
        AssertionError
            if the array layouts or shapes are not supported
        """
        self.context = context
        self.queue = queue

        # if no axes are given, transform all axes, select axes order for good
        # performance depending on memory layout
        if axes is None:
            if in_array.flags.c_contiguous:
                axes = np.arange(in_array.ndim)[::-1]
            elif in_array.flags.f_contiguous:
                axes = np.arange(in_array.ndim)
            else:
                # TODO: find good heuristics for this (rare), e.g. based on strides
                axes = np.arange(in_array.ndim)[::-1]
        else:
            axes = np.asarray(axes)

        (t_strides_in, t_distance_in, t_batchsize_in,
         t_shape, axes_transform) = self.calculate_transform_strides(axes, in_array)

        if out_array is not None:
            (t_strides_out, t_distance_out, t_batchsize_out,
             t_shape_out, axes_transform_out) = self.calculate_transform_strides(axes, out_array)
            # arrays sharing one buffer are transformed in place
            t_inplace = in_array.base_data is out_array.base_data
            # TODO: also check that batch size and shape of input and output
            # match (such a check fails as-is for real-to-complex transforms)
            assert np.all(axes_transform == axes_transform_out), 'error finding transform axis (consider setting axes argument)'
        else:
            t_inplace = True
            t_strides_out, t_distance_out = t_strides_in, t_distance_in

        # precision (complex64 <-> complex64, complex128 <-> complex128)
        # TODO: add assertions that precision of input and output match
        if in_array.dtype in (np.float32, np.complex64):
            precision = gfft.CLFFT_SINGLE
        elif in_array.dtype in (np.float64, np.complex128):
            precision = gfft.CLFFT_DOUBLE
        else:
            raise TypeError('unsupported dtype %s (expected float32, float64, '
                            'complex64 or complex128)' % (in_array.dtype,))

        is_real_input = in_array.dtype in (np.float32, np.float64)
        is_real_transform = is_real_input or bool(real)
        if is_real_transform and out_array is None:
            # shape and strides of the output are needed below
            raise ValueError('real-to-complex and complex-to-real transforms '
                             'require an output array')

        if is_real_input:
            layout_in = gfft.CLFFT_REAL
            layout_out = gfft.CLFFT_HERMITIAN_INTERLEAVED
            # real to complex: the first transformed axis of the output has
            # length n//2 + 1
            expected_out_shape = list(in_array.shape)
            expected_out_shape[axes_transform[0]] = expected_out_shape[axes_transform[0]]//2 + 1
            assert out_array.shape == tuple(expected_out_shape), \
                'output array shape %s does not match expected shape: %s'%(out_array.shape,expected_out_shape)
        elif not real:
            layout_in = gfft.CLFFT_COMPLEX_INTERLEAVED
            layout_out = gfft.CLFFT_COMPLEX_INTERLEAVED
        else:
            # complex-to-real transform: the transform shape is that of the
            # (real) output array
            layout_in = gfft.CLFFT_HERMITIAN_INTERLEAVED
            layout_out = gfft.CLFFT_REAL
            t_shape = t_shape_out

        if t_inplace and is_real_transform:
            assert ((in_array.strides[axes_transform[0]] == in_array.dtype.itemsize) and \
                    (out_array.strides[axes_transform[0]] == out_array.dtype.itemsize)), \
                'inline real transforms need stride 1 for first transform axis'

        self.t_shape = t_shape
        self.batchsize = t_batchsize_in

        plan = GFFT.create_plan(context, t_shape)
        plan.inplace = t_inplace
        plan.strides_in = t_strides_in
        plan.strides_out = t_strides_out
        plan.distances = (t_distance_in, t_distance_out)
        plan.batch_size = self.batchsize
        plan.precision = precision
        plan.layouts = (layout_in, layout_out)

        if callbacks is not None:
            # `in` instead of dict.has_key(), which does not exist on Python 3
            if 'pre' in callbacks:
                plan.set_callback(b'pre',
                                  callbacks['pre'],
                                  'pre')
            if 'post' in callbacks:
                plan.set_callback(b'post',
                                  callbacks['post'],
                                  'post')

        plan.bake(self.queue)
        temp_size = plan.temp_array_size
        if temp_size:
            self.temp_buffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE, size = temp_size)
        else:
            self.temp_buffer = None

        self.plan = plan
        self.data = in_array
        self.result = out_array

    @classmethod
    def calculate_transform_strides(cls, axes_transform, array):
        """Derive the plan geometry for transforming `axes_transform` of `array`.

        All non-transformed axes are treated as a single batch axis, which
        requires that they can be collapsed into one axis with a constant
        stride.

        Parameters
        ----------
        axes_transform : sequence of int
            axes to transform, in transform order; negative values count from
            the last axis.  The argument is not modified.
        array
            object providing `shape`, `strides` (in bytes) and `dtype`

        Returns
        -------
        tuple
            `(t_strides, t_distance, batchsize, t_shape, axes_transform)`:
            strides of the transformed axes and distance between consecutive
            transforms (both in units of elements, 0 distance if there is no
            batch axis), number of transforms, lengths of the transformed axes
            and the transformed axes with negative values resolved.

        Raises
        ------
        AssertionError
            if more axes than dimensions are given, or if the non-transformed
            axes cannot be collapsed into a single axis
        """
        shape = np.array(array.shape)
        strides = np.array(array.strides)
        itemsize = array.dtype.itemsize
        ddim = len(shape) #dimensionality data

        # work on a private copy: resolving negative axes below must not
        # modify the sequence owned by the caller
        axes_transform = np.array(axes_transform, dtype=np.intp)
        tdim = len(axes_transform) #dimensionality transform
        assert tdim <= ddim

        # transform negative axis values (e.g. -1 for last axis) to positive
        axes_transform[axes_transform<0] += ddim

        # remaining, non-transformed axes, sorted by strides
        axes_notransform = np.setdiff1d(np.arange(ddim), axes_transform)
        axes_notransform = axes_notransform[np.argsort(strides[axes_notransform])]

        # group the non-transformed axes into runs of collapsable axes, e.g.
        # [ [x,y], [z] ]; an axis extends the current run if its stride equals
        # the extent (stride * length) of the previous axis
        groups = []
        current = axes_notransform[:1].tolist() #empty list if there are no such axes
        for axis in axes_notransform[1:]:
            last = current[-1]
            if strides[axis] == strides[last] * shape[last]:
                current.append(axis)
            else:
                groups.append(current)
                current = [axis]
        groups.append(current)
        assert len(groups) == 1, 'data layout not supported (only single non-transformed axis allowd)' #all non-transformed axes collapsed
        axes_notransform = groups[0]

        t_distances = strides[axes_notransform]//itemsize
        # smallest stride first, since the axes have been sorted by stride
        t_distance = t_distances[0] if len(t_distances) else 0
        batchsize = np.prod(shape[axes_notransform])
        t_shape = shape[axes_transform]
        t_strides = strides[axes_transform]//itemsize
        return (tuple(t_strides), t_distance, batchsize, tuple(t_shape), tuple(axes_transform))

    @staticmethod
    def _buffer_at_offset(array):
        """Return a buffer that starts at the first element of `array`."""
        if array.offset != 0:
            array = array._new_with_changes(data=array.base_data[array.offset:], offset=0)
        return array.base_data

    def enqueue(self, forward = True, wait_for_events = None):
        """Enqueue the transform of the arrays given to the constructor.

        Parameters
        ----------
        forward : bool
            direction of the transform (True: forward)
        wait_for_events
            passed on to `Plan.enqueue_transform`

        Returns
        -------
        the events returned by `Plan.enqueue_transform`
        """
        return self.enqueue_arrays(forward=forward, data=self.data, result=self.result, wait_for_events=wait_for_events)

    def enqueue_arrays(self, data = None, result = None, forward = True, wait_for_events = None):
        """Enqueue the transform, optionally for other arrays of the same layout.

        Parameters
        ----------
        data, result
            input and output array; default to the arrays given to the
            constructor.  Other arrays have to match those in shape, strides
            and dtype.
        forward : bool
            direction of the transform (True: forward)
        wait_for_events
            passed on to `Plan.enqueue_transform`

        Returns
        -------
        the events returned by `Plan.enqueue_transform`

        Raises
        ------
        AssertionError
            if a given array does not match the array used to create the plan
        """
        if data is None:
            data = self.data
        else:
            assert data.shape == self.data.shape
            assert data.strides == self.data.strides
            assert data.dtype == self.data.dtype
        if result is None:
            result = self.result
        else:
            assert result.shape == self.result.shape
            assert result.strides == self.result.strides
            assert result.dtype == self.result.dtype

        data_buffer = self._buffer_at_offset(data)
        if result is not None:
            result_buffer = self._buffer_at_offset(result)
            # output buffers are passed as a tuple, like the input buffers
            events = self.plan.enqueue_transform((self.queue,), (data_buffer,), (result_buffer,),
                                                 direction_forward = forward, temp_buffer = self.temp_buffer, wait_for_events = wait_for_events)
        else:
            events = self.plan.enqueue_transform((self.queue,), (data_buffer,),
                                                 direction_forward = forward, temp_buffer = self.temp_buffer, wait_for_events = wait_for_events)
        return events

    def update_arrays(self, input_array, output_array):
        """Placeholder, currently does nothing."""
        pass
gpyfft-0.7.1/gpyfft/gpyfftlib.pxd 0000664 0000000 0000000 00000020345 13177652365 0017043 0 ustar 00root root 0000000 0000000 cdef extern from "clFFT.h":
ctypedef int cl_int
ctypedef unsigned int cl_uint
ctypedef unsigned long int cl_ulong
ctypedef float cl_float
ctypedef void* cl_context
ctypedef void* cl_command_queue
ctypedef void* cl_event
ctypedef void* cl_mem
# cdef struct _cl_context:
# pass
# ctypedef _cl_context *cl_context
# cdef struct _cl_command_queue:
# pass
# ctypedef _cl_command_queue *cl_command_queue
# cdef struct _cl_event:
# pass
# ctypedef _cl_event *cl_event
# cdef struct _cl_mem:
# pass
# ctypedef _cl_mem *cl_mem
enum:
CLFFT_DUMP_PROGRAMS ##define constant
cpdef enum clfftStatus_:
CLFFT_INVALID_GLOBAL_WORK_SIZE
CLFFT_INVALID_MIP_LEVEL
CLFFT_INVALID_BUFFER_SIZE
CLFFT_INVALID_GL_OBJECT
CLFFT_INVALID_OPERATION
CLFFT_INVALID_EVENT
CLFFT_INVALID_EVENT_WAIT_LIST
CLFFT_INVALID_GLOBAL_OFFSET
CLFFT_INVALID_WORK_ITEM_SIZE
CLFFT_INVALID_WORK_GROUP_SIZE
CLFFT_INVALID_WORK_DIMENSION
CLFFT_INVALID_KERNEL_ARGS
CLFFT_INVALID_ARG_SIZE
CLFFT_INVALID_ARG_VALUE
CLFFT_INVALID_ARG_INDEX
CLFFT_INVALID_KERNEL
CLFFT_INVALID_KERNEL_DEFINITION
CLFFT_INVALID_KERNEL_NAME
CLFFT_INVALID_PROGRAM_EXECUTABLE
CLFFT_INVALID_PROGRAM
CLFFT_INVALID_BUILD_OPTIONS
CLFFT_INVALID_BINARY
CLFFT_INVALID_SAMPLER
CLFFT_INVALID_IMAGE_SIZE
CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR
CLFFT_INVALID_MEM_OBJECT
CLFFT_INVALID_HOST_PTR
CLFFT_INVALID_COMMAND_QUEUE
CLFFT_INVALID_QUEUE_PROPERTIES
CLFFT_INVALID_CONTEXT
CLFFT_INVALID_DEVICE
CLFFT_INVALID_PLATFORM
CLFFT_INVALID_DEVICE_TYPE
CLFFT_INVALID_VALUE
CLFFT_MAP_FAILURE
CLFFT_BUILD_PROGRAM_FAILURE
CLFFT_IMAGE_FORMAT_NOT_SUPPORTED
CLFFT_IMAGE_FORMAT_MISMATCH
CLFFT_MEM_COPY_OVERLAP
CLFFT_PROFILING_INFO_NOT_AVAILABLE
CLFFT_OUT_OF_HOST_MEMORY
CLFFT_OUT_OF_RESOURCES
CLFFT_MEM_OBJECT_ALLOCATION_FAILURE
CLFFT_COMPILER_NOT_AVAILABLE
CLFFT_DEVICE_NOT_AVAILABLE
CLFFT_DEVICE_NOT_FOUND
CLFFT_SUCCESS
CLFFT_BUGCHECK
CLFFT_NOTIMPLEMENTED
CLFFT_TRANSPOSED_NOTIMPLEMENTED
CLFFT_FILE_NOT_FOUND
CLFFT_FILE_CREATE_FAILURE
CLFFT_VERSION_MISMATCH
CLFFT_INVALID_PLAN
CLFFT_DEVICE_NO_DOUBLE
CLFFT_DEVICE_MISMATCH
ctypedef clfftStatus_ clfftStatus
cpdef enum clfftDim_:
CLFFT_1D
CLFFT_2D
CLFFT_3D
ctypedef clfftDim_ clfftDim
cpdef enum clfftLayout_:
CLFFT_COMPLEX_INTERLEAVED
CLFFT_COMPLEX_PLANAR
CLFFT_HERMITIAN_INTERLEAVED
CLFFT_HERMITIAN_PLANAR
CLFFT_REAL
ctypedef clfftLayout_ clfftLayout
cpdef enum clfftPrecision_:
CLFFT_SINGLE
CLFFT_DOUBLE
CLFFT_SINGLE_FAST
CLFFT_DOUBLE_FAST
ctypedef clfftPrecision_ clfftPrecision
cpdef enum clfftDirection_:
CLFFT_FORWARD
CLFFT_BACKWARD
CLFFT_MINUS
CLFFT_PLUS
ctypedef clfftDirection_ clfftDirection
cpdef enum clfftResultLocation_:
CLFFT_INPLACE
CLFFT_OUTOFPLACE
ctypedef clfftResultLocation_ clfftResultLocation
cpdef enum clfftResultTransposed_:
CLFFT_NOTRANSPOSE
CLFFT_TRANSPOSED
ctypedef clfftResultTransposed_ clfftResultTransposed
cdef struct clfftSetupData_:
cl_uint major
cl_uint minor
cl_uint patch
cl_ulong debugFlags
ctypedef clfftSetupData_ clfftSetupData
cpdef enum clfftCallbackType_:
PRECALLBACK
POSTCALLBACK
ctypedef clfftCallbackType_ clfftCallbackType
ctypedef size_t clfftPlanHandle
clfftStatus clfftInitSetupData(clfftSetupData *setupData)
clfftStatus clfftSetup(const clfftSetupData *setupData)
clfftStatus clfftTeardown()
clfftStatus clfftGetVersion(cl_uint *major, cl_uint *minor, cl_uint *patch)
clfftStatus clfftCreateDefaultPlan(clfftPlanHandle *plHandle, cl_context context,
#const clfftDim dim,
clfftDim dim,
const size_t *clLengths)
clfftStatus clfftCopyPlan(clfftPlanHandle *out_plHandle, cl_context new_context, clfftPlanHandle in_plHandle)
clfftStatus clfftBakePlan(clfftPlanHandle plHandle,
cl_uint numQueues,
cl_command_queue *commQueueFFT,
#void (*pfn_notify)(unsigned long, void *),
void (*pfn_notify)(clfftPlanHandle plHandle, void *user_data),
void *user_data)
clfftStatus clfftDestroyPlan(clfftPlanHandle *plHandle)
clfftStatus clfftGetPlanContext(const clfftPlanHandle plHandle, cl_context *context)
clfftStatus clfftGetPlanPrecision(const clfftPlanHandle plHandle, clfftPrecision *precision)
clfftStatus clfftSetPlanPrecision(clfftPlanHandle plHandle, clfftPrecision precision)
clfftStatus clfftGetPlanScale(const clfftPlanHandle plHandle, clfftDirection dir, cl_float *scale)
clfftStatus clfftSetPlanScale(clfftPlanHandle plHandle, clfftDirection dir, cl_float scale)
clfftStatus clfftGetPlanBatchSize(const clfftPlanHandle plHandle, size_t *batchSize)
clfftStatus clfftSetPlanBatchSize(clfftPlanHandle plHandle, size_t batchSize)
clfftStatus clfftGetPlanDim(const clfftPlanHandle plHandle, clfftDim *dim, cl_uint *size)
clfftStatus clfftSetPlanDim(clfftPlanHandle plHandle, const clfftDim dim)
clfftStatus clfftGetPlanLength(const clfftPlanHandle plHandle, const clfftDim dim, size_t *clLengths)
clfftStatus clfftSetPlanLength(clfftPlanHandle plHandle, const clfftDim dim, const size_t *clLengths)
clfftStatus clfftGetPlanInStride(const clfftPlanHandle plHandle, const clfftDim dim, size_t *clStrides)
clfftStatus clfftSetPlanInStride(clfftPlanHandle plHandle, const clfftDim dim, size_t *clStrides)
clfftStatus clfftGetPlanOutStride(const clfftPlanHandle plHandle, const clfftDim dim, size_t *clStrides)
clfftStatus clfftSetPlanOutStride(clfftPlanHandle plHandle, const clfftDim dim, size_t *clStrides)
clfftStatus clfftGetPlanDistance(const clfftPlanHandle plHandle, size_t *iDist, size_t *oDist)
clfftStatus clfftSetPlanDistance(clfftPlanHandle plHandle, size_t iDist, size_t oDist)
clfftStatus clfftGetLayout(const clfftPlanHandle plHandle, clfftLayout *iLayout, clfftLayout *oLayout)
clfftStatus clfftSetLayout(clfftPlanHandle plHandle, clfftLayout iLayout, clfftLayout oLayout)
clfftStatus clfftGetResultLocation(const clfftPlanHandle plHandle, clfftResultLocation *placeness)
clfftStatus clfftSetResultLocation(clfftPlanHandle plHandle, clfftResultLocation placeness)
clfftStatus clfftGetPlanTransposeResult(const clfftPlanHandle plHandle, clfftResultTransposed *transposed)
clfftStatus clfftSetPlanTransposeResult(clfftPlanHandle plHandle, clfftResultTransposed transposed)
clfftStatus clfftGetTmpBufSize(const clfftPlanHandle plHandle, size_t *buffersize)
clfftStatus clfftSetPlanCallback(clfftPlanHandle plHandle,
const char* funcName,
const char* funcString,
int localMemSize,
clfftCallbackType callbackType,
cl_mem *userdata,
int numUserdataBuffers)
clfftStatus clfftEnqueueTransform(clfftPlanHandle plHandle,
clfftDirection dir,
cl_uint numQueuesAndEvents,
cl_command_queue *commQueues,
cl_uint numWaitEvents,
const cl_event *waitEvents,
cl_event *outEvents,
cl_mem *inputBuffers,
cl_mem *outputBuffers,
cl_mem tmpBuffer
)
gpyfft-0.7.1/gpyfft/gpyfftlib.pyx 0000664 0000000 0000000 00000053616 13177652365 0017077 0 ustar 00root root 0000000 0000000 # -*- coding: latin-1 -*-
"""
.. module:: gpyfft
:platform: Windows, Linux
:synopsis: A Python wrapper for the OpenCL FFT library clFFT
.. moduleauthor:: Gregor Thalhammer
"""
cimport cython
import pyopencl as cl
from libc.stdlib cimport malloc, free
import atexit
try:
from weakref import finalize
except ImportError:
from backports.weakref import finalize
ctypedef size_t voidptr_t
DEF MAX_QUEUES = 5
DEF MAX_WAITFOR_EVENTS = 10
# Descriptions of the clFFT specific status codes. Codes that are missing here
# are looked up through pyopencl by GpyFFT_Error.__str__.
error_dict = {
    CLFFT_SUCCESS: 'no error',
    CLFFT_BUGCHECK: 'Bugcheck',
    CLFFT_NOTIMPLEMENTED: 'Functionality is not implemented yet.',
    CLFFT_TRANSPOSED_NOTIMPLEMENTED: 'Transposed functionality is not implemented for this transformation.',
    CLFFT_FILE_NOT_FOUND: 'Tried to open an existing file on the host system, but failed.',
    CLFFT_FILE_CREATE_FAILURE: 'Tried to create a file on the host system, but failed.',
    CLFFT_VERSION_MISMATCH: 'Version conflict between client and library.',
    CLFFT_INVALID_PLAN: 'Invalid plan.',
    CLFFT_DEVICE_NO_DOUBLE: 'Double precision not supported on this device.',
    CLFFT_DEVICE_MISMATCH: 'Attempt to run on a device using a plan baked for a different device',
    }
class GpyFFT_Error(Exception):
    """Exception wrapper for errors returned from underlying library calls

    Attributes
    ----------
    errorcode
        the status code returned by the failing clFFT call
    """
    def __init__(self, errorcode):
        self.errorcode = errorcode

    def __str__(self):
        """Return the (quoted) description of the error code.

        The description is taken from `error_dict`; codes not listed there are
        looked up with `cl.status_code.to_string`, and codes unknown to both
        are reported as 'unknown error <code>'.
        """
        error_desc = error_dict.get(self.errorcode)
        if error_desc is None:
            try:
                error_desc = cl.status_code.to_string(self.errorcode)
            except ValueError:
                # format the message; a comma here would build a tuple
                # ('unknown error %d', code) instead of a string
                error_desc = "unknown error %d" % self.errorcode
        return repr(error_desc)
cdef inline bint errcheck(clfftStatus result) except True:
    # Check the status code returned by a clFFT call: any value other than
    # CLFFT_SUCCESS is raised as GpyFFT_Error, success yields False.
    if result != CLFFT_SUCCESS:
        raise GpyFFT_Error(result)
    return False
_initialized=False
#main class
cdef class GpyFFT(object):
    """The GpyFFT object is the primary interface to the clFFT library

    Creating an instance sets up the clFFT library, unless the module level
    flag `_initialized` shows that this has been done already.

    Parameters
    ----------
    debug : bool
        if true, CLFFT_DUMP_PROGRAMS is added to the clFFT debug flags when the
        library is set up
    """
    def __cinit__(self, debug = False):
        if not _initialized:
            GpyFFT._initialize(debug)

    @classmethod
    @cython.binding(True)
    def _initialize(cls, debug = False):
        """Set up the clFFT library and register `_teardown` with atexit.

        Raises
        ------
        RuntimeError
            if the library is already initialized
        GpyFFT_Error
            if a clFFT setup call fails
        """
        # print 'initialize clfft'
        global _initialized
        if _initialized:
            raise RuntimeError('GpyFFT is already initialized')
        cdef clfftSetupData setup_data
        errcheck(clfftInitSetupData(&setup_data))
        if debug:
            setup_data.debugFlags |= CLFFT_DUMP_PROGRAMS
        errcheck(clfftSetup(&setup_data))
        _initialized=True
        atexit.register(GpyFFT._teardown)

    @classmethod
    @cython.binding(True)
    def _teardown(cls):
        """Tear down the clFFT library and reset the `_initialized` flag.

        Raises
        ------
        GpyFFT_Error
            if clfftTeardown fails; the flag is left set in that case
        """
        # print 'teardown clfft'
        errcheck(clfftTeardown())
        global _initialized
        _initialized=False

    def get_version(self):
        """returns the version of the underlying clFFT library

        Parameters
        ----------
        None

        Returns
        -------
        out : tuple
            the major, minor, and patch level of the clFFT library

        Raises
        ------
        GpyFFT_Error
            An error occurred accessing the clfftGetVersion function

        Notes
        -----
        The underlying clFFT call is 'clfftGetVersion'
        """
        cdef cl_uint major, minor, patch
        errcheck(clfftGetVersion(&major, &minor, &patch))
        return (major, minor, patch)

    def create_plan(self, context, tuple shape):
        """creates an FFT Plan object based on the requested dimensionality

        Parameters
        ----------
        context : `pyopencl.Context`
        shape : tuple of int
            containing from one to three integers, specifying the
            length of each requested dimension of the FFT

        Returns
        -------
        plan : `Plan`
            The generated gpyfft.Plan.

        Raises
        ------
        ValueError
            when `shape` isn't a tuple of length 1, 2 or 3
        TypeError
            when the context argument is not a `pyopencl.Context`
        """
        # the plan receives a reference to this library object
        return Plan(context, shape, self)
# Destroy the clFFT plan behind the given handle. Plan.__init__ registers this
# function, together with the plan handle, through `finalize`.
cdef _destroy_plan(clfftPlanHandle plan):
    # clfftDestroyPlan expects a pointer to the handle, so copy it to a local
    cdef clfftPlanHandle p=plan
    #print 'destroy plan', p
    errcheck(clfftDestroyPlan(&p))
#@cython.internal
cdef class Plan(object):
"""A plan is the collection of (almost) all parameters needed to specify
an FFT computation. This includes:
* What pyopencl context executes the transform?
* Is this a 1D, 2D or 3D transform?
* What are the lengths or extents of the data in each dimension?
* How many datasets are being transformed?
* What is the data precision?
* Should a scaling factor be applied to the transformed data?
* Does the output transformed data replace the original input data in the same buffer (or buffers), or is the output data written to a different buffer (or buffers).
* How is the input data stored in its data buffers?
* How is the output data stored in its data buffers?
The plan does not include:
* The pyopencl handles to the input and output data buffers.
* The pyopencl handle to a temporary scratch buffer (if needed).
* Whether to execute a forward or reverse transform.
These are specified later, when the plan is executed.
"""
cdef object __weakref__
cdef clfftPlanHandle plan
cdef object lib
#def __dealloc__(self):
#if self.plan:
# errcheck(clfftDestroyPlan(&self.plan))
#print 'dealloc plan', self.plan
def __cinit__(self):
    # start with a zero plan handle; the real handle is created in __init__
    self.plan = 0
def __init__(self, context, tuple shape, lib):
    """Instantiates a Plan object

    Plan objects are created internally by gpyfft; normally
    a user does not create these objects

    Parameters
    ----------
    context : pyopencl.Context
        http://documen.tician.de/pyopencl/runtime.html#pyopencl.Context
    shape : tuple
        the lengths of the transform, one entry for each of its one to
        three dimensions
    lib : GpyFFT
        the library object that creates this plan (see `GpyFFT.create_plan`);
        a reference to it is stored with the plan -- presumably to keep the
        library object alive as long as the plan exists

    Raises
    ------
    ValueError
        when the shape isn't a tuple of length 1, 2 or 3
    TypeError
        because the context argument isn't a valid pyopencl.Context
    GpyFFT_Error
        when clFFT fails to create the plan

    Notes
    -----
    The underlying clFFT call is 'clfftCreateDefaultPlan'
    """
    self.lib = lib
    if not isinstance(context, cl.Context):
        raise TypeError('expected cl.Context as type of first argument')
    # int_ptr is a Python integer holding the address of the OpenCL context;
    # it has to be converted to an integer C type before the pointer cast
    cdef cl_context context_handle = <cl_context><voidptr_t>context.int_ptr
    ndim = len(shape)
    if ndim not in (1,2,3):
        raise ValueError('expected shape to be tuple of length 1,2 or 3')
    cdef size_t lengths[3]
    cdef int i
    for i in range(ndim):
        lengths[i] = shape[i]
    cdef clfftDim ndim_cl = CLFFT_1D
    if ndim==1:
        ndim_cl = CLFFT_1D
    elif ndim==2:
        ndim_cl = CLFFT_2D
    elif ndim==3:
        ndim_cl = CLFFT_3D
    # check the returned status, so that a failed plan creation is reported
    # here instead of leaving an unusable plan handle behind
    errcheck(clfftCreateDefaultPlan(&self.plan, context_handle, ndim_cl, &lengths[0]))
    # destroy the clFFT plan when this object goes away; registered only
    # after the plan has been created successfully
    finalize(self, _destroy_plan, self.plan)
property precision:
    """the floating point precision of the FFT data

    Read as a `clfftPrecision_` value; raises `GpyFFT_Error` if the
    underlying clFFT call fails.
    """
    def __get__(self):
        cdef clfftPrecision precision
        errcheck(clfftGetPlanPrecision(self.plan, &precision))
        return clfftPrecision_(precision)
    def __set__(self, clfftPrecision value):
        errcheck(clfftSetPlanPrecision(self.plan, value))
property scale_forward:
    """the scaling factor to be applied to the FFT data for forward transforms

    Raises `GpyFFT_Error` if the underlying clFFT call fails.
    """
    def __get__(self):
        cdef cl_float scale
        errcheck(clfftGetPlanScale(self.plan, CLFFT_FORWARD, &scale))
        return scale
    def __set__(self, cl_float value):
        errcheck(clfftSetPlanScale(self.plan, CLFFT_FORWARD, value))
property scale_backward:
    """the scaling factor to be applied to the FFT data for backward transforms

    Raises `GpyFFT_Error` if the underlying clFFT call fails.
    """
    def __get__(self):
        cdef cl_float scale
        errcheck(clfftGetPlanScale(self.plan, CLFFT_BACKWARD, &scale))
        return scale
    def __set__(self, cl_float value):
        errcheck(clfftSetPlanScale(self.plan, CLFFT_BACKWARD, value))
property batch_size:
    """the number of discrete arrays that this plan can handle concurrently

    Raises `GpyFFT_Error` if the underlying clFFT call fails.
    """
    def __get__(self):
        cdef size_t nbatch
        errcheck(clfftGetPlanBatchSize(self.plan, &nbatch))
        return nbatch
    def __set__(self, nbatch):
        # nbatch is converted to size_t for the C call
        errcheck(clfftSetPlanBatchSize(self.plan, nbatch))
cdef clfftDim get_dim(self):
    # Return the dimensionality of the plan as reported by clfftGetPlanDim.
    # The size value filled in by the same call is not used.
    cdef clfftDim dim
    cdef cl_uint size
    errcheck(clfftGetPlanDim(self.plan, &dim, &size))
    return dim
property shape:
    """the length of each dimension of the FFT

    Read as a tuple with one entry per dimension of the plan; assigned from
    a tuple of at most three lengths.  Raises `GpyFFT_Error` if the
    underlying clFFT call fails.
    """
    def __get__(self):
        cdef clfftDim dim = self.get_dim()
        cdef size_t sizes[3]
        errcheck(clfftGetPlanLength(self.plan, dim, &sizes[0]))
        # return only as many entries as the plan has dimensions; falls
        # through (returning None) for any other value of dim
        if dim == 1:
            return (sizes[0],)
        elif dim == 2:
            return (sizes[0], sizes[1])
        elif dim == 3:
            return (sizes[0], sizes[1], sizes[2])
    def __set__(self, tuple shape):
        # NOTE(review): this assert is the only guard against writing past
        # the end of sizes[3] below; it also accepts an empty tuple
        assert len(shape) <= 3
        # NOTE(review): integer assigned to an enum typed variable, an
        # explicit <clfftDim> cast may be needed here -- confirm
        cdef clfftDim dim = len(shape)
        #errcheck(clfftSetPlanDim(self.plan, dim))
        cdef size_t sizes[3]
        cdef int i
        for i in range(len(shape)):
            sizes[i] = shape[i]
        errcheck(clfftSetPlanLength(self.plan, dim, &sizes[0]))
property strides_in:
    """the distance between consecutive elements for input buffers
    in a dimension

    Read as a tuple with one entry per dimension of the plan; assigned from
    a tuple of at most three strides.  Raises `GpyFFT_Error` if the
    underlying clFFT call fails.
    """
    def __get__(self):
        cdef clfftDim dim = self.get_dim()
        cdef size_t strides[3]
        errcheck(clfftGetPlanInStride(self.plan, dim, strides))
        # return only as many entries as the plan has dimensions; falls
        # through (returning None) for any other value of dim
        if dim == 1:
            return (strides[0],)
        elif dim == 2:
            return (strides[0], strides[1])
        elif dim == 3:
            return (strides[0], strides[1], strides[2])
    def __set__(self, tuple strides):
        # NOTE(review): this assert is the only guard against writing past
        # the end of c_strides[3] below; it also accepts an empty tuple
        assert len(strides) <= 3
        # NOTE(review): integer assigned to an enum typed variable, an
        # explicit <clfftDim> cast may be needed here -- confirm
        cdef clfftDim dim = len(strides)
        cdef size_t c_strides[3]
        cdef int i
        for i in range(dim):
            c_strides[i] = strides[i]
        errcheck(clfftSetPlanInStride(self.plan, dim, &c_strides[0]))
property strides_out:
    """the distance between consecutive elements for output buffers
    in a dimension

    Read as a tuple with one entry per dimension of the plan; assigned from
    a tuple of at most three strides.  Raises `GpyFFT_Error` if the
    underlying clFFT call fails.
    """
    def __get__(self):
        cdef clfftDim dim = self.get_dim()
        cdef size_t strides[3]
        errcheck(clfftGetPlanOutStride(self.plan, dim, strides))
        # return only as many entries as the plan has dimensions; falls
        # through (returning None) for any other value of dim
        if dim == 1:
            return (strides[0],)
        elif dim == 2:
            return (strides[0], strides[1])
        elif dim == 3:
            return (strides[0], strides[1], strides[2])
    def __set__(self, tuple strides):
        # NOTE(review): this assert is the only guard against writing past
        # the end of c_strides[3] below; it also accepts an empty tuple
        assert len(strides) <= 3
        # NOTE(review): integer assigned to an enum typed variable, an
        # explicit <clfftDim> cast may be needed here -- confirm
        cdef clfftDim dim = len(strides)
        cdef size_t c_strides[3]
        cdef int i
        for i in range(dim):
            c_strides[i] = strides[i]
        errcheck(clfftSetPlanOutStride(self.plan, dim, &c_strides[0]))
property distances:
    """the distance between array objects

    A tuple `(input distance, output distance)`.  Raises `GpyFFT_Error` if
    the underlying clFFT call fails.
    """
    def __get__(self):
        cdef size_t dist_in, dist_out
        errcheck(clfftGetPlanDistance(self.plan, &dist_in, &dist_out))
        return (dist_in, dist_out)
    def __set__(self, tuple distances):
        assert len(distances) == 2
        errcheck(clfftSetPlanDistance(self.plan, distances[0], distances[1]))
property layouts:
    """the expected layouts of the input and output buffers

    A tuple `(input layout, output layout)` of `clfftLayout` values.
    Raises `GpyFFT_Error` if the underlying clFFT call fails.
    """
    def __get__(self):
        cdef clfftLayout layout_in, layout_out
        errcheck(clfftGetLayout(self.plan, &layout_in, &layout_out))
        return (layout_in, layout_out)
    def __set__(self, tuple layouts):
        assert len(layouts) == 2
        errcheck(clfftSetLayout(self.plan, layouts[0], layouts[1]))
property inplace:
    """determines if the input buffers are going to be overwritten with
    results (True == inplace, False == out of place)

    Raises `GpyFFT_Error` if the underlying clFFT call fails.
    """
    def __get__(self):
        cdef clfftResultLocation placeness
        errcheck(clfftGetResultLocation(self.plan, &placeness))
        return placeness == CLFFT_INPLACE
    def __set__(self, value):
        # any true value selects the in place transform
        cdef clfftResultLocation placeness
        if value:
            placeness = CLFFT_INPLACE
        else:
            placeness = CLFFT_OUTOFPLACE
        errcheck(clfftSetResultLocation(self.plan, placeness))
property temp_array_size:
    """Buffer size (in bytes), which may be needed internally for
    an intermediate buffer. Requires that transform plan is baked
    before.

    Read-only.  Raises `GpyFFT_Error` if the underlying clFFT call fails.
    """
    def __get__(self):
        cdef size_t buffersize
        errcheck(clfftGetTmpBufSize(self.plan, &buffersize))
        return buffersize
property transpose_result:
    """the final transpose setting of a multi-dimensional FFT

    True: transpose the final result (default)
    False: skip final transpose

    Raises `GpyFFT_Error` if the underlying clFFT call fails.
    """
    # NOTE(review): the "(default)" marker in the docstring cannot be verified
    # from this file -- confirm against the clFFT documentation
    def __get__(self):
        cdef clfftResultTransposed transposed
        errcheck(clfftGetPlanTransposeResult(self.plan, &transposed))
        return transposed == CLFFT_TRANSPOSED
    def __set__(self, transpose):
        # any true value requests the transposed result
        cdef clfftResultTransposed transposed
        if transpose:
            transposed = CLFFT_TRANSPOSED
        else:
            transposed = CLFFT_NOTRANSPOSE
        errcheck(clfftSetPlanTransposeResult(self.plan, transposed))
def bake(self, queues):
    """Prepare the plan for execution.

    Prepares and compiles OpenCL kernels internally used to
    perform the transform. At this point, the clfft runtime
    applies all implemented optimizations, possibly including
    running kernel experiments on the devices in the plan
    context. This can take a long time to execute. If not called,
    this is performed when the plan is executed for the first time.

    Parameters
    ----------
    queues : `pyopencl.CommandQueue` or list of `pyopencl.CommandQueue`
        At most MAX_QUEUES queues are accepted.

    Returns
    -------
    None

    Raises
    ------
    `GpyFFT_Error`
        An error occurred accessing the clfftBakePlan function
    AssertionError
        Too many queues were given, or an entry is not a
        `pyopencl.CommandQueue`.

    Notes
    -----
    The underlying clFFT call is 'clfftBakePlan'
    """
    # A single queue is shorthand for a one-element sequence.
    if isinstance(queues, cl.CommandQueue):
        queues = (queues,)
    cdef int n_queues = len(queues)
    # queues_ is a fixed-size C array, so the count has to fit into it.
    assert n_queues <= MAX_QUEUES
    cdef cl_command_queue queues_[MAX_QUEUES]
    cdef int i
    for i in range(n_queues):
        assert isinstance(queues[i], cl.CommandQueue)
        # int_ptr carries the raw OpenCL handle of the pyopencl object.
        queues_[i] = queues[i].int_ptr
    # NOTE(review): the two trailing NULLs are presumably the optional
    # notification callback and its user data -- confirm against the
    # clFFT API reference.
    errcheck(clfftBakePlan(self.plan,
                           n_queues, queues_,
                           NULL, NULL))
def set_callback(self,
                 func_name,
                 func_string,
                 callback_type,
                 local_mem_size=0,
                 user_data=None):
    """Register callback.

    Parameters
    ----------
    func_name: bytes
        callback function name
    func_string: bytes
        callback function, gets inlined in OpenCL kernel
    callback_type: 'pre' or 'post'
    local_mem_size: int
        size (bytes) of the local memory used by the callback
    user_data:
        pyopencl.Buffer or iterable of pyopencl.Buffer

    Raises
    ------
    KeyError
        `callback_type` is neither 'pre' nor 'post'.
    AssertionError
        An entry of `user_data` is not a `pyopencl.Buffer`.
    MemoryError
        The temporary array of buffer handles could not be allocated.

    A failing clFFT status is reported through `errcheck`.

    Notes
    -----
    The underlying clFFT call is 'clfftSetPlanCallback'
    """
    typedict = {'pre': PRECALLBACK,
                'post': POSTCALLBACK}
    clfft_callback_type = typedict[callback_type]
    if user_data is None:
        user_data = ()
    # A single buffer is shorthand for a one-element sequence.
    if isinstance(user_data, cl.Buffer):
        user_data = (user_data,)
    n_user_data_buffers = len(user_data)
    cdef cl_mem* user_buffers = NULL
    # Allocation and filling happen inside the try block so that the
    # array is released even when validating an entry fails half-way.
    try:
        if n_user_data_buffers:
            user_buffers = malloc(n_user_data_buffers*sizeof(cl_mem))
            if user_buffers == NULL:
                raise MemoryError()
            for n, user_data_buffer in enumerate(user_data):
                assert isinstance(user_data_buffer, cl.Buffer)
                user_buffers[n] = user_data_buffer.int_ptr
        res = clfftSetPlanCallback(self.plan,
                                   func_name,
                                   func_string,
                                   local_mem_size,
                                   clfft_callback_type,
                                   user_buffers,
                                   n_user_data_buffers)
    finally:
        # free(NULL) is a no-op, so this is safe without user data.
        free(user_buffers)
    # Checked only after the temporary array has been released.
    errcheck(res)
def enqueue_transform(self,
                      queues,
                      in_buffers,
                      out_buffers = None,
                      direction_forward = True,
                      wait_for_events = None,
                      temp_buffer = None,
                      ):
    """Enqueue an FFT transform operation, and return immediately.

    Parameters
    ----------
    queues : pyopencl.CommandQueue or iterable of pyopencl.CommandQueue
    in_buffers : pyopencl.Buffer or iterable (1 or 2 items) of pyopencl.Buffer
    out_buffers : pyopencl.Buffer or iterable (1 or 2 items) of pyopencl.Buffer, optional
        can be None for inplace transforms

    Other Parameters
    ----------------
    direction_forward : bool, optional
        Perform forward transform (default True).
    wait_for_events : iterable of pyopencl.Event, optional
        Ensures that all events in this list have finished
        execution before transform is performed.
    temp_buffer : pyopencl.Buffer, optional
        For intermediate results a temporary buffer can be
        provided. The size (in bytes) of this buffer is given by
        the `temp_array_size` property.

    Returns
    -------
    tuple of `pyopencl.Event`, one event for each command queue in `queues`

    Raises
    ------
    `GpyFFT_Error`
        An error occurred accessing the clfftEnqueueTransform function
    AssertionError
        An argument has the wrong type, or a sequence has an
        unsupported number of entries.

    Notes
    -----
    The underlying clFFT call is 'clfftEnqueueTransform'.
    Buffer arguments are checked against `pyopencl.MemoryObjectHolder`.
    """
    cdef int i
    cdef clfftDirection direction
    if direction_forward:
        direction = CLFFT_FORWARD
    else:
        direction = CLFFT_BACKWARD

    # Command queues: a single queue is shorthand for a 1-tuple.
    cdef cl_command_queue queues_[MAX_QUEUES]
    if isinstance(queues, cl.CommandQueue):
        queues = (queues,)
    n_queues = len(queues)
    assert n_queues <= MAX_QUEUES
    for i, queue in enumerate(queues):
        assert isinstance(queue, cl.CommandQueue)
        queues_[i] = queue.int_ptr

    # Events to wait for; the pointer stays NULL when there are none.
    cdef cl_event wait_for_events_array[MAX_WAITFOR_EVENTS]
    cdef cl_event* wait_for_events_ = NULL
    cdef int n_waitfor_events = 0
    if wait_for_events is not None and len(wait_for_events) > 0:
        n_waitfor_events = len(wait_for_events)
        assert n_waitfor_events <= MAX_WAITFOR_EVENTS
        for i, event in enumerate(wait_for_events):
            assert isinstance(event, cl.Event)
            wait_for_events_array[i] = event.int_ptr
        wait_for_events_ = &wait_for_events_array[0]

    # Input buffers: at least one is required, otherwise an
    # uninitialised handle would be handed to clFFT.
    cdef cl_mem in_buffers_[2]
    if isinstance(in_buffers, cl.MemoryObjectHolder):
        in_buffers = (in_buffers,)
    n_in_buffers = len(in_buffers)
    assert n_in_buffers in (1,2)
    for i, in_buffer in enumerate(in_buffers):
        assert isinstance(in_buffer, cl.MemoryObjectHolder)
        in_buffers_[i] = in_buffer.int_ptr

    # Output buffers are optional; NULL is passed when none are given.
    cdef cl_mem out_buffers_array[2]
    cdef cl_mem* out_buffers_ = NULL
    if out_buffers is not None:
        if isinstance(out_buffers, cl.MemoryObjectHolder):
            out_buffers = (out_buffers,)
        n_out_buffers = len(out_buffers)
        assert n_out_buffers in (1,2)
        for i, out_buffer in enumerate(out_buffers):
            assert isinstance(out_buffer, cl.MemoryObjectHolder)
            out_buffers_array[i] = out_buffer.int_ptr
        out_buffers_ = &out_buffers_array[0]

    # Optional temporary buffer for intermediate results.
    cdef cl_mem tmp_buffer_ = NULL
    if temp_buffer is not None:
        assert isinstance(temp_buffer, cl.MemoryObjectHolder)
        tmp_buffer_ = temp_buffer.int_ptr

    cdef cl_event out_cl_events[MAX_QUEUES]
    # The wait list pointer is passed as is, so no NULL pointer is
    # ever indexed when there are no events to wait for.
    errcheck(clfftEnqueueTransform(self.plan,
                                   direction,
                                   n_queues,
                                   &queues_[0],
                                   n_waitfor_events,
                                   wait_for_events_,
                                   out_cl_events,
                                   &in_buffers_[0],
                                   out_buffers_,
                                   tmp_buffer_))
    # Wrap the raw handles without an extra retain.
    return tuple((cl.Event.from_int_ptr(out_cl_events[i], retain=False) for i in range(n_queues)))
#gpyfft = GpyFFT()
#cdef Plan PlanFactory():
#cdef Plan instance = Plan.__new__(Ref)
#instance.plan = None
#return instance
gpyfft-0.7.1/gpyfft/test/ 0000775 0000000 0000000 00000000000 13177652365 0015313 5 ustar 00root root 0000000 0000000 gpyfft-0.7.1/gpyfft/test/__init__.py 0000664 0000000 0000000 00000000000 13177652365 0017412 0 ustar 00root root 0000000 0000000 gpyfft-0.7.1/gpyfft/test/test_batched.py 0000664 0000000 0000000 00000004147 13177652365 0020324 0 ustar 00root root 0000000 0000000 from __future__ import print_function
import unittest
from nose_parameterized import parameterized
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
from gpyfft import FFT
from gpyfft.test.util import get_contexts
# One 1-tuple per context: each entry supplies the positional arguments
# (here only ``ctx``) of one test case expanded by ``parameterized.expand``.
contexts = [(ctx,) for ctx in get_contexts()]
class test_fft_batched(unittest.TestCase):
    """Batched out-of-place 2D transforms of higher-dimensional arrays."""

    def _check_fft2_out_of_place(self, ctx, shape, axes):
        """Transform ``arange`` data of ``shape`` along ``axes`` and compare
        the device result with ``numpy.fft.fft2`` over the same axes."""
        queue = cl.CommandQueue(ctx)

        nd_data = np.arange(np.prod(shape), dtype=np.complex64).reshape(shape)
        cl_data = cla.to_device(queue, nd_data)
        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes=axes,
                        )
        transform.enqueue()

        # Fetch the result once and print the actual data together with
        # the reference computed over the very same axes.
        result = cl_data_transformed.get()
        expected = np.fft.fft2(nd_data, axes=axes)
        print(result)
        print(expected)

        self.assertTrue(np.allclose(result, expected,
                                    rtol=1e-3, atol=1e-3))

    @parameterized.expand(contexts)
    def test_2d_out_of_place(self, ctx):
        """2D transform over the last two axes of a 3D array."""
        L = 4
        M = 64
        N = 32
        axes = (-1, -2)
        self._check_fft2_out_of_place(ctx, (L, M, N), axes)

    @parameterized.expand(contexts)
    def test_2d_in_4d_out_of_place(self, ctx):
        """2D transform over the last two axes of a 4D array."""
        L1 = 4
        L2 = 5
        M = 64
        N = 32
        axes = (-1, -2) #ok
        #axes = (0,1) #ok
        #axes = (0,2) #cannot be collapsed
        self._check_fft2_out_of_place(ctx, (L1, L2, M, N), axes)
gpyfft-0.7.1/gpyfft/test/test_callback.py 0000664 0000000 0000000 00000012247 13177652365 0020466 0 ustar 00root root 0000000 0000000 # coding: utf-8
from __future__ import absolute_import, division, print_function
import unittest
import os
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
from gpyfft.gpyfftlib import *
from gpyfft.test.util import get_contexts
class TestCallbackPreMul(unittest.TestCase):
    """Exercise clFFT pre- and post-callbacks through the low-level plan API.

    Both tests transform a small 2x4 complex array out of place and use a
    float array of the same shape as callback user data.
    """

    # Pre-callback: multiplies each input element by the float found at
    # the same offset in the user data before it enters the transform.
    callback_kernel_src_premul = b"""
float2 premul(__global void* in,
uint inoffset,
__global void* userdata
//__local void* localmem
)
{
float scalar = *((__global float*)userdata + inoffset);
float2 ret = *((__global float2*)in + inoffset) * scalar;
return ret;
}
"""

    def test_callback_pre(self):
        """Run the pre-callback check on every available context."""
        for ctx in get_contexts():
            self.callback_pre(ctx)

    def callback_pre(self, context):
        """Check that FFT with the pre-callback equals fft(data * user_data)."""
        print("context:", context)
        queue = cl.CommandQueue(context)

        nd_data = np.array([[1, 2, 3, 4],
                            [5, 6, 5, 2]],
                           dtype=np.complex64)
        cl_data = cla.to_device(queue, nd_data)
        cl_data_transformed = cla.empty_like(cl_data)

        print("cl_data:")
        print(cl_data)
        print('nd_data.shape/strides:', nd_data.shape, nd_data.strides)
        print('cl_data.shape/strides:', cl_data.shape, cl_data.strides)
        print('cl_data_transformed.shape/strides:', cl_data_transformed.shape, cl_data_transformed.strides)

        G = GpyFFT(debug=False)
        plan = G.create_plan(context, cl_data.shape)
        # Strides are handed to the plan in elements; pyopencl reports bytes.
        plan.strides_in = tuple(x // cl_data.dtype.itemsize for x in cl_data.strides)
        plan.strides_out = tuple(x // cl_data_transformed.dtype.itemsize
                                 for x in cl_data_transformed.strides)

        print('plan.strides_in', plan.strides_in)
        print('plan.strides_out', plan.strides_out)
        print('plan.distances', plan.distances)
        print('plan.batch_size', plan.batch_size)

        plan.inplace = False
        plan.precision = CLFFT_SINGLE
        print('plan.precision:', plan.precision)
        plan.scale_forward = 1.
        print('plan.scale_forward:', plan.scale_forward)
        #print('plan.transpose_result:', plan.transpose_result)

        nd_user_data = np.array([[2, 2, 2, 2],
                                 [3, 4, 5, 6]],
                                dtype=np.float32)
        cl_user_data = cla.to_device(queue, nd_user_data)
        print('cl_user_data')
        print(cl_user_data)

        plan.set_callback(b'premul',
                          self.callback_kernel_src_premul,
                          'pre',
                          user_data=cl_user_data.data)
        plan.bake(queue)
        print('plan.temp_array_size:', plan.temp_array_size)

        plan.enqueue_transform((queue,),
                               (cl_data.data,),
                               (cl_data_transformed.data,)
                               )
        queue.finish()

        expected = np.fft.fftn(nd_data * nd_user_data)
        print('cl_data_transformed:')
        print(cl_data_transformed)
        print('fft(nd_data * nd_user_data):')
        print(expected)

        self.assertTrue(np.allclose(cl_data_transformed.get(), expected))
        del plan

    # Post-callback: stores each transform output scaled by the float found
    # at the same offset in the user data.
    # NOTE(review): declared as returning float2 but has no return
    # statement; the clFFT documentation appears to describe post-callbacks
    # as returning void -- confirm before changing the kernel source.
    callback_kernel_src_postset = b"""
float2 postset(__global void* output,
uint offset,
__global void* userdata,
float2 fftoutput)
{
float scalar = *((__global float*)userdata + offset);
*((__global float2*)output + offset) = fftoutput * scalar;
}
"""

    def test_callback_post(self):
        """Run the post-callback check on every available context."""
        for ctx in get_contexts():
            self.callback_post(ctx)

    def callback_post(self, context):
        """Check that FFT with the post-callback equals fft(data) * user_data."""
        print("context:", context)
        queue = cl.CommandQueue(context)

        nd_data = np.array([[1, 2, 3, 4],
                            [5, 6, 5, 2]],
                           dtype=np.complex64)
        nd_user_data = np.array([[2, 2, 2, 2],
                                 [3, 4, 5, 6]],
                                dtype=np.float32)
        cl_data = cla.to_device(queue, nd_data)
        cl_user_data = cla.to_device(queue, nd_user_data)
        cl_data_transformed = cla.empty_like(cl_data)

        G = GpyFFT(debug=False)
        plan = G.create_plan(context, cl_data.shape)
        # Strides are handed to the plan in elements; pyopencl reports bytes.
        plan.strides_in = tuple(x // cl_data.dtype.itemsize for x in cl_data.strides)
        plan.strides_out = tuple(x // cl_data_transformed.dtype.itemsize
                                 for x in cl_data_transformed.strides)
        plan.inplace = False
        plan.precision = CLFFT_SINGLE

        plan.set_callback(b'postset',
                          self.callback_kernel_src_postset,
                          'post',
                          user_data=cl_user_data.data)
        plan.bake(queue)
        plan.enqueue_transform((queue,),
                               (cl_data.data,),
                               (cl_data_transformed.data,)
                               )
        queue.finish()

        # Print the value that is actually compared, matching the label.
        expected = np.fft.fftn(nd_data) * nd_user_data
        print('cl_data_transformed:')
        print(cl_data_transformed)
        print('fft(nd_data) * nd_user_data')
        print(expected)

        self.assertTrue(np.allclose(cl_data_transformed.get(), expected))
        del plan
#TODO: create TestSuite
gpyfft-0.7.1/gpyfft/test/test_gpyfft.py 0000664 0000000 0000000 00000010737 13177652365 0020233 0 ustar 00root root 0000000 0000000 from __future__ import print_function
import unittest
from nose_parameterized import parameterized
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
from gpyfft import FFT
from gpyfft.test.util import get_contexts, has_double
"""
Some basic tests for high-level interface
"""
#TODO: perform tests for all contexts
# One 1-tuple per context: each entry supplies the positional arguments
# (here only ``ctx``) of one test case expanded by ``parameterized.expand``.
contexts = [(ctx,) for ctx in get_contexts()]
#contexts = [] # see what happens if no OpenCL GPU device available
class test_fft(unittest.TestCase):
    """Basic tests of the high-level ``FFT`` interface against numpy."""

    def test_context(self):
        """Fail loudly when no context is available, since the
        parameterized tests below would otherwise expand to nothing."""
        self.assertTrue(contexts, msg='no OpenCL GPU device available')

    def _require_double(self, ctx):
        """Skip the running test when ``ctx`` lacks double precision."""
        if not has_double(ctx):
            self.skipTest('device does not support double precision')

    @parameterized.expand(contexts)
    def test_1d_inplace(self, ctx):
        queue = cl.CommandQueue(ctx)

        nd_data = np.arange(32, dtype=np.complex64)
        cl_data = cla.to_device(queue, nd_data)
        #cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(ctx, queue,
                        cl_data)
        transform.enqueue()

        #print(cl_data)
        #print(np.fft.fft(nd_data))
        self.assertTrue(np.allclose(cl_data.get(),
                                    np.fft.fft(nd_data)))

    @parameterized.expand(contexts)
    def test_1d_out_of_place(self, ctx):
        queue = cl.CommandQueue(ctx)

        nd_data = np.arange(32, dtype=np.complex64)
        cl_data = cla.to_device(queue, nd_data)
        cl_data_transformed = cla.zeros_like(cl_data)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed
                        )
        transform.enqueue()

        self.assertTrue(np.allclose(cl_data_transformed.get(),
                                    np.fft.fft(nd_data)))

    @parameterized.expand(contexts)
    def test_1d_inplace_double(self, ctx):
        # Reported as skipped instead of silently passing.
        self._require_double(ctx)

        queue = cl.CommandQueue(ctx)

        nd_data = np.arange(32, dtype=np.complex128)
        cl_data = cla.to_device(queue, nd_data)

        transform = FFT(ctx, queue,
                        cl_data)
        transform.enqueue()

        self.assertTrue(np.allclose(cl_data.get(),
                                    np.fft.fft(nd_data)))

    @parameterized.expand(contexts)
    def test_1d_real_to_complex(self, ctx):
        queue = cl.CommandQueue(ctx)

        N = 32

        nd_data = np.arange(N, dtype=np.float32)
        cl_data = cla.to_device(queue, nd_data)
        # A real-to-complex transform of N samples yields N//2+1 values.
        cl_data_transformed = cla.zeros(queue, (N//2+1,), dtype = np.complex64)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        )
        transform.enqueue()

        self.assertTrue(np.allclose(cl_data_transformed.get(),
                                    np.fft.rfft(nd_data)))

    @parameterized.expand(contexts)
    def test_2d_real_to_complex(self, ctx):
        queue = cl.CommandQueue(ctx)

        M = 64
        N = 32

        nd_data = np.arange(M*N, dtype=np.float32)
        nd_data.shape = (M, N)
        cl_data = cla.to_device(queue, nd_data)
        cl_data_transformed = cla.zeros(queue, (M, N//2+1), dtype = np.complex64)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes = (1,0),
                        )
        transform.enqueue()

        # Print the transferred data, not the bound ``get`` method.
        result = cl_data_transformed.get()
        expected = np.fft.rfft2(nd_data)
        print(result)
        print(expected)

        self.assertTrue(np.allclose(result, expected,
                                    rtol=1e-3, atol=1e-3))

    @parameterized.expand(contexts)
    def test_2d_real_to_complex_double(self, ctx):
        # Reported as skipped instead of silently passing.
        self._require_double(ctx)

        queue = cl.CommandQueue(ctx)

        M = 64
        N = 32

        nd_data = np.arange(M*N, dtype=np.float64)
        nd_data.shape = (M, N)
        cl_data = cla.to_device(queue, nd_data)
        cl_data_transformed = cla.zeros(queue, (M, N//2+1), dtype = np.complex128)

        transform = FFT(ctx, queue,
                        cl_data,
                        cl_data_transformed,
                        axes = (1,0),
                        )
        transform.enqueue()

        # Print the transferred data, not the bound ``get`` method.
        result = cl_data_transformed.get()
        expected = np.fft.rfft2(nd_data)
        print(result)
        print(expected)

        self.assertTrue(np.allclose(result, expected,
                                    rtol=1e-8, atol=1e-8))
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
gpyfft-0.7.1/gpyfft/test/test_gpyfftlib.py 0000664 0000000 0000000 00000002150 13177652365 0020710 0 ustar 00root root 0000000 0000000 from __future__ import print_function
import unittest
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
from gpyfft import gpyfftlib
from gpyfft.test.util import get_contexts
"""
Some basic tests
"""
class test_basic(unittest.TestCase):
    """Smoke tests for the low-level ``gpyfftlib`` wrapper."""

    def test_basic(self):
        """Create the library object and print the clFFT version."""
        fft_lib = gpyfftlib.GpyFFT()
        print('clFFT version:', fft_lib.get_version())
        del fft_lib

    #@unittest.skip('segfaults with pytest')
    def test_create_plan(self):
        """Create a plan for a small 2D array and print its settings."""
        fft_lib = gpyfftlib.GpyFFT()
        context = get_contexts()[0]
        queue = cl.CommandQueue(context)

        host_data = np.array([[1, 2, 3, 4],
                              [5, 6, 7, 8]],
                             dtype=np.complex64)
        device_data = cla.to_device(queue, host_data)
        # Allocated as in the original test; not otherwise used here.
        device_result = cla.zeros_like(device_data)

        plan = fft_lib.create_plan(context, device_data.shape)
        for attribute in ('strides_in', 'strides_out',
                          'distances', 'batch_size'):
            print('plan.' + attribute, getattr(plan, attribute))

        # Release the plan before the library object.
        del plan
        del fft_lib
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
gpyfft-0.7.1/gpyfft/test/util.py 0000664 0000000 0000000 00000000706 13177652365 0016645 0 ustar 00root root 0000000 0000000 import pyopencl as cl
def get_contexts():
    """
    Return a list of OpenCL contexts, one for every GPU device found on
    any platform of the system.
    """
    gpu_devices = [
        device
        for platform in cl.get_platforms()
        for device in platform.get_devices(device_type=cl.device_type.GPU)
    ]
    # Each context wraps exactly one device.
    return [cl.Context([device]) for device in gpu_devices]
def has_double(ctx):
    """Tell whether the first device of ``ctx`` lists the ``cl_khr_fp64``
    extension; only that first device is inspected."""
    return 'cl_khr_fp64' in ctx.devices[0].extensions
gpyfft-0.7.1/gpyfft/version.py 0000664 0000000 0000000 00000000026 13177652365 0016371 0 ustar 00root root 0000000 0000000 __version__ = '0.7.1'
gpyfft-0.7.1/setup.cfg 0000664 0000000 0000000 00000000177 13177652365 0014663 0 ustar 00root root 0000000 0000000 [bumpversion]
current_version = 0.7.1
commit = True
tag = True
[build_ext]
inplace = 1
[bumpversion:file:gpyfft/version.py]
gpyfft-0.7.1/setup.py 0000664 0000000 0000000 00000005462 13177652365 0014556 0 ustar 00root root 0000000 0000000 import os
import platform
from setuptools import setup, Extension
from distutils.util import convert_path
from Cython.Build import cythonize
system = platform.system()

## paths settings
# The defaults below are the development locations this script was written
# with. Each of them can be overridden through an environment variable:
#   CLFFT_DIR        root of the clFFT source or binary distribution
#   CLFFT_LIB_DIR    directory containing the clFFT library
#   CLFFT_INCL_DIRS  clFFT include directories, separated by os.pathsep
#   CL_INCL_DIRS     OpenCL include directories, separated by os.pathsep


def _env_dirs(name, default):
    """Return the os.pathsep-separated directories stored in environment
    variable ``name``, or a copy of ``default`` when it is unset or empty."""
    value = os.environ.get(name)
    if not value:
        return list(default)
    return [entry for entry in value.split(os.pathsep) if entry]


# Linux
if 'Linux' in system:
    CLFFT_DIR = os.environ.get('CLFFT_DIR', r'/home/gregor/devel/clFFT')
    CLFFT_LIB_DIR = os.environ.get('CLFFT_LIB_DIR', r'/usr/local/lib64')
    CLFFT_INCL_DIRS = _env_dirs('CLFFT_INCL_DIRS',
                                [os.path.join(CLFFT_DIR, 'src', 'include'), ])
    CL_INCL_DIRS = _env_dirs('CL_INCL_DIRS', ['/opt/AMDAPPSDK-3.0/include'])

#Windows
elif 'Windows' in system:
    CLFFT_DIR = os.environ.get(
        'CLFFT_DIR', r'C:\Users\q014gt\Devel\clFFT-Full-2.10.2-Windows-x64')
    CLFFT_LIB_DIR = os.environ.get('CLFFT_LIB_DIR',
                                   os.path.join(CLFFT_DIR, 'bin'))
    CLFFT_INCL_DIRS = _env_dirs('CLFFT_INCL_DIRS',
                                [os.path.join(CLFFT_DIR, 'include'), ])
    CL_DIR = os.getenv('AMDAPPSDKROOT')
    # AMDAPPSDKROOT may be unset; fall back to no extra include directory
    # instead of failing inside os.path.join(None, ...).
    CL_INCL_DIRS = _env_dirs(
        'CL_INCL_DIRS',
        [os.path.join(CL_DIR, 'include')] if CL_DIR else [])

# macOS
elif 'Darwin' in system:
    CLFFT_DIR = os.environ.get('CLFFT_DIR', r'/Users/gregor/Devel/clFFT')
    CLFFT_LIB_DIR = os.environ.get('CLFFT_LIB_DIR',
                                   r'/Users/gregor/Devel/clFFT/src/library')
    CLFFT_INCL_DIRS = _env_dirs('CLFFT_INCL_DIRS',
                                [os.path.join(CLFFT_DIR, 'src', 'include'), ])
    CL_INCL_DIRS = _env_dirs('CL_INCL_DIRS', [])

else:
    # Without this branch the build would fail later with a NameError.
    raise RuntimeError(
        "unsupported platform %r: no clFFT path settings defined" % system)
import Cython.Compiler.Options
Cython.Compiler.Options.generate_cleanup_code = 2
# The single extension module: the Cython wrapper around clFFT, compiled
# as C++ and linked against the clFFT library.
_gpyfftlib_extension = Extension(
    "gpyfft.gpyfftlib",
    [os.path.join('gpyfft', 'gpyfftlib.pyx')],
    include_dirs=CLFFT_INCL_DIRS + CL_INCL_DIRS,
    extra_compile_args=[],
    extra_link_args=[],
    libraries=['clFFT'],
    library_dirs=[CLFFT_LIB_DIR],
    language='c++',
)
extensions = [_gpyfftlib_extension]
def copy_clfftdll_to_package():
    """Copy ``clFFT.dll`` and ``StatTimer.dll`` from ``CLFFT_LIB_DIR`` into
    the ``gpyfft`` package directory."""
    from shutil import copy

    for dll_name in ('clFFT.dll', 'StatTimer.dll'):
        copy(os.path.join(CLFFT_LIB_DIR, dll_name), 'gpyfft')
    print("copied clFFT.dll, StatTimer.dll")
# Only Windows builds bundle the clFFT DLLs with the package.
package_data = {}
if 'Windows' in platform.system():
    copy_clfftdll_to_package()
    package_data['gpyfft'] = ['clFFT.dll', 'StatTimer.dll']
def get_version():
    """Return the ``__version__`` string defined in ``gpyfft/version.py``.

    The version file is located relative to this script, not the current
    working directory, and is executed in a scratch namespace so that the
    package itself does not have to be importable at build time.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    # os.path.join builds the platform-specific path directly, so the
    # deprecated distutils path conversion is not needed.
    version_path = os.path.join(here, 'gpyfft', 'version.py')
    main_ns = {}
    with open(version_path) as version_file:
        exec(version_file.read(), main_ns)
    return main_ns['__version__']
def get_readme():
    """Return the text of the ``README.md`` located next to this script."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    readme_path = os.path.join(script_dir, "README.md")
    with open(readme_path, "r") as readme_file:
        return readme_file.read()
# Runtime and build-time requirements of the package.
install_requires = ["numpy", "pyopencl"]
setup_requires = ["numpy", "cython"]

# Collected first and passed on unchanged; the values are evaluated in
# the order they are written here.
_setup_arguments = {
    'name': 'gpyfft',
    'version': get_version(),
    'description': 'A Python wrapper for the OpenCL FFT library clFFT',
    'long_description': get_readme(),
    'url': r"https://github.com/geggo/gpyfft",
    'maintainer': 'Gregor Thalhammer',
    'maintainer_email': 'gregor.thalhammer@gmail.com',
    'license': 'LGPL',
    'packages': ['gpyfft', "gpyfft.test"],
    'ext_modules': cythonize(extensions),
    'package_data': package_data,
    'install_requires': install_requires,
    'setup_requires': setup_requires,
}
setup(**_setup_arguments)