scikit-cuda-0.5.1/
(AUTHORS, CHANGES, INSTALL, and LICENSE are links to the corresponding .rst files under docs/source/.)

scikit-cuda-0.5.1/MANIFEST.in:
include AUTHORS CHANGES INSTALL LICENSE README.rst tox.ini
recursive-include skcuda *.py
recursive-include skcuda *.h
recursive-include demos *.py
recursive-include tests *.py
include docs/source/*.rst
include docs/Makefile
include docs/source/conf.py
include docs/source/_static/*
include docs/source/_templates/*.html
exclude MANIFEST.in

scikit-cuda-0.5.1/Makefile:
PYTHON := `which python`
DESTDIR = /
NAME = scikit-cuda
VERSION = $(shell $(PYTHON) -c 'import setup; print(setup.VERSION)')
.PHONY: package build docs install test clean
package:
	$(PYTHON) setup.py sdist --formats=gztar

build:
	$(PYTHON) setup.py build

docs:
	$(PYTHON) setup.py build_sphinx

install:
	$(PYTHON) setup.py install --root=$(DESTDIR)

test:
	$(PYTHON) setup.py test

clean:
	$(PYTHON) setup.py clean

scikit-cuda-0.5.1/README.rst:
.. -*- rst -*-

.. image:: https://raw.githubusercontent.com/lebedov/scikit-cuda/master/docs/source/_static/logo.png
   :alt: scikit-cuda

Package Description
-------------------
scikit-cuda provides Python interfaces to many of the functions in the CUDA
device/runtime, CUBLAS, CUFFT, and CUSOLVER libraries distributed as part of
NVIDIA's CUDA Programming Toolkit, as well as interfaces to select functions
in the free and standard versions of the CULA Dense Toolkit. Both low-level
wrapper functions similar to their C counterparts and high-level functions
comparable to those in NumPy and SciPy are provided.

.. image:: https://zenodo.org/badge/doi/10.5281/zenodo.20211.svg
   :target: http://dx.doi.org/10.5281/zenodo.20211
   :alt: 0.5.0

.. image:: https://img.shields.io/pypi/v/scikit-cuda.svg
   :target: https://pypi.python.org/pypi/scikit-cuda
   :alt: Latest Version

.. image:: https://img.shields.io/pypi/dm/scikit-cuda.svg
   :target: https://pypi.python.org/pypi/scikit-cuda
   :alt: Downloads

.. image:: http://prime4commit.com/projects/102.svg
   :target: http://prime4commit.com/projects/102
   :alt: Support the project
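
Example
-------
The following is a minimal sketch of the high-level interface, modeled on the
scripts in the ``demos/`` directory. It assumes a working PyCUDA installation
and a CUDA-capable GPU; ``linalg.dot`` is the CUBLAS-backed matrix product used
in ``demos/dot_demo.py``:

.. code-block:: python

    import pycuda.autoinit                 # create a CUDA context
    import pycuda.gpuarray as gpuarray
    import numpy as np
    import skcuda.linalg as linalg

    linalg.init()                          # initialize the CUBLAS-based linalg module

    a = np.asarray(np.random.rand(4, 4), np.float32)
    a_gpu = gpuarray.to_gpu(a)             # copy the host array to the GPU
    b_gpu = linalg.dot(a_gpu, a_gpu)       # matrix product computed on the GPU
    print(np.allclose(np.dot(a, a), b_gpu.get()))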

Documentation
-------------
Package documentation is available at
`<http://scikit-cuda.readthedocs.org>`_.

Development
-----------
The latest source code can be obtained from
`<https://github.com/lebedov/scikit-cuda>`_.
Authors & Acknowledgments
-------------------------
See the included `AUTHORS`_ file for more information.
.. _AUTHORS: docs/source/authors.rst

License
-------
This software is licensed under the BSD License.
See the included `LICENSE`_ file for more information.

.. _LICENSE: docs/source/license.rst

scikit-cuda-0.5.1/demos/diag_demo.py:
#!/usr/bin/env python
"""
Demonstrate diagonal matrix creation on the GPU.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np
import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()
# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
demo_types.extend([np.float64, np.complex128])
for t in demo_types:
    print 'Testing diagonal matrix creation for type ' + str(np.dtype(t))
    v = np.array([1, 2, 3, 4, 5, 6], t)
    v_gpu = gpuarray.to_gpu(v)
    d_gpu = culinalg.diag(v_gpu)
    print 'Success status: ', np.all(d_gpu.get() == np.diag(v))

scikit-cuda-0.5.1/demos/dot_demo.py:
#!/usr/bin/env python
"""
Demonstrates multiplication of two matrices on the GPU.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np
import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()
# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
demo_types.extend([np.float64, np.complex128])
for t in demo_types:
    print 'Testing matrix multiplication for type ' + str(np.dtype(t))
    if np.iscomplexobj(t()):
        a = np.asarray(np.random.rand(10, 5)+1j*np.random.rand(10, 5), t)
        b = np.asarray(np.random.rand(5, 5)+1j*np.random.rand(5, 5), t)
        c = np.asarray(np.random.rand(5, 5)+1j*np.random.rand(5, 5), t)
    else:
        a = np.asarray(np.random.rand(10, 5), t)
        b = np.asarray(np.random.rand(5, 5), t)
        c = np.asarray(np.random.rand(5, 5), t)

    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = gpuarray.to_gpu(c)

    temp_gpu = culinalg.dot(a_gpu, b_gpu)
    d_gpu = culinalg.dot(temp_gpu, c_gpu)
    temp_gpu.gpudata.free()
    del(temp_gpu)
    print 'Success status: ', np.allclose(np.dot(np.dot(a, b), c), d_gpu.get())

    print 'Testing vector multiplication for type ' + str(np.dtype(t))
    if np.iscomplexobj(t()):
        d = np.asarray(np.random.rand(5)+1j*np.random.rand(5), t)
        e = np.asarray(np.random.rand(5)+1j*np.random.rand(5), t)
    else:
        d = np.asarray(np.random.rand(5), t)
        e = np.asarray(np.random.rand(5), t)
    d_gpu = gpuarray.to_gpu(d)
    e_gpu = gpuarray.to_gpu(e)
    temp = culinalg.dot(d_gpu, e_gpu)
    print 'Success status: ', np.allclose(np.dot(d, e), temp)

scikit-cuda-0.5.1/demos/fft2d_batch_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to use the PyCUDA interface to CUFFT to compute a
batch of 2D FFTs.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.fft as cu_fft
print 'Testing fft/ifft..'
N = 256
batch_size = 16
x = np.empty((batch_size, N, N), np.float32)
xf = np.empty((batch_size, N, N), np.complex64)
y = np.empty((batch_size, N, N), np.float32)
for i in xrange(batch_size):
    x[i, :, :] = np.asarray(np.random.rand(N, N), np.float32)
    xf[i, :, :] = np.fft.fft2(x[i, :, :])
    y[i, :, :] = np.real(np.fft.ifft2(xf[i, :, :]))
x_gpu = gpuarray.to_gpu(x)
xf_gpu = gpuarray.empty((batch_size, N, N/2+1), np.complex64)
plan_forward = cu_fft.Plan((N, N), np.float32, np.complex64, batch_size)
cu_fft.fft(x_gpu, xf_gpu, plan_forward)
y_gpu = gpuarray.empty_like(x_gpu)
plan_inverse = cu_fft.Plan((N, N), np.complex64, np.float32, batch_size)
cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)
print 'Success status: ', np.allclose(y, y_gpu.get(), atol=1e-6)
print 'Testing in-place fft..'
x = np.asarray(np.random.rand(batch_size, N, N)+\
               1j*np.random.rand(batch_size, N, N), np.complex64)
x_gpu = gpuarray.to_gpu(x)
plan = cu_fft.Plan((N, N), np.complex64, np.complex64, batch_size)
cu_fft.fft(x_gpu, x_gpu, plan)
cu_fft.ifft(x_gpu, x_gpu, plan, True)
print 'Success status: ', np.allclose(x, x_gpu.get(), atol=1e-6)

scikit-cuda-0.5.1/demos/fft2d_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to use the PyCUDA interface to CUFFT to compute 2D FFTs.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.fft as cu_fft
print 'Testing fft/ifft..'
N = 1024
M = N/2
x = np.asarray(np.random.rand(N, M), np.float32)
xf = np.fft.fft2(x)
y = np.real(np.fft.ifft2(xf))
x_gpu = gpuarray.to_gpu(x)
xf_gpu = gpuarray.empty((x.shape[0], x.shape[1]/2+1), np.complex64)
plan_forward = cu_fft.Plan(x_gpu.shape, np.float32, np.complex64)
cu_fft.fft(x_gpu, xf_gpu, plan_forward)
y_gpu = gpuarray.empty_like(x_gpu)
plan_inverse = cu_fft.Plan(x_gpu.shape, np.complex64, np.float32)
cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)
print 'Success status: ', np.allclose(y, y_gpu.get(), atol=1e-6)
print 'Testing in-place fft..'
x = np.asarray(np.random.rand(N, M)+1j*np.random.rand(N, M), np.complex64)
x_gpu = gpuarray.to_gpu(x)
plan = cu_fft.Plan(x_gpu.shape, np.complex64, np.complex64)
cu_fft.fft(x_gpu, x_gpu, plan)
cu_fft.ifft(x_gpu, x_gpu, plan, True)
print 'Success status: ', np.allclose(x, x_gpu.get(), atol=1e-6)

scikit-cuda-0.5.1/demos/fft_batch_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to use the PyCUDA interface to CUFFT to compute a
batch of 1D FFTs.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.fft as cu_fft
print 'Testing fft/ifft..'
N = 4096*16
batch_size = 16
x = np.asarray(np.random.rand(batch_size, N), np.float32)
xf = np.fft.fft(x)
y = np.real(np.fft.ifft(xf))
x_gpu = gpuarray.to_gpu(x)
xf_gpu = gpuarray.empty((batch_size, N/2+1), np.complex64)
plan_forward = cu_fft.Plan(N, np.float32, np.complex64, batch_size)
cu_fft.fft(x_gpu, xf_gpu, plan_forward)
y_gpu = gpuarray.empty_like(x_gpu)
plan_inverse = cu_fft.Plan(N, np.complex64, np.float32, batch_size)
cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)
print 'Success status: ', np.allclose(y, y_gpu.get(), atol=1e-6)
print 'Testing in-place fft..'
x = np.asarray(np.random.rand(batch_size, N)+\
1j*np.random.rand(batch_size, N), np.complex64)
x_gpu = gpuarray.to_gpu(x)
plan = cu_fft.Plan(N, np.complex64, np.complex64, batch_size)
cu_fft.fft(x_gpu, x_gpu, plan)
cu_fft.ifft(x_gpu, x_gpu, plan, True)
print 'Success status: ', np.allclose(x, x_gpu.get(), atol=1e-6)

scikit-cuda-0.5.1/demos/fft_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to use the PyCUDA interface to CUFFT to compute 1D FFTs.
"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.fft as cu_fft
print 'Testing fft/ifft..'
N = 4096*16
x = np.asarray(np.random.rand(N), np.float32)
xf = np.fft.fft(x)
y = np.real(np.fft.ifft(xf))
x_gpu = gpuarray.to_gpu(x)
xf_gpu = gpuarray.empty(N/2+1, np.complex64)
plan_forward = cu_fft.Plan(x_gpu.shape, np.float32, np.complex64)
cu_fft.fft(x_gpu, xf_gpu, plan_forward)
y_gpu = gpuarray.empty_like(x_gpu)
plan_inverse = cu_fft.Plan(x_gpu.shape, np.complex64, np.float32)
cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)
print 'Success status: ', np.allclose(y, y_gpu.get(), atol=1e-6)
print 'Testing in-place fft..'
x = np.asarray(np.random.rand(N)+1j*np.random.rand(N), np.complex64)
x_gpu = gpuarray.to_gpu(x)
plan = cu_fft.Plan(x_gpu.shape, np.complex64, np.complex64)
cu_fft.fft(x_gpu, x_gpu, plan)
cu_fft.ifft(x_gpu, x_gpu, plan, True)
print 'Success status: ', np.allclose(x, x_gpu.get(), atol=1e-6)

scikit-cuda-0.5.1/demos/indexing_2d_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to access 2D arrays within a PyCUDA kernel in a
numpy-consistent manner.
"""
from string import Template
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
import numpy as np
import skcuda.misc as misc
A = 3
B = 4
N = A*B
# Define a 2D array:
# x_orig = np.arange(0, N, 1, np.float64)
x_orig = np.asarray(np.random.rand(N), np.float64)
x = x_orig.reshape((A, B))
# These functions demonstrate how to convert a linear index into subscripts:
a = lambda i: i/B
b = lambda i: np.mod(i, B)
# Check that x[subscript(i)] is equivalent to x.flat[i]:
subscript = lambda i: (a(i), b(i))
for i in xrange(x.size):
    assert x.flat[i] == x[subscript(i)]
# Check that x[i, j] is equivalent to x.flat[index(i, j)]:
index = lambda i, j: i*B+j
for i in xrange(A):
    for j in xrange(B):
        assert x[i, j] == x.flat[index(i, j)]
func_mod_template = Template("""
// Macro for converting subscripts to linear index:
#define INDEX(a, b) a*${B}+b

__global__ void func(double *x, unsigned int N) {
    // Obtain the linear index corresponding to the current thread:
    unsigned int idx = blockIdx.y*${max_threads_per_block}*${max_blocks_per_grid}+
                       blockIdx.x*${max_threads_per_block}+threadIdx.x;

    // Convert the linear index to subscripts:
    unsigned int a = idx/${B};
    unsigned int b = idx%${B};

    // Use the subscripts to access the array:
    if (idx < N) {
        if (b == 0)
            x[INDEX(a,b)] = 100;
    }
}
""")
max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(pycuda.autoinit.device)
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device, x.shape)
max_blocks_per_grid = max(max_grid_dim)
func_mod = \
SourceModule(func_mod_template.substitute(max_threads_per_block=max_threads_per_block,
max_blocks_per_grid=max_blocks_per_grid,
A=A, B=B))
func = func_mod.get_function('func')
x_gpu = gpuarray.to_gpu(x)
func(x_gpu.gpudata, np.uint32(x_gpu.size),
block=block_dim,
grid=grid_dim)
x_np = x.copy()
x_np[:, 0] = 100
print 'Success status: ', np.allclose(x_np, x_gpu.get())

scikit-cuda-0.5.1/demos/indexing_3d_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to access 3D arrays within a PyCUDA kernel in a
numpy-consistent manner.
"""
from string import Template
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
import numpy as np
import skcuda.misc as misc
A = 3
B = 4
C = 5
N = A*B*C
# Define a 3D array:
# x_orig = np.arange(0, N, 1, np.float64)
x_orig = np.asarray(np.random.rand(N), np.float64)
x = x_orig.reshape((A, B, C))
# These functions demonstrate how to convert a linear index into subscripts:
a = lambda i: i/(B*C)
b = lambda i: np.mod(i, B*C)/C
c = lambda i: np.mod(np.mod(i, B*C), C)
# Check that x[subscript(i)] is equivalent to x.flat[i]:
subscript = lambda i: (a(i), b(i), c(i))
for i in xrange(x.size):
    assert x.flat[i] == x[subscript(i)]
# Check that x[i,j,k] is equivalent to x.flat[index(i,j,k)]:
index = lambda i,j,k: i*B*C+j*C+k
for i in xrange(A):
    for j in xrange(B):
        for k in xrange(C):
            assert x[i, j, k] == x.flat[index(i, j, k)]
func_mod_template = Template("""
// Macro for converting subscripts to linear index:
#define INDEX(a, b, c) a*${B}*${C}+b*${C}+c

__global__ void func(double *x, unsigned int N) {
    // Obtain the linear index corresponding to the current thread:
    unsigned int idx = blockIdx.y*${max_threads_per_block}*${max_blocks_per_grid}+
                       blockIdx.x*${max_threads_per_block}+threadIdx.x;

    // Convert the linear index to subscripts:
    unsigned int a = idx/(${B}*${C});
    unsigned int b = (idx%(${B}*${C}))/${C};
    unsigned int c = (idx%(${B}*${C}))%${C};

    // Use the subscripts to access the array:
    if (idx < N) {
        if (b == 0)
            x[INDEX(a,b,c)] = 100;
    }
}
""")
max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(pycuda.autoinit.device)
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device, x.shape)
max_blocks_per_grid = max(max_grid_dim)
func_mod = \
SourceModule(func_mod_template.substitute(max_threads_per_block=max_threads_per_block,
max_blocks_per_grid=max_blocks_per_grid,
A=A, B=B, C=C))
func = func_mod.get_function('func')
x_gpu = gpuarray.to_gpu(x)
func(x_gpu.gpudata, np.uint32(x_gpu.size),
block=block_dim,
grid=grid_dim)
x_np = x.copy()
x_np[:, 0, :] = 100
print 'Success status: ', np.allclose(x_np, x_gpu.get())

scikit-cuda-0.5.1/demos/indexing_4d_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to access 4D arrays within a PyCUDA kernel in a
numpy-consistent manner.
"""
from string import Template
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
import numpy as np
import skcuda.misc as misc
A = 3
B = 4
C = 5
D = 6
N = A*B*C*D
# Define a 4D array:
# x_orig = np.arange(0, N, 1, np.float64)
x_orig = np.asarray(np.random.rand(N), np.float64)
x = x_orig.reshape((A, B, C, D))
# These functions demonstrate how to convert a linear index into subscripts:
a = lambda i: i/(B*C*D)
b = lambda i: np.mod(i, B*C*D)/(C*D)
c = lambda i: np.mod(np.mod(i, B*C*D), C*D)/D
d = lambda i: np.mod(np.mod(np.mod(i, B*C*D), C*D), D)
# Check that x[subscript(i)] is equivalent to x.flat[i]:
subscript = lambda i: (a(i), b(i), c(i), d(i))
for i in xrange(x.size):
    assert x.flat[i] == x[subscript(i)]
# Check that x[i,j,k,l] is equivalent to x.flat[index(i,j,k,l)]:
index = lambda i,j,k,l: i*B*C*D+j*C*D+k*D+l
for i in xrange(A):
    for j in xrange(B):
        for k in xrange(C):
            for l in xrange(D):
                assert x[i, j, k, l] == x.flat[index(i, j, k, l)]
func_mod_template = Template("""
// Macro for converting subscripts to linear index:
#define INDEX(a, b, c, d) a*${B}*${C}*${D}+b*${C}*${D}+c*${D}+d

__global__ void func(double *x, unsigned int N) {
    // Obtain the linear index corresponding to the current thread:
    unsigned int idx = blockIdx.y*${max_threads_per_block}*${max_blocks_per_grid}+
                       blockIdx.x*${max_threads_per_block}+threadIdx.x;

    // Convert the linear index to subscripts:
    unsigned int a = idx/(${B}*${C}*${D});
    unsigned int b = (idx%(${B}*${C}*${D}))/(${C}*${D});
    unsigned int c = ((idx%(${B}*${C}*${D}))%(${C}*${D}))/${D};
    unsigned int d = ((idx%(${B}*${C}*${D}))%(${C}*${D}))%${D};

    // Use the subscripts to access the array:
    if (idx < N) {
        if (c == 0)
            x[INDEX(a,b,c,d)] = 100;
    }
}
""")
max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(pycuda.autoinit.device)
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device, x.shape)
max_blocks_per_grid = max(max_grid_dim)
func_mod = \
SourceModule(func_mod_template.substitute(max_threads_per_block=max_threads_per_block,
max_blocks_per_grid=max_blocks_per_grid,
A=A, B=B, C=C, D=D))
func = func_mod.get_function('func')
x_gpu = gpuarray.to_gpu(x)
func(x_gpu.gpudata, np.uint32(x_gpu.size),
block=block_dim,
grid=grid_dim)
x_np = x.copy()
x_np[:, :, 0, :] = 100
print 'Success status: ', np.allclose(x_np, x_gpu.get())

scikit-cuda-0.5.1/demos/mdot_demo.py:
#!/usr/bin/env python
"""
Demonstrates multiplication of several matrices on the GPU.
"""
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import pycuda.autoinit
import numpy as np
import skcuda.linalg as linalg
import skcuda.misc as cumisc
linalg.init()
# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
demo_types.extend([np.float64, np.complex128])
for t in demo_types:
    print 'Testing multiple matrix multiplication for type ' + str(np.dtype(t))
    if np.iscomplexobj(t()):
        a = np.asarray(np.random.rand(8, 4)+1j*np.random.rand(8, 4), t)
        b = np.asarray(np.random.rand(4, 4)+1j*np.random.rand(4, 4), t)
        c = np.asarray(np.random.rand(4, 4)+1j*np.random.rand(4, 4), t)
    else:
        a = np.asarray(np.random.rand(8, 4), t)
        b = np.asarray(np.random.rand(4, 4), t)
        c = np.asarray(np.random.rand(4, 4), t)

    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = gpuarray.to_gpu(c)

    d_gpu = linalg.mdot(a_gpu, b_gpu, c_gpu)
    print 'Success status: ', np.allclose(np.dot(a, np.dot(b, c)), d_gpu.get())

scikit-cuda-0.5.1/demos/pinv_demo.py:
#!/usr/bin/env python
"""
Demonstrates computation of the pseudoinverse on the GPU.
"""
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()
# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
import skcuda.cula as cula
demo_types = [np.float32, np.complex64]
if cula._libcula_toolkit == 'premium' and \
   cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])
for t in demo_types:
    print 'Testing pinv for type ' + str(np.dtype(t))
    a = np.asarray((np.random.rand(50, 50)-0.5)/10, t)
    a_gpu = gpuarray.to_gpu(a)
    a_inv_gpu = culinalg.pinv(a_gpu)
    print 'Success status: ', np.allclose(np.linalg.pinv(a), a_inv_gpu.get(),
                                          atol=1e-2)
    print 'Maximum error: ', np.max(np.abs(np.linalg.pinv(a)-a_inv_gpu.get()))
    print ''

scikit-cuda-0.5.1/demos/svd_demo.py:
#!/usr/bin/env python
"""
Demonstrates computation of the singular value decomposition on the GPU.
"""
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()
# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
import skcuda.cula as cula
demo_types = [np.float32, np.complex64]
if cula._libcula_toolkit == 'premium' and \
   cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])
for t in demo_types:
    print 'Testing svd for type ' + str(np.dtype(t))
    a = np.asarray((np.random.rand(50, 50)-0.5)/10, t)
    a_gpu = gpuarray.to_gpu(a)
    u_gpu, s_gpu, vh_gpu = culinalg.svd(a_gpu)
    a_rec = np.dot(u_gpu.get(), np.dot(np.diag(s_gpu.get()), vh_gpu.get()))
    print 'Success status: ', np.allclose(a, a_rec, atol=1e-3)
    print 'Maximum error: ', np.max(np.abs(a-a_rec))
    print ''

scikit-cuda-0.5.1/demos/transpose_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to transpose matrices on the GPU.
"""
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()
# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
demo_types.extend([np.float64, np.complex128])
for t in demo_types:
    print 'Testing transpose for type ' + str(np.dtype(t))
    if np.iscomplexobj(t()):
        a = np.array([[1j, 2j, 3j, 4j, 5j, 6j],
                      [7j, 8j, 9j, 10j, 11j, 12j]], t)
    else:
        a = np.array([[1, 2, 3, 4, 5, 6],
                      [7, 8, 9, 10, 11, 12]], t)
    a_gpu = gpuarray.to_gpu(a)
    at_gpu = culinalg.transpose(a_gpu)
    if np.iscomplexobj(t()):
        print 'Success status: ', np.all(np.conj(a.T) == at_gpu.get())
    else:
        print 'Success status: ', np.all(a.T == at_gpu.get())

scikit-cuda-0.5.1/demos/tril_demo.py:
#!/usr/bin/env python
"""
Demonstrates how to extract the lower triangle of a matrix.
"""
import pycuda.autoinit
import pycuda.driver as drv
import numpy as np
import pycuda.gpuarray as gpuarray
import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()
# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
demo_types = [np.float32, np.complex64]
if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
demo_types.extend([np.float64, np.complex128])
for t in demo_types:
    print 'Testing lower triangle extraction for type ' + str(np.dtype(t))
    N = 10
    if np.iscomplexobj(t()):
        a = np.asarray(np.random.rand(N, N)+1j*np.random.rand(N, N), t)
    else:
        a = np.asarray(np.random.rand(N, N), t)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = culinalg.tril(a_gpu, False)
    print 'Success status: ', np.allclose(b_gpu.get(), np.tril(a))

scikit-cuda-0.5.1/docs/Makefile:
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = build
SRCDIR = source
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html      to make standalone HTML files"
	@echo "  dirhtml   to make HTML files named index.html in directories"
	@echo "  pickle    to make pickle files"
	@echo "  json      to make JSON files"
	@echo "  htmlhelp  to make HTML files and a HTML help project"
	@echo "  qthelp    to make HTML files and a qthelp project"
	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  changes   to make an overview of all changed/added/deprecated items"
	@echo "  linkcheck to check all external links for integrity"
	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"

clean:
	-rm -rf $(BUILDDIR)/ $(SRCDIR)/generated/

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/TED.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/TED.qhc"

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
	      "run these through (pdf)latex."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output" \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the" \
	      "results in $(BUILDDIR)/doctest/output.txt."

scikit-cuda-0.5.1/docs/source/_static/logo.png:
(binary PNG image: the scikit-cuda logo; contents omitted)