gimmik-2.1/ 0000755 0000765 0000024 00000000000 12755351540 013431 5 ustar vincent staff 0000000 0000000 gimmik-2.1/gimmik/ 0000755 0000765 0000024 00000000000 12755351540 014706 5 ustar vincent staff 0000000 0000000 gimmik-2.1/gimmik/__init__.py 0000644 0000765 0000024 00000001766 12755167376 017045 0 ustar vincent staff 0000000 0000000 # -*- coding: utf-8 -*-
import pkgutil
import re
from mako.template import Template
import numpy as np
from gimmik._version import __version__
def generate_mm(mat, dtype, platform, alpha=1.0, beta=0.0, funcn='gimmik_mm'):
    """Render kernel source specialised to the operator matrix *mat*.

    The template ``kernels/<platform>.mako`` shipped with this package is
    rendered with ``alpha*mat``, ``beta``, ``funcn`` and the C name of the
    requested floating point type.

    :param mat: operator matrix; it is multiplied by *alpha* before being
        handed to the template.
    :param dtype: anything accepted by ``numpy.dtype`` that resolves to
        ``float32`` or ``float64``.
    :param platform: base name of the kernel template to render.
    :param alpha: scalar multiplied into *mat*.
    :param beta: scalar passed through to the template unchanged.
    :param funcn: name given to the generated function.
    :returns: the rendered kernel source.
    :raises ValueError: if *dtype* is neither single nor double precision.
    """
    # Translate the NumPy scalar type into the matching C type name
    ctypes_by_scalar = {np.float32: 'float', np.float64: 'double'}
    ctype = ctypes_by_scalar.get(np.dtype(dtype).type)
    if ctype is None:
        raise ValueError('Invalid floating point data type')

    # Fold alpha into the operator matrix up front
    scaled = alpha*mat

    # Fetch the packaged template for this platform and render it
    raw = pkgutil.get_data(__name__, 'kernels/{0}.mako'.format(platform))
    rendered = Template(raw).render(dtype=ctype, mat=scaled, beta=beta,
                                    funcn=funcn)

    # Double precision output needs no post-processing
    if ctype != 'float':
        return rendered

    # Single precision: append 'f' to every floating point constant
    return re.sub(r'(?=\d*[.eE])(?=\.?\d)\d*\.?\d*(?:[eE][+-]?\d+)?',
                  r'\g<0>f', rendered)
gimmik-2.1/gimmik/_version.py 0000644 0000765 0000024 00000000055 12755167376 017120 0 ustar vincent staff 0000000 0000000 # -*- coding: utf-8 -*-
# Package version string. setup.py reads this file and extracts the value
# with a regular expression anchored on ``__version__ = '...'``, so keep the
# assignment in this exact form.
__version__ = '2.1'
gimmik-2.1/gimmik/kernels/ 0000755 0000765 0000024 00000000000 12755351540 016351 5 ustar vincent staff 0000000 0000000 gimmik-2.1/gimmik/kernels/c-omp.mako 0000644 0000765 0000024 00000001172 12755167376 020252 0 ustar vincent staff 0000000 0000000 # -*- coding: utf-8 -*-
## C kernel template using an OpenMP "parallel for simd" loop. For every
## index i in [0, ncol) and every row j of mat, the generated code updates
## c[i + j*ldc] from the unrolled dot product of that row with b[i + k*ldb].
void
${funcn}(int ncol,
const ${dtype}* restrict b, int ldb,
${dtype}* restrict c, int ldc)
{
${dtype} dotp;
#pragma omp parallel for simd private(dotp)
for (int i = 0; i < ncol; i++)
{
## One unrolled statement group per row of mat. Entries equal to zero emit
## no term; a row with no non-zero entries yields the literal 0.
% for j, jx in enumerate(mat):
dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx)
for k, kx in enumerate(jx) if kx != 0) or 0};
## beta == 0 overwrites c, beta == 1 accumulates into c, and any other beta
## scales the existing c entry before adding the dot product.
% if beta == 0:
c[i + ${j}*ldc] = dotp;
% elif beta == 1:
c[i + ${j}*ldc] += dotp;
% else:
c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc];
% endif
% endfor
}
}
gimmik-2.1/gimmik/kernels/c.mako 0000644 0000765 0000024 00000001131 12755167376 017454 0 ustar vincent staff 0000000 0000000 # -*- coding: utf-8 -*-
## C kernel template using an "omp simd" loop. For every index i in [0, n)
## and every row j of mat, the generated code updates c[i + j*ldc] from the
## unrolled dot product of that row with b[i + k*ldb].
void
${funcn}(int n,
const ${dtype}* restrict b, int ldb,
${dtype}* restrict c, int ldc)
{
${dtype} dotp;
#pragma omp simd
for (int i = 0; i < n; i++)
{
## One unrolled statement group per row of mat. Entries equal to zero emit
## no term; a row with no non-zero entries yields the literal 0.
% for j, jx in enumerate(mat):
dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx)
for k, kx in enumerate(jx) if kx != 0) or 0};
## beta == 0 overwrites c, beta == 1 accumulates into c, and any other beta
## scales the existing c entry before adding the dot product.
% if beta == 0:
c[i + ${j}*ldc] = dotp;
% elif beta == 1:
c[i + ${j}*ldc] += dotp;
% else:
c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc];
% endif
% endfor
}
}
gimmik-2.1/gimmik/kernels/cuda.mako 0000644 0000765 0000024 00000001167 12755167376 020157 0 ustar vincent staff 0000000 0000000 # -*- coding: utf-8 -*-
## CUDA kernel template. Each thread derives its index i from
## blockDim.x*blockIdx.x + threadIdx.x and, when i < n, updates
## c[i + j*ldc] for every row j of mat from the unrolled dot product of that
## row with b[i + k*ldb].
__global__ void
${funcn}(int n,
const ${dtype}* __restrict__ b, int ldb,
${dtype}* __restrict__ c, int ldc)
{
int i = blockDim.x*blockIdx.x + threadIdx.x;
${dtype} dotp;
if (i < n)
{
## One unrolled statement group per row of mat. Entries equal to zero emit
## no term; a row with no non-zero entries yields the literal 0.
% for j, jx in enumerate(mat):
dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx)
for k, kx in enumerate(jx) if kx != 0) or 0};
## beta == 0 overwrites c, beta == 1 accumulates into c, and any other beta
## scales the existing c entry before adding the dot product.
% if beta == 0:
c[i + ${j}*ldc] = dotp;
% elif beta == 1:
c[i + ${j}*ldc] += dotp;
% else:
c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc];
% endif
% endfor
}
}
gimmik-2.1/gimmik/kernels/ispc.mako 0000644 0000765 0000024 00000001135 12755167376 020174 0 ustar vincent staff 0000000 0000000 # -*- coding: utf-8 -*-
## ISPC kernel template. A foreach loop runs i over 0 ... n and, for every
## row j of mat, updates c[i + j*ldc] from the unrolled dot product of that
## row with b[i + k*ldb].
export void
${funcn}(uniform int n,
const uniform ${dtype} b[], uniform int ldb,
${dtype} uniform c[], uniform int ldc)
{
${dtype} dotp;
foreach (i = 0 ... n)
{
## One unrolled statement group per row of mat. Entries equal to zero emit
## no term; a row with no non-zero entries yields the literal 0.
% for j, jx in enumerate(mat):
dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx)
for k, kx in enumerate(jx) if kx != 0) or 0};
## beta == 0 overwrites c, beta == 1 accumulates into c, and any other beta
## scales the existing c entry before adding the dot product.
% if beta == 0:
c[i + ${j}*ldc] = dotp;
% elif beta == 1:
c[i + ${j}*ldc] += dotp;
% else:
c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc];
% endif
% endfor
}
}
gimmik-2.1/gimmik/kernels/opencl.mako 0000644 0000765 0000024 00000001337 12755167376 020522 0 ustar vincent staff 0000000 0000000 # -*- coding: utf-8 -*-
## OpenCL kernel template. Each work-item takes its index i from
## get_global_id(0) and, when i < n, updates c[i + j*ldc] for every row j of
## mat from the unrolled dot product of that row with b[i + k*ldb].
## For double precision the cl_khr_fp64 extension pragma is emitted, guarded
## so that it only takes effect when __OPENCL_VERSION__ is below 120.
% if dtype == 'double':
#if __OPENCL_VERSION__ < 120
# pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
% endif
__kernel void
${funcn}(int n,
__global const ${dtype}* restrict b, int ldb,
__global ${dtype}* restrict c, int ldc)
{
int i = get_global_id(0);
${dtype} dotp;
if (i < n)
{
## One unrolled statement group per row of mat. Entries equal to zero emit
## no term; a row with no non-zero entries yields the literal 0.
% for j, jx in enumerate(mat):
dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx)
for k, kx in enumerate(jx) if kx != 0) or 0};
## beta == 0 overwrites c, beta == 1 accumulates into c, and any other beta
## scales the existing c entry before adding the dot product.
% if beta == 0:
c[i + ${j}*ldc] = dotp;
% elif beta == 1:
c[i + ${j}*ldc] += dotp;
% else:
c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc];
% endif
% endfor
}
}
gimmik-2.1/gimmik.egg-info/ 0000755 0000765 0000024 00000000000 12755351540 016400 5 ustar vincent staff 0000000 0000000 gimmik-2.1/gimmik.egg-info/dependency_links.txt 0000644 0000765 0000024 00000000001 12755351540 022446 0 ustar vincent staff 0000000 0000000
gimmik-2.1/gimmik.egg-info/PKG-INFO 0000644 0000765 0000024 00000001717 12755351540 017503 0 ustar vincent staff 0000000 0000000 Metadata-Version: 1.1
Name: gimmik
Version: 2.1
Summary: Generator of Matrix Multiplication Kernels
Home-page: https://github.com/vincentlab/GiMMiK
Author: Freddie Witherden
Author-email: freddie@witherden.org
License: BSD
Description: GiMMiK is a Python based kernel generator for
matrix multiplication kernels for various accelerator platforms. For
small operator matrices the generated kernels are capable of
outperforming the state-of-the-art general matrix multiplication
routines such as cuBLAS GEMM or clBLAS GEMM. GiMMiK was originally
developed as part of Bartosz Wozniak's master's thesis in the
Department of Computing at Imperial College London and is currently
maintained by Freddie Witherden.
Keywords: Matrix Multiplication,GPU,CUDA,OpenCL
Platform: UNKNOWN
Classifier: License :: OSI Approved :: BSD License
Classifier: Programming Language :: Python :: 3.3
Classifier: Topic :: Scientific/Engineering
gimmik-2.1/gimmik.egg-info/requires.txt 0000644 0000765 0000024 00000000022 12755351540 020772 0 ustar vincent staff 0000000 0000000 mako
numpy >= 1.7
gimmik-2.1/gimmik.egg-info/SOURCES.txt 0000644 0000765 0000024 00000000513 12755351540 020263 0 ustar vincent staff 0000000 0000000 README.rst
setup.py
gimmik/__init__.py
gimmik/_version.py
gimmik.egg-info/PKG-INFO
gimmik.egg-info/SOURCES.txt
gimmik.egg-info/dependency_links.txt
gimmik.egg-info/requires.txt
gimmik.egg-info/top_level.txt
gimmik/kernels/c-omp.mako
gimmik/kernels/c.mako
gimmik/kernels/cuda.mako
gimmik/kernels/ispc.mako
gimmik/kernels/opencl.mako gimmik-2.1/gimmik.egg-info/top_level.txt 0000644 0000765 0000024 00000000007 12755351540 021127 0 ustar vincent staff 0000000 0000000 gimmik
gimmik-2.1/PKG-INFO 0000644 0000765 0000024 00000001717 12755351540 014534 0 ustar vincent staff 0000000 0000000 Metadata-Version: 1.1
Name: gimmik
Version: 2.1
Summary: Generator of Matrix Multiplication Kernels
Home-page: https://github.com/vincentlab/GiMMiK
Author: Freddie Witherden
Author-email: freddie@witherden.org
License: BSD
Description: GiMMiK is a Python based kernel generator for
matrix multiplication kernels for various accelerator platforms. For
small operator matrices the generated kernels are capable of
outperforming the state-of-the-art general matrix multiplication
routines such as cuBLAS GEMM or clBLAS GEMM. GiMMiK was originally
developed as part of Bartosz Wozniak's master's thesis in the
Department of Computing at Imperial College London and is currently
maintained by Freddie Witherden.
Keywords: Matrix Multiplication,GPU,CUDA,OpenCL
Platform: UNKNOWN
Classifier: License :: OSI Approved :: BSD License
Classifier: Programming Language :: Python :: 3.3
Classifier: Topic :: Scientific/Engineering
gimmik-2.1/README.rst 0000644 0000765 0000024 00000003162 12755167376 015136 0 ustar vincent staff 0000000 0000000 GiMMiK
======
Generator of Matrix Multiplication Kernels - GiMMiK - is a tool for generation of high performance matrix multiplication kernel code for various accelerator platforms. Kernel templates are currently provided for C, C with OpenMP, CUDA, ISPC and OpenCL.
What does GiMMiK do?
--------------------
Consider matrix multiplication of the form
C = α ∙ A ⨉ B + β ∙ C
GiMMiK generates fully unrolled kernels, highly specialised to a given operator matrix. The generated code is fully unrolled - each kernel computes a single column of the output matrix. GiMMiK was designed to perform well in a Block by Panel type of matrix multiplication where the operator matrix is small. GiMMiK also removes any sparsity from the operator matrix as well as attempts to reduce common sub-expressions.
How do I install GiMMiK?
------------------------
Clone the git repository and use `setup.py` to install the GiMMiK package. You will need the following dependencies:
* `mako <http://www.makotemplates.org/>`_
* `numpy >= 1.7 <http://www.numpy.org/>`_
Once obtained, you can install GiMMiK by running
::
python setup.py install
to perform a system-wide install. Alternatively, run
::
python setup.py install --user
to install the package locally.
How do I use GiMMiK?
--------------------
Once installed, you are ready to use GiMMiK.
.. code:: python
from gimmik import generate_mm
...
# Generate a CUDA kernel for C = 2*mat*B
src = generate_mm(mat, np.float32, platform='cuda', alpha=2.0, beta=0.0)
...
Who uses GiMMiK?
----------------
GiMMiK was developed to improve performance of the `PyFR <http://www.pyfr.org>`_ framework.
gimmik-2.1/setup.cfg 0000644 0000765 0000024 00000000073 12755351540 015252 0 ustar vincent staff 0000000 0000000 [egg_info]
tag_build =
tag_svn_revision = 0
tag_date = 0
gimmik-2.1/setup.py 0000755 0000765 0000024 00000003421 12755167376 015162 0 ustar vincent staff 0000000 0000000 #!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
from setuptools import setup
import sys
# Python version
if sys.version_info[:2] < (3, 3):
    print('GiMMiK requires Python 3.3 or newer')
    sys.exit(-1)

# GiMMiK version; read through a context manager so the handle is closed
with open('gimmik/_version.py') as f:
    vfile = f.read()

vsrch = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", vfile, re.M)

if vsrch:
    version = vsrch.group(1)
else:
    # Abort here: carrying on would only fail later with a NameError when
    # the undefined `version` is passed to setup()
    print('Unable to find a version string in gimmik/_version.py')
    sys.exit(-1)

# Data
package_data = {
    'gimmik': ['kernels/*.mako'],
}

# Hard dependencies
install_requires = [
    'mako',
    'numpy >= 1.7'
]

# Info
classifiers = [
    'License :: OSI Approved :: BSD License',
    'Programming Language :: Python :: 3.3',
    'Topic :: Scientific/Engineering'
]

# Long Description
long_description = '''GiMMiK is a Python based kernel generator for
matrix multiplication kernels for various accelerator platforms. For
small operator matrices the generated kernels are capable of
outperforming the state-of-the-art general matrix multiplication
routines such as cuBLAS GEMM or clBLAS GEMM. GiMMiK was originally
developed as part of Bartosz Wozniak's master's thesis in the
Department of Computing at Imperial College London and is currently
maintained by Freddie Witherden.'''

setup(name='gimmik',
      version=version,

      # Packages
      packages=['gimmik'],
      package_data=package_data,
      install_requires=install_requires,

      # Metadata
      description='Generator of Matrix Multiplication Kernels',
      long_description=long_description,
      maintainer='Freddie Witherden',
      maintainer_email='freddie@witherden.org',
      url='https://github.com/vincentlab/GiMMiK',
      license='BSD',
      keywords=['Matrix Multiplication', 'GPU', 'CUDA', 'OpenCL'],
      classifiers=classifiers)