gimmik-2.1/0000755000076500000240000000000012755351540013431 5ustar vincentstaff00000000000000gimmik-2.1/gimmik/0000755000076500000240000000000012755351540014706 5ustar vincentstaff00000000000000gimmik-2.1/gimmik/__init__.py0000644000076500000240000000176612755167376017045 0ustar vincentstaff00000000000000# -*- coding: utf-8 -*- import pkgutil import re from mako.template import Template import numpy as np from gimmik._version import __version__ def generate_mm(mat, dtype, platform, alpha=1.0, beta=0.0, funcn='gimmik_mm'): # Data type dtype = np.dtype(dtype).type if dtype == np.float32: dtype = 'float' elif dtype == np.float64: dtype = 'double' else: raise ValueError('Invalid floating point data type') # Multiply the matrix through by alpha mat = alpha*mat # Template arguments tplargs = {'dtype': dtype, 'mat': mat, 'beta': beta, 'funcn': funcn} # Load and render the template tpl = pkgutil.get_data(__name__, 'kernels/{0}.mako'.format(platform)) src = Template(tpl).render(**tplargs) # At single precision suffix all floating point constants by 'f' if dtype == 'float': src = re.sub(r'(?=\d*[.eE])(?=\.?\d)\d*\.?\d*(?:[eE][+-]?\d+)?', r'\g<0>f', src) # Return the source return src gimmik-2.1/gimmik/_version.py0000644000076500000240000000005512755167376017120 0ustar vincentstaff00000000000000# -*- coding: utf-8 -*- __version__ = '2.1' gimmik-2.1/gimmik/kernels/0000755000076500000240000000000012755351540016351 5ustar vincentstaff00000000000000gimmik-2.1/gimmik/kernels/c-omp.mako0000644000076500000240000000117212755167376020252 0ustar vincentstaff00000000000000# -*- coding: utf-8 -*- void ${funcn}(int ncol, const ${dtype}* restrict b, int ldb, ${dtype}* restrict c, int ldc) { ${dtype} dotp; #pragma omp parallel for simd private(dotp) for (int i = 0; i < ncol; i++) { % for j, jx in enumerate(mat): dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx) for k, kx in enumerate(jx) if kx != 0) or 0}; % if beta == 0: c[i + ${j}*ldc] = dotp; % elif beta == 1: c[i + ${j}*ldc] += dotp; % else: c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc]; % endif % endfor } } gimmik-2.1/gimmik/kernels/c.mako0000644000076500000240000000113112755167376017454 0ustar vincentstaff00000000000000# -*- coding: utf-8 -*- void ${funcn}(int n, const ${dtype}* restrict b, int ldb, ${dtype}* restrict c, int ldc) { ${dtype} dotp; #pragma omp simd for (int i = 0; i < n; i++) { % for j, jx in enumerate(mat): dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx) for k, kx in enumerate(jx) if kx != 0) or 0}; % if beta == 0: c[i + ${j}*ldc] = dotp; % elif beta == 1: c[i + ${j}*ldc] += dotp; % else: c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc]; % endif % endfor } } gimmik-2.1/gimmik/kernels/cuda.mako0000644000076500000240000000116712755167376020157 0ustar vincentstaff00000000000000# -*- coding: utf-8 -*- __global__ void ${funcn}(int n, const ${dtype}* __restrict__ b, int ldb, ${dtype}* __restrict__ c, int ldc) { int i = blockDim.x*blockIdx.x + threadIdx.x; ${dtype} dotp; if (i < n) { % for j, jx in enumerate(mat): dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx) for k, kx in enumerate(jx) if kx != 0) or 0}; % if beta == 0: c[i + ${j}*ldc] = dotp; % elif beta == 1: c[i + ${j}*ldc] += dotp; % else: c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc]; % endif % endfor } } gimmik-2.1/gimmik/kernels/ispc.mako0000644000076500000240000000113512755167376020174 0ustar vincentstaff00000000000000# -*- coding: utf-8 -*- export void ${funcn}(uniform int n, const uniform ${dtype} b[], uniform int ldb, ${dtype} uniform c[], uniform int ldc) { ${dtype} dotp; foreach (i = 0 ... n) { % for j, jx in enumerate(mat): dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx) for k, kx in enumerate(jx) if kx != 0) or 0}; % if beta == 0: c[i + ${j}*ldc] = dotp; % elif beta == 1: c[i + ${j}*ldc] += dotp; % else: c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc]; % endif % endfor } } gimmik-2.1/gimmik/kernels/opencl.mako0000644000076500000240000000133712755167376020522 0ustar vincentstaff00000000000000# -*- coding: utf-8 -*- % if dtype == 'double': #if __OPENCL_VERSION__ < 120 # pragma OPENCL EXTENSION cl_khr_fp64: enable #endif % endif __kernel void ${funcn}(int n, __global const ${dtype}* restrict b, int ldb, __global ${dtype}* restrict c, int ldc) { int i = get_global_id(0); ${dtype} dotp; if (i < n) { % for j, jx in enumerate(mat): dotp = ${' + '.join('{kx}*b[i + {k}*ldb]'.format(k=k, kx=kx) for k, kx in enumerate(jx) if kx != 0) or 0}; % if beta == 0: c[i + ${j}*ldc] = dotp; % elif beta == 1: c[i + ${j}*ldc] += dotp; % else: c[i + ${j}*ldc] = dotp + ${beta}*c[i + ${j}*ldc]; % endif % endfor } } gimmik-2.1/gimmik.egg-info/0000755000076500000240000000000012755351540016400 5ustar vincentstaff00000000000000gimmik-2.1/gimmik.egg-info/dependency_links.txt0000644000076500000240000000000112755351540022446 0ustar vincentstaff00000000000000 gimmik-2.1/gimmik.egg-info/PKG-INFO0000644000076500000240000000171712755351540017503 0ustar vincentstaff00000000000000Metadata-Version: 1.1 Name: gimmik Version: 2.1 Summary: Generator of Matrix Multiplication Kernels Home-page: https://github.com/vincentlab/GiMMiK Author: Freddie Witherden Author-email: freddie@witherden.org License: BSD Description: GiMMiK is a Python based kernel generator for matrix multiplication kernels for various accelerator platforms. For small operator matrices the generated kernels are capable of outperfoming the state-of-the-art general matrix multiplication routines such as cuBLAS GEMM or clBLAS GEMM. GiMMiK was originally developed as part of Bartosz Wozniak's master's thesis in the Department of Computing at Imperial College London and is currently maintained by Freddie Witherden. Keywords: Matrix Multiplication,GPU,CUDA,OpenCL Platform: UNKNOWN Classifier: License :: OSI Approved :: BSD License Classifier: Programming Language :: Python :: 3.3 Classifier: Topic :: Scientific/Engineering gimmik-2.1/gimmik.egg-info/requires.txt0000644000076500000240000000002212755351540020772 0ustar vincentstaff00000000000000mako numpy >= 1.7 gimmik-2.1/gimmik.egg-info/SOURCES.txt0000644000076500000240000000051312755351540020263 0ustar vincentstaff00000000000000README.rst setup.py gimmik/__init__.py gimmik/_version.py gimmik.egg-info/PKG-INFO gimmik.egg-info/SOURCES.txt gimmik.egg-info/dependency_links.txt gimmik.egg-info/requires.txt gimmik.egg-info/top_level.txt gimmik/kernels/c-omp.mako gimmik/kernels/c.mako gimmik/kernels/cuda.mako gimmik/kernels/ispc.mako gimmik/kernels/opencl.makogimmik-2.1/gimmik.egg-info/top_level.txt0000644000076500000240000000000712755351540021127 0ustar vincentstaff00000000000000gimmik gimmik-2.1/PKG-INFO0000644000076500000240000000171712755351540014534 0ustar vincentstaff00000000000000Metadata-Version: 1.1 Name: gimmik Version: 2.1 Summary: Generator of Matrix Multiplication Kernels Home-page: https://github.com/vincentlab/GiMMiK Author: Freddie Witherden Author-email: freddie@witherden.org License: BSD Description: GiMMiK is a Python based kernel generator for matrix multiplication kernels for various accelerator platforms. For small operator matrices the generated kernels are capable of outperfoming the state-of-the-art general matrix multiplication routines such as cuBLAS GEMM or clBLAS GEMM. GiMMiK was originally developed as part of Bartosz Wozniak's master's thesis in the Department of Computing at Imperial College London and is currently maintained by Freddie Witherden. Keywords: Matrix Multiplication,GPU,CUDA,OpenCL Platform: UNKNOWN Classifier: License :: OSI Approved :: BSD License Classifier: Programming Language :: Python :: 3.3 Classifier: Topic :: Scientific/Engineering gimmik-2.1/README.rst0000644000076500000240000000316212755167376015136 0ustar vincentstaff00000000000000GiMMiK ====== Generator of Matrix Multiplication Kernels - GiMMiK - is a tool for generation of high performance matrix multiplication kernel code for various accelerator platforms. Currently CUDA and OpenCL are the only supported platforms. What does GiMMiK do? -------------------- Consider matrix multiplication of the form C = α ∙ A ⨉ B + β ∙ C GiMMiK generates fully unrolled kernels, highly specialised to a given operator matrix. The generated code is fully unrolled - each kernel computes a single column of the output matrix. GiMMiK was designed to perform well in a Block by Panel type of matrix multiplication where the operator matrix is small. GiMMiK also removes any sparsity form the operator matrix as well as attempts to reduce common sub-expressions. How do I install GiMMiK? ------------------------ Clone the git repository and use `setup.py` to install the GiMMiK package. You will need the following dependencies: * `mako `_ * `numpy >= 1.7 `_ Once obtained, you can install GiMMiK by running :: python setup.py install to perform a system-wide install. Alternatively, run :: python setup.py install --user to install the package locally. How do I use GiMMiK? -------------------- Once installed, you are ready to use GiMMiK. .. code:: python from gimmik import generate_mm ... # Generate a CUDA kernel for C = 2*mat*B src = generate_mm(mat, np.float32, platform='cuda', alpha=2.0, beta=0.0) ... Who uses GiMMiK? ---------------- GiMMiK was develop to improve performance of the `PyFR `_ framework. gimmik-2.1/setup.cfg0000644000076500000240000000007312755351540015252 0ustar vincentstaff00000000000000[egg_info] tag_build = tag_svn_revision = 0 tag_date = 0 gimmik-2.1/setup.py0000755000076500000240000000342112755167376015162 0ustar vincentstaff00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import re from setuptools import setup import sys # Python version if sys.version_info[:2] < (3, 3): print('GiMMiK requires Python 3.3 or newer') sys.exit(-1) # GiMMiK version vfile = open('gimmik/_version.py').read() vsrch = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", vfile, re.M) if vsrch: version = vsrch.group(1) else: print('Unable to find a version string in gimmik/_version.py') # Data package_data = { 'gimmik': ['kernels/*.mako'], } # Hard dependencies install_requires = [ 'mako', 'numpy >= 1.7' ] # Info classifiers = [ 'License :: OSI Approved :: BSD License', 'Programming Language :: Python :: 3.3', 'Topic :: Scientific/Engineering' ] # Long Description long_description = '''GiMMiK is a Python based kernel generator for matrix multiplication kernels for various accelerator platforms. For small operator matrices the generated kernels are capable of outperfoming the state-of-the-art general matrix multiplication routines such as cuBLAS GEMM or clBLAS GEMM. GiMMiK was originally developed as part of Bartosz Wozniak's master's thesis in the Department of Computing at Imperial College London and is currently maintained by Freddie Witherden.''' setup(name='gimmik', version=version, # Packages packages=['gimmik'], package_data=package_data, install_requires=install_requires, # Metadata description='Generator of Matrix Multiplication Kernels', long_description=long_description, maintainer='Freddie Witherden', maintainer_email='freddie@witherden.org', url='https://github.com/vincentlab/GiMMiK', license='BSD', keywords=['Matrix Multiplication', 'GPU', 'CUDA', 'OpenCL'], classifiers=classifiers)