numexpr-2.2.2/0000755000175000001440000000000012221261175013650 5ustar faltetusers00000000000000numexpr-2.2.2/setup.py0000644000175000001440000001571112132261472015370 0ustar faltetusers00000000000000#!/usr/bin/env python ################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### import shutil import os, sys import os.path as op from distutils.command.clean import clean import numpy from numpy.distutils.command.build_ext import build_ext as numpy_build_ext minimum_numpy_version = "1.6" if sys.version_info < (2, 6): raise RuntimeError("must use python 2.6 or greater") if numpy.__version__ < minimum_numpy_version: print("*Error*: NumPy version is lower than needed: %s < %s" % (numpy.__version__, minimum_numpy_version)) sys.exit(1) try: import setuptools except ImportError: setuptools = None extra_setup_opts = {} if setuptools: extra_setup_opts['zip_safe'] = False try: # Python 3 # Code taken form numpy/distutils/command/build_py.py # XXX: update LICENSES from distutils.command.build_py import build_py_2to3 as old_build_py from numpy.distutils.misc_util import is_string class build_py(old_build_py): def run(self): build_src = self.get_finalized_command('build_src') if build_src.py_modules_dict and self.packages is None: self.packages = list(build_src.py_modules_dict.keys ()) old_build_py.run(self) def find_package_modules(self, package, package_dir): modules = old_build_py.find_package_modules(self, package, package_dir) # Find build_src generated *.py files. 
build_src = self.get_finalized_command('build_src') modules += build_src.py_modules_dict.get(package,[]) return modules def find_modules(self): old_py_modules = self.py_modules[:] new_py_modules = list(filter(is_string, self.py_modules)) self.py_modules[:] = new_py_modules modules = old_build_py.find_modules(self) self.py_modules[:] = old_py_modules return modules # XXX: Fix find_source_files for item in py_modules such that item is 3-tuple # and item[2] is source file. except ImportError: # Python 2 from numpy.distutils.command.build_py import build_py DEBUG = False def localpath(*args): return op.abspath(op.join(*((op.dirname(__file__),)+args))) def debug(instring): if DEBUG: print(" DEBUG: "+instring) def configuration(): from numpy.distutils.misc_util import Configuration, dict_append from numpy.distutils.system_info import system_info config = Configuration('numexpr') #try to find configuration for MKL, either from environment or site.cfg if op.exists('site.cfg'): mkl_config_data = config.get_info('mkl') # some version of MKL need to be linked with libgfortran, for this, use # entries of DEFAULT section in site.cfg default_config = system_info() dict_append(mkl_config_data, libraries = default_config.get_libraries(), library_dirs = default_config.get_lib_dirs() ) else: mkl_config_data = {} #setup information for C extension if os.name == 'nt': pthread_win = ['numexpr/win32/pthread.c'] else: pthread_win = [] extension_config_data = { 'sources': ['numexpr/interpreter.cpp', 'numexpr/module.cpp', 'numexpr/numexpr_object.cpp'] + pthread_win, 'depends': ['numexpr/interp_body.cpp', 'numexpr/complex_functions.hpp', 'numexpr/interpreter.hpp', 'numexpr/module.hpp', 'numexpr/msvc_function_stubs.hpp', 'numexpr/numexpr_config.hpp', 'numexpr/numexpr_object.hpp'], 'libraries': ['m'], 'extra_compile_args': ['-funroll-all-loops',], } dict_append(extension_config_data, **mkl_config_data) if 'library_dirs' in mkl_config_data: library_dirs = 
':'.join(mkl_config_data['library_dirs']) rpath_link = '-Xlinker --rpath -Xlinker %s' % library_dirs extension_config_data['extra_link_args'] = [rpath_link] config.add_extension('interpreter', **extension_config_data) config.make_config_py() config.add_subpackage('tests', 'numexpr/tests') #version handling config.make_svn_version_py() config.get_version('numexpr/version.py') return config class cleaner(clean): def run(self): # Recursive deletion of build/ directory path = localpath("build") try: shutil.rmtree(path) except Exception: debug("Failed to remove directory %s" % path) else: debug("Cleaned up %s" % path) # Now, the extension and other files try: import imp except ImportError: if os.name == 'posix': paths = [localpath("numexpr/interpreter.so")] else: paths = [localpath("numexpr/interpreter.pyd")] else: paths = [] for suffix, _, _ in imp.get_suffixes(): if suffix == '.py': continue paths.append(localpath("numexpr", "interpreter" + suffix)) paths.append(localpath("numexpr/__config__.py")) paths.append(localpath("numexpr/__config__.pyc")) for path in paths: try: os.remove(path) except Exception: debug("Failed to clean up file %s" % path) else: debug("Cleaning up %s" % path) clean.run(self) def setup_package(): import os from numpy.distutils.core import setup extra_setup_opts['cmdclass'] = {'build_ext': build_ext, 'clean': cleaner, 'build_py': build_py, } setup(#name='numexpr', # name already set in numpy.distutils description='Fast numerical expression evaluator for NumPy', author='David M. Cooke, Francesc Alted and others', author_email='david.m.cooke@gmail.com, faltet@pytables.org', url='http://code.google.com/p/numexpr/', license = 'MIT', packages = ['numexpr'], configuration = configuration, **extra_setup_opts ) class build_ext(numpy_build_ext): def build_extension(self, ext): # at this point we know what the C compiler is. 
if self.compiler.compiler_type == 'msvc': ext.extra_compile_args = [] # also remove extra linker arguments msvc doesn't understand ext.extra_link_args = [] # also remove gcc math library ext.libraries.remove('m') numpy_build_ext.build_extension(self, ext) if __name__ == '__main__': setup_package() numexpr-2.2.2/LICENSE.txt0000644000175000001440000000225112132261472015474 0ustar faltetusers00000000000000Copyright (c) 2007,2008 David M. Cooke Copyright (c) 2009,2010 Francesc Alted Copyright (c) 2011- See AUTHORS.txt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. numexpr-2.2.2/numexpr/0000755000175000001440000000000012221261175015346 5ustar faltetusers00000000000000numexpr-2.2.2/numexpr/module.cpp0000644000175000001440000003057512132621274017352 0ustar faltetusers00000000000000// Numexpr - Fast numerical array expression evaluator for NumPy. // // License: MIT // Author: See AUTHORS.txt // // See LICENSE.txt for details about copyright and rights to use. 
// // module.cpp contains the CPython-specific module exposure. #define DO_NUMPY_IMPORT_ARRAY #include "module.hpp" #include #include #include "interpreter.hpp" #include "numexpr_object.hpp" using namespace std; // Global state. The file interpreter.hpp also has some global state // in its 'th_params' variable global_state gs; /* Do the worker job for a certain thread */ void *th_worker(void *tidptr) { int tid = *(int *)tidptr; /* Parameters for threads */ npy_intp start; npy_intp vlen; npy_intp block_size; NpyIter *iter; vm_params params; int *pc_error; int ret; int n_inputs; int n_constants; int n_temps; size_t memsize; char **mem; npy_intp *memsteps; npy_intp istart, iend; char **errmsg; // For output buffering if needed vector out_buffer; while (1) { gs.init_sentinels_done = 0; /* sentinels have to be initialised yet */ /* Meeting point for all threads (wait for initialization) */ pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads < gs.nthreads) { gs.count_threads++; pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } else { pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); /* Check if thread has been asked to return */ if (gs.end_threads) { return(0); } /* Get parameters for this thread before entering the main loop */ start = th_params.start; vlen = th_params.vlen; block_size = th_params.block_size; params = th_params.params; pc_error = th_params.pc_error; // If output buffering is needed, allocate it if (th_params.need_output_buffering) { out_buffer.resize(params.memsizes[0] * BLOCK_SIZE1); params.out_buffer = &out_buffer[0]; } else { params.out_buffer = NULL; } /* Populate private data for each thread */ n_inputs = params.n_inputs; n_constants = params.n_constants; n_temps = params.n_temps; memsize = (1+n_inputs+n_constants+n_temps) * sizeof(char *); /* XXX malloc seems thread safe for POSIX, but for Win? 
*/ mem = (char **)malloc(memsize); memcpy(mem, params.mem, memsize); errmsg = th_params.errmsg; params.mem = mem; /* Loop over blocks */ pthread_mutex_lock(&gs.count_mutex); if (!gs.init_sentinels_done) { /* Set sentinels and other global variables */ gs.gindex = start; istart = gs.gindex; iend = istart + block_size; if (iend > vlen) { iend = vlen; } gs.init_sentinels_done = 1; /* sentinels have been initialised */ gs.giveup = 0; /* no giveup initially */ } else { gs.gindex += block_size; istart = gs.gindex; iend = istart + block_size; if (iend > vlen) { iend = vlen; } } /* Grab one of the iterators */ iter = th_params.iter[tid]; if (iter == NULL) { th_params.ret_code = -1; gs.giveup = 1; } memsteps = th_params.memsteps[tid]; /* Get temporary space for each thread */ ret = get_temps_space(params, mem, BLOCK_SIZE1); if (ret < 0) { /* Propagate error to main thread */ th_params.ret_code = ret; gs.giveup = 1; } pthread_mutex_unlock(&gs.count_mutex); while (istart < vlen && !gs.giveup) { /* Reset the iterator to the range for this task */ ret = NpyIter_ResetToIterIndexRange(iter, istart, iend, errmsg); /* Execute the task */ if (ret >= 0) { ret = vm_engine_iter_task(iter, memsteps, params, pc_error, errmsg); } if (ret < 0) { pthread_mutex_lock(&gs.count_mutex); gs.giveup = 1; /* Propagate error to main thread */ th_params.ret_code = ret; pthread_mutex_unlock(&gs.count_mutex); break; } pthread_mutex_lock(&gs.count_mutex); gs.gindex += block_size; istart = gs.gindex; iend = istart + block_size; if (iend > vlen) { iend = vlen; } pthread_mutex_unlock(&gs.count_mutex); } /* Meeting point for all threads (wait for finalization) */ pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads > 0) { gs.count_threads--; pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } else { pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); /* Release resources */ free_temps_space(params, mem); free(mem); } /* closes while(1) 
*/ /* This should never be reached, but anyway */ return(0); } /* Initialize threads */ int init_threads(void) { int tid, rc; /* Initialize mutex and condition variable objects */ pthread_mutex_init(&gs.count_mutex, NULL); /* Barrier initialization */ pthread_mutex_init(&gs.count_threads_mutex, NULL); pthread_cond_init(&gs.count_threads_cv, NULL); gs.count_threads = 0; /* Reset threads counter */ /* Finally, create the threads */ for (tid = 0; tid < gs.nthreads; tid++) { gs.tids[tid] = tid; rc = pthread_create(&gs.threads[tid], NULL, th_worker, (void *)&gs.tids[tid]); if (rc) { fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc); fprintf(stderr, "\tError detail: %s\n", strerror(rc)); exit(-1); } } gs.init_threads_done = 1; /* Initialization done! */ gs.pid = (int)getpid(); /* save the PID for this process */ return(0); } /* Set the number of threads in numexpr's VM */ int numexpr_set_nthreads(int nthreads_new) { int nthreads_old = gs.nthreads; int t, rc; void *status; if (nthreads_new > MAX_THREADS) { fprintf(stderr, "Error. nthreads cannot be larger than MAX_THREADS (%d)", MAX_THREADS); return -1; } else if (nthreads_new <= 0) { fprintf(stderr, "Error. nthreads must be a positive integer"); return -1; } /* Only join threads if they are not initialized or if our PID is different from that in pid var (probably means that we are a subprocess, and thus threads are non-existent). 
*/ if (gs.nthreads > 1 && gs.init_threads_done && gs.pid == getpid()) { /* Tell all existing threads to finish */ gs.end_threads = 1; pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads < gs.nthreads) { gs.count_threads++; pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } else { pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); /* Join exiting threads */ for (t=0; t 1 && (!gs.init_threads_done || gs.pid != getpid())) { init_threads(); } return nthreads_old; } #ifdef USE_VML static PyObject * _get_vml_version(PyObject *self, PyObject *args) { int len=198; char buf[198]; MKL_Get_Version_String(buf, len); return Py_BuildValue("s", buf); } static PyObject * _set_vml_accuracy_mode(PyObject *self, PyObject *args) { int mode_in, mode_old; if (!PyArg_ParseTuple(args, "i", &mode_in)) return NULL; mode_old = vmlGetMode() & VML_ACCURACY_MASK; vmlSetMode((mode_in & VML_ACCURACY_MASK) | VML_ERRMODE_IGNORE ); return Py_BuildValue("i", mode_old); } static PyObject * _set_vml_num_threads(PyObject *self, PyObject *args) { int max_num_threads; if (!PyArg_ParseTuple(args, "i", &max_num_threads)) return NULL; mkl_domain_set_num_threads(max_num_threads, MKL_VML); Py_RETURN_NONE; } #endif static PyObject * _set_num_threads(PyObject *self, PyObject *args) { int num_threads, nthreads_old; if (!PyArg_ParseTuple(args, "i", &num_threads)) return NULL; nthreads_old = numexpr_set_nthreads(num_threads); return Py_BuildValue("i", nthreads_old); } static PyMethodDef module_methods[] = { #ifdef USE_VML {"_get_vml_version", _get_vml_version, METH_VARARGS, "Get the VML/MKL library version."}, {"_set_vml_accuracy_mode", _set_vml_accuracy_mode, METH_VARARGS, "Set accuracy mode for VML functions."}, {"_set_vml_num_threads", _set_vml_num_threads, METH_VARARGS, "Suggests a maximum number of threads to be used in VML operations."}, #endif {"_set_num_threads", _set_num_threads, METH_VARARGS, "Suggests a maximum number of threads 
to be used in operations."}, {NULL} }; static int add_symbol(PyObject *d, const char *sname, int name, const char* routine_name) { PyObject *o, *s; int r; if (!sname) { return 0; } o = PyLong_FromLong(name); s = PyBytes_FromString(sname); if (!s) { PyErr_SetString(PyExc_RuntimeError, routine_name); return -1; } r = PyDict_SetItem(d, s, o); Py_XDECREF(o); return r; } #ifdef __cplusplus extern "C" { #endif #if PY_MAJOR_VERSION >= 3 /* XXX: handle the "global_state" state via moduedef */ static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "interpreter", NULL, -1, /* sizeof(struct global_state), */ module_methods, NULL, NULL, /* module_traverse, */ NULL, /* module_clear, */ NULL }; #define INITERROR return NULL PyObject * PyInit_interpreter(void) #else #define INITERROR return PyMODINIT_FUNC initinterpreter() #endif { PyObject *m, *d; if (PyType_Ready(&NumExprType) < 0) INITERROR; #if PY_MAJOR_VERSION >= 3 m = PyModule_Create(&moduledef); #else m = Py_InitModule3("interpreter", module_methods, NULL); #endif if (m == NULL) INITERROR; Py_INCREF(&NumExprType); PyModule_AddObject(m, "NumExpr", (PyObject *)&NumExprType); import_array(); d = PyDict_New(); if (!d) INITERROR; #define OPCODE(n, name, sname, ...) \ if (add_symbol(d, sname, name, "add_op") < 0) { INITERROR; } #include "opcodes.hpp" #undef OPCODE if (PyModule_AddObject(m, "opcodes", d) < 0) INITERROR; d = PyDict_New(); if (!d) INITERROR; #define add_func(name, sname) \ if (add_symbol(d, sname, name, "add_func") < 0) { INITERROR; } #define FUNC_FF(name, sname, ...) add_func(name, sname); #define FUNC_FFF(name, sname, ...) add_func(name, sname); #define FUNC_DD(name, sname, ...) add_func(name, sname); #define FUNC_DDD(name, sname, ...) add_func(name, sname); #define FUNC_CC(name, sname, ...) add_func(name, sname); #define FUNC_CCC(name, sname, ...) 
add_func(name, sname); #include "functions.hpp" #undef FUNC_CCC #undef FUNC_CC #undef FUNC_DDD #undef FUNC_DD #undef FUNC_DD #undef FUNC_FFF #undef FUNC_FF #undef add_func if (PyModule_AddObject(m, "funccodes", d) < 0) INITERROR; if (PyModule_AddObject(m, "allaxes", PyLong_FromLong(255)) < 0) INITERROR; if (PyModule_AddObject(m, "maxdims", PyLong_FromLong(NPY_MAXDIMS)) < 0) INITERROR; #if PY_MAJOR_VERSION >= 3 return m; #endif } #ifdef __cplusplus } // extern "C" #endif numexpr-2.2.2/numexpr/complex_functions.hpp0000644000175000001440000002040112132261472021614 0ustar faltetusers00000000000000#ifndef NUMEXPR_COMPLEX_FUNCTIONS_HPP #define NUMEXPR_COMPLEX_FUNCTIONS_HPP /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ // TODO: Could just use std::complex and std::complex /* constants */ static npy_cdouble nc_1 = {1., 0.}; static npy_cdouble nc_half = {0.5, 0.}; static npy_cdouble nc_i = {0., 1.}; static npy_cdouble nc_i2 = {0., 0.5}; /* static npy_cdouble nc_mi = {0., -1.}; static npy_cdouble nc_pi2 = {M_PI/2., 0.}; */ /* *************************** WARNING ***************************** Due to the way Numexpr places the results of operations, the *x and *r pointers do point to the same address (apparently this doesn't happen in NumPy). So, measures should be taken so as to not to reuse *x after the first *r has been overwritten. 
********************************************************************* */ static void nc_assign(npy_cdouble *x, npy_cdouble *r) { r->real = x->real; r->imag = x->imag; return; } static void nc_sum(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { r->real = a->real + b->real; r->imag = a->imag + b->imag; return; } static void nc_diff(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { r->real = a->real - b->real; r->imag = a->imag - b->imag; return; } static void nc_neg(npy_cdouble *a, npy_cdouble *r) { r->real = -a->real; r->imag = -a->imag; return; } static void nc_prod(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { double ar=a->real, br=b->real, ai=a->imag, bi=b->imag; r->real = ar*br - ai*bi; r->imag = ar*bi + ai*br; return; } static void nc_quot(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { double ar=a->real, br=b->real, ai=a->imag, bi=b->imag; double d = br*br + bi*bi; r->real = (ar*br + ai*bi)/d; r->imag = (ai*br - ar*bi)/d; return; } static void nc_sqrt(npy_cdouble *x, npy_cdouble *r) { double s,d; if (x->real == 0. && x->imag == 0.) *r = *x; else { s = sqrt((fabs(x->real) + hypot(x->real,x->imag))/2); d = x->imag/(2*s); if (x->real > 0.) { r->real = s; r->imag = d; } else if (x->imag >= 0.) 
{ r->real = d; r->imag = s; } else { r->real = -d; r->imag = -s; } } return; } static void nc_log(npy_cdouble *x, npy_cdouble *r) { double l = hypot(x->real,x->imag); r->imag = atan2(x->imag, x->real); r->real = log(l); return; } static void nc_log1p(npy_cdouble *x, npy_cdouble *r) { double l = hypot(x->real + 1.0,x->imag); r->imag = atan2(x->imag, x->real + 1.0); r->real = log(l); return; } static void nc_exp(npy_cdouble *x, npy_cdouble *r) { double a = exp(x->real); r->real = a*cos(x->imag); r->imag = a*sin(x->imag); return; } static void nc_expm1(npy_cdouble *x, npy_cdouble *r) { double a = exp(x->real); r->real = a*cos(x->imag) - 1.0; r->imag = a*sin(x->imag); return; } static void nc_pow(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { npy_intp n; double ar=a->real, br=b->real, ai=a->imag, bi=b->imag; if (br == 0. && bi == 0.) { r->real = 1.; r->imag = 0.; return; } if (ar == 0. && ai == 0.) { r->real = 0.; r->imag = 0.; return; } if (bi == 0 && (n=(npy_intp)br) == br) { if (n > -100 && n < 100) { npy_cdouble p, aa; npy_intp mask = 1; if (n < 0) n = -n; aa = nc_1; p.real = ar; p.imag = ai; while (1) { if (n & mask) nc_prod(&aa,&p,&aa); mask <<= 1; if (n < mask || mask <= 0) break; nc_prod(&p,&p,&p); } r->real = aa.real; r->imag = aa.imag; if (br < 0) nc_quot(&nc_1, r, r); return; } } /* complexobject.c uses an inline version of this formula investigate whether this had better performance or accuracy */ nc_log(a, r); nc_prod(r, b, r); nc_exp(r, r); return; } static void nc_prodi(npy_cdouble *x, npy_cdouble *r) { double xr = x->real; r->real = -x->imag; r->imag = xr; return; } static void nc_acos(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, *pa=&a; nc_assign(x, pa); nc_prod(x,x,r); nc_diff(&nc_1, r, r); nc_sqrt(r, r); nc_prodi(r, r); nc_sum(pa, r, r); nc_log(r, r); nc_prodi(r, r); nc_neg(r, r); return; /* return nc_neg(nc_prodi(nc_log(nc_sum(x,nc_prod(nc_i, nc_sqrt(nc_diff(nc_1,nc_prod(x,x)))))))); */ } static void nc_acosh(npy_cdouble *x, npy_cdouble *r) { 
npy_cdouble t, a, *pa=&a; nc_assign(x, pa); nc_sum(x, &nc_1, &t); nc_sqrt(&t, &t); nc_diff(x, &nc_1, r); nc_sqrt(r, r); nc_prod(&t, r, r); nc_sum(pa, r, r); nc_log(r, r); return; /* return nc_log(nc_sum(x, nc_prod(nc_sqrt(nc_sum(x,nc_1)), nc_sqrt(nc_diff(x,nc_1))))); */ } static void nc_asin(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, *pa=&a; nc_prodi(x, pa); nc_prod(x, x, r); nc_diff(&nc_1, r, r); nc_sqrt(r, r); nc_sum(pa, r, r); nc_log(r, r); nc_prodi(r, r); nc_neg(r, r); return; /* return nc_neg(nc_prodi(nc_log(nc_sum(nc_prod(nc_i,x), nc_sqrt(nc_diff(nc_1,nc_prod(x,x))))))); */ } static void nc_asinh(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, *pa=&a; nc_assign(x, pa); nc_prod(x, x, r); nc_sum(&nc_1, r, r); nc_sqrt(r, r); nc_sum(r, pa, r); nc_log(r, r); return; /* return nc_log(nc_sum(nc_sqrt(nc_sum(nc_1,nc_prod(x,x))),x)); */ } static void nc_atan(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, *pa=&a; nc_diff(&nc_i, x, pa); nc_sum(&nc_i, x, r); nc_quot(r, pa, r); nc_log(r,r); nc_prod(&nc_i2, r, r); return; /* return nc_prod(nc_i2,nc_log(nc_quot(nc_sum(nc_i,x),nc_diff(nc_i,x)))); */ } static void nc_atanh(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, b, *pa=&a, *pb=&b; nc_assign(x, pa); nc_diff(&nc_1, pa, r); nc_sum(&nc_1, pa, pb); nc_quot(pb, r, r); nc_log(r, r); nc_prod(&nc_half, r, r); return; /* return nc_prod(nc_half,nc_log(nc_quot(nc_sum(nc_1,x),nc_diff(nc_1,x)))); */ } static void nc_cos(npy_cdouble *x, npy_cdouble *r) { double xr=x->real, xi=x->imag; r->real = cos(xr)*cosh(xi); r->imag = -sin(xr)*sinh(xi); return; } static void nc_cosh(npy_cdouble *x, npy_cdouble *r) { double xr=x->real, xi=x->imag; r->real = cos(xi)*cosh(xr); r->imag = sin(xi)*sinh(xr); return; } #define M_LOG10_E 0.434294481903251827651128918916605082294397 static void nc_log10(npy_cdouble *x, npy_cdouble *r) { nc_log(x, r); r->real *= M_LOG10_E; r->imag *= M_LOG10_E; return; } static void nc_sin(npy_cdouble *x, npy_cdouble *r) { double xr=x->real, xi=x->imag; r->real = 
sin(xr)*cosh(xi); r->imag = cos(xr)*sinh(xi); return; } static void nc_sinh(npy_cdouble *x, npy_cdouble *r) { double xr=x->real, xi=x->imag; r->real = cos(xi)*sinh(xr); r->imag = sin(xi)*cosh(xr); return; } static void nc_tan(npy_cdouble *x, npy_cdouble *r) { double sr,cr,shi,chi; double rs,is,rc,ic; double d; double xr=x->real, xi=x->imag; sr = sin(xr); cr = cos(xr); shi = sinh(xi); chi = cosh(xi); rs = sr*chi; is = cr*shi; rc = cr*chi; ic = -sr*shi; d = rc*rc + ic*ic; r->real = (rs*rc+is*ic)/d; r->imag = (is*rc-rs*ic)/d; return; } static void nc_tanh(npy_cdouble *x, npy_cdouble *r) { double si,ci,shr,chr; double rs,is,rc,ic; double d; double xr=x->real, xi=x->imag; si = sin(xi); ci = cos(xi); shr = sinh(xr); chr = cosh(xr); rs = ci*shr; is = si*chr; rc = ci*chr; ic = si*shr; d = rc*rc + ic*ic; r->real = (rs*rc+is*ic)/d; r->imag = (is*rc-rs*ic)/d; return; } static void nc_abs(npy_cdouble *x, npy_cdouble *r) { r->real = sqrt(x->real*x->real + x->imag*x->imag); r->imag = 0; } #endif // NUMEXPR_COMPLEX_FUNCTIONS_HPP numexpr-2.2.2/numexpr/numexpr_object.cpp0000644000175000001440000003455612132261472021114 0ustar faltetusers00000000000000/********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. 
**********************************************************************/ #include "module.hpp" #include #include "numexpr_config.hpp" #include "interpreter.hpp" #include "numexpr_object.hpp" static int size_from_char(char c) { switch (c) { case 'b': return sizeof(char); case 'i': return sizeof(int); case 'l': return sizeof(long long); case 'f': return sizeof(float); case 'd': return sizeof(double); case 'c': return 2*sizeof(double); case 's': return 0; /* strings are ok but size must be computed */ default: PyErr_SetString(PyExc_TypeError, "signature value not in 'bilfdcs'"); return -1; } } static void NumExpr_dealloc(NumExprObject *self) { Py_XDECREF(self->signature); Py_XDECREF(self->tempsig); Py_XDECREF(self->constsig); Py_XDECREF(self->fullsig); Py_XDECREF(self->program); Py_XDECREF(self->constants); Py_XDECREF(self->input_names); PyMem_Del(self->mem); PyMem_Del(self->rawmem); PyMem_Del(self->memsteps); PyMem_Del(self->memsizes); Py_TYPE(self)->tp_free((PyObject*)self); } static PyObject * NumExpr_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { NumExprObject *self = (NumExprObject *)type->tp_alloc(type, 0); if (self != NULL) { #define INIT_WITH(name, object) \ self->name = object; \ if (!self->name) { \ Py_DECREF(self); \ return NULL; \ } INIT_WITH(signature, PyBytes_FromString("")); INIT_WITH(tempsig, PyBytes_FromString("")); INIT_WITH(constsig, PyBytes_FromString("")); INIT_WITH(fullsig, PyBytes_FromString("")); INIT_WITH(program, PyBytes_FromString("")); INIT_WITH(constants, PyTuple_New(0)); Py_INCREF(Py_None); self->input_names = Py_None; self->mem = NULL; self->rawmem = NULL; self->memsteps = NULL; self->memsizes = NULL; self->rawmemsize = 0; self->n_inputs = 0; self->n_constants = 0; self->n_temps = 0; #undef INIT_WITH } return (PyObject *)self; } static int NumExpr_init(NumExprObject *self, PyObject *args, PyObject *kwds) { int i, j, mem_offset; int n_inputs, n_constants, n_temps; PyObject *signature = NULL, *tempsig = NULL, *constsig = NULL; 
PyObject *fullsig = NULL, *program = NULL, *constants = NULL; PyObject *input_names = NULL, *o_constants = NULL; int *itemsizes = NULL; char **mem = NULL, *rawmem = NULL; npy_intp *memsteps; npy_intp *memsizes; int rawmemsize; static char *kwlist[] = {"signature", "tempsig", "program", "constants", "input_names", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "SSS|OO", kwlist, &signature, &tempsig, &program, &o_constants, &input_names)) { return -1; } n_inputs = (int)PyBytes_Size(signature); n_temps = (int)PyBytes_Size(tempsig); if (o_constants) { if (!PySequence_Check(o_constants) ) { PyErr_SetString(PyExc_TypeError, "constants must be a sequence"); return -1; } n_constants = (int)PySequence_Length(o_constants); if (!(constants = PyTuple_New(n_constants))) return -1; if (!(constsig = PyBytes_FromStringAndSize(NULL, n_constants))) { Py_DECREF(constants); return -1; } if (!(itemsizes = PyMem_New(int, n_constants))) { Py_DECREF(constants); return -1; } for (i = 0; i < n_constants; i++) { PyObject *o; if (!(o = PySequence_GetItem(o_constants, i))) { /* new reference */ Py_DECREF(constants); Py_DECREF(constsig); PyMem_Del(itemsizes); return -1; } PyTuple_SET_ITEM(constants, i, o); /* steals reference */ if (PyBool_Check(o)) { PyBytes_AS_STRING(constsig)[i] = 'b'; itemsizes[i] = size_from_char('b'); continue; } #if PY_MAJOR_VERSION < 3 if (PyInt_Check(o)) { #else if (PyArray_IsScalar(o, Int32)) { #endif PyBytes_AS_STRING(constsig)[i] = 'i'; itemsizes[i] = size_from_char('i'); continue; } #if PY_MAJOR_VERSION < 3 if (PyLong_Check(o)) { #else if (PyArray_IsScalar(o, Int64)) { #endif PyBytes_AS_STRING(constsig)[i] = 'l'; itemsizes[i] = size_from_char('l'); continue; } /* The Float32 scalars are the only ones that should reach here */ if (PyArray_IsScalar(o, Float32)) { PyBytes_AS_STRING(constsig)[i] = 'f'; itemsizes[i] = size_from_char('f'); continue; } if (PyFloat_Check(o)) { /* Python float constants are double precision by default */ PyBytes_AS_STRING(constsig)[i] 
= 'd'; itemsizes[i] = size_from_char('d'); continue; } if (PyComplex_Check(o)) { PyBytes_AS_STRING(constsig)[i] = 'c'; itemsizes[i] = size_from_char('c'); continue; } if (PyBytes_Check(o)) { PyBytes_AS_STRING(constsig)[i] = 's'; itemsizes[i] = (int)PyBytes_GET_SIZE(o); continue; } PyErr_SetString(PyExc_TypeError, "constants must be of type bool/int/long/float/double/complex/bytes"); Py_DECREF(constsig); Py_DECREF(constants); PyMem_Del(itemsizes); return -1; } } else { n_constants = 0; if (!(constants = PyTuple_New(0))) return -1; if (!(constsig = PyBytes_FromString(""))) { Py_DECREF(constants); return -1; } } fullsig = PyBytes_FromFormat("%c%s%s%s", get_return_sig(program), PyBytes_AS_STRING(signature), PyBytes_AS_STRING(constsig), PyBytes_AS_STRING(tempsig)); if (!fullsig) { Py_DECREF(constants); Py_DECREF(constsig); PyMem_Del(itemsizes); return -1; } if (!input_names) { input_names = Py_None; } /* Compute the size of registers. We leave temps out (will be malloc'ed later on). */ rawmemsize = 0; for (i = 0; i < n_constants; i++) rawmemsize += itemsizes[i]; rawmemsize *= BLOCK_SIZE1; mem = PyMem_New(char *, 1 + n_inputs + n_constants + n_temps); rawmem = PyMem_New(char, rawmemsize); memsteps = PyMem_New(npy_intp, 1 + n_inputs + n_constants + n_temps); memsizes = PyMem_New(npy_intp, 1 + n_inputs + n_constants + n_temps); if (!mem || !rawmem || !memsteps || !memsizes) { Py_DECREF(constants); Py_DECREF(constsig); Py_DECREF(fullsig); PyMem_Del(itemsizes); PyMem_Del(mem); PyMem_Del(rawmem); PyMem_Del(memsteps); PyMem_Del(memsizes); return -1; } /* 0 -> output [1, n_inputs+1) -> inputs [n_inputs+1, n_inputs+n_consts+1) -> constants [n_inputs+n_consts+1, n_inputs+n_consts+n_temps+1) -> temps */ /* Fill in 'mem' and 'rawmem' for constants */ mem_offset = 0; for (i = 0; i < n_constants; i++) { char c = PyBytes_AS_STRING(constsig)[i]; int size = itemsizes[i]; mem[i+n_inputs+1] = rawmem + mem_offset; mem_offset += BLOCK_SIZE1 * size; memsteps[i+n_inputs+1] = 
memsizes[i+n_inputs+1] = size; /* fill in the constants */ if (c == 'b') { char *bmem = (char*)mem[i+n_inputs+1]; char value = (char)PyLong_AsLong(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < BLOCK_SIZE1; j++) { bmem[j] = value; } } else if (c == 'i') { int *imem = (int*)mem[i+n_inputs+1]; int value = (int)PyLong_AsLong(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < BLOCK_SIZE1; j++) { imem[j] = value; } } else if (c == 'l') { long long *lmem = (long long*)mem[i+n_inputs+1]; long long value = PyLong_AsLongLong(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < BLOCK_SIZE1; j++) { lmem[j] = value; } } else if (c == 'f') { /* In this particular case the constant is in a NumPy scalar and in a regular Python object */ float *fmem = (float*)mem[i+n_inputs+1]; float value = PyArrayScalar_VAL(PyTuple_GET_ITEM(constants, i), Float); for (j = 0; j < BLOCK_SIZE1; j++) { fmem[j] = value; } } else if (c == 'd') { double *dmem = (double*)mem[i+n_inputs+1]; double value = PyFloat_AS_DOUBLE(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < BLOCK_SIZE1; j++) { dmem[j] = value; } } else if (c == 'c') { double *cmem = (double*)mem[i+n_inputs+1]; Py_complex value = PyComplex_AsCComplex(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < 2*BLOCK_SIZE1; j+=2) { cmem[j] = value.real; cmem[j+1] = value.imag; } } else if (c == 's') { char *smem = (char*)mem[i+n_inputs+1]; char *value = PyBytes_AS_STRING(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < size*BLOCK_SIZE1; j+=size) { memcpy(smem + j, value, size); } } } /* This is no longer needed since no unusual item sizes appear in temporaries (there are no string temporaries). 
*/ PyMem_Del(itemsizes); /* Fill in 'memsteps' and 'memsizes' for temps */ for (i = 0; i < n_temps; i++) { char c = PyBytes_AS_STRING(tempsig)[i]; int size = size_from_char(c); memsteps[i+n_inputs+n_constants+1] = size; memsizes[i+n_inputs+n_constants+1] = size; } /* See if any errors occured (e.g., in size_from_char) or if mem_offset is wrong */ if (PyErr_Occurred() || mem_offset != rawmemsize) { if (mem_offset != rawmemsize) { PyErr_Format(PyExc_RuntimeError, "mem_offset does not match rawmemsize"); } Py_DECREF(constants); Py_DECREF(constsig); Py_DECREF(fullsig); PyMem_Del(mem); PyMem_Del(rawmem); PyMem_Del(memsteps); PyMem_Del(memsizes); return -1; } #define REPLACE_OBJ(arg) \ {PyObject *tmp = self->arg; \ self->arg = arg; \ Py_XDECREF(tmp);} #define INCREF_REPLACE_OBJ(arg) {Py_INCREF(arg); REPLACE_OBJ(arg);} #define REPLACE_MEM(arg) {PyMem_Del(self->arg); self->arg=arg;} INCREF_REPLACE_OBJ(signature); INCREF_REPLACE_OBJ(tempsig); REPLACE_OBJ(constsig); REPLACE_OBJ(fullsig); INCREF_REPLACE_OBJ(program); REPLACE_OBJ(constants); INCREF_REPLACE_OBJ(input_names); REPLACE_MEM(mem); REPLACE_MEM(rawmem); REPLACE_MEM(memsteps); REPLACE_MEM(memsizes); self->rawmemsize = rawmemsize; self->n_inputs = n_inputs; self->n_constants = n_constants; self->n_temps = n_temps; #undef REPLACE_OBJ #undef INCREF_REPLACE_OBJ #undef REPLACE_MEM return check_program(self); } static PyMethodDef NumExpr_methods[] = { {"run", (PyCFunction) NumExpr_run, METH_VARARGS|METH_KEYWORDS, NULL}, {NULL, NULL} }; static PyMemberDef NumExpr_members[] = { {"signature", T_OBJECT_EX, offsetof(NumExprObject, signature), READONLY, NULL}, {"constsig", T_OBJECT_EX, offsetof(NumExprObject, constsig), READONLY, NULL}, {"tempsig", T_OBJECT_EX, offsetof(NumExprObject, tempsig), READONLY, NULL}, {"fullsig", T_OBJECT_EX, offsetof(NumExprObject, fullsig), READONLY, NULL}, {"program", T_OBJECT_EX, offsetof(NumExprObject, program), READONLY, NULL}, {"constants", T_OBJECT_EX, offsetof(NumExprObject, constants), 
READONLY, NULL}, {"input_names", T_OBJECT, offsetof(NumExprObject, input_names), 0, NULL}, {NULL}, }; PyTypeObject NumExprType = { PyVarObject_HEAD_INIT(NULL, 0) "numexpr.NumExpr", /*tp_name*/ sizeof(NumExprObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)NumExpr_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ (ternaryfunc)NumExpr_run, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ "NumExpr objects", /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ NumExpr_methods, /* tp_methods */ NumExpr_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ (initproc)NumExpr_init, /* tp_init */ 0, /* tp_alloc */ NumExpr_new, /* tp_new */ }; numexpr-2.2.2/numexpr/tests/0000755000175000001440000000000012221261175016510 5ustar faltetusers00000000000000numexpr-2.2.2/numexpr/tests/test_numexpr.py0000644000175000001440000007012212221260713021616 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### import os import sys import warnings import numpy from numpy import ( array, arange, empty, zeros, int32, int64, uint16, complex_, float64, rec, copy, ones_like, where, alltrue, linspace, sum, prod, sqrt, fmod, sin, cos, tan, arcsin, arccos, arctan, arctan2, sinh, cosh, tanh, arcsinh, arccosh, arctanh, log, log1p, log10, exp, expm1) from numpy.testing import (assert_equal, assert_array_equal, assert_array_almost_equal, assert_allclose) from numpy import shape, allclose, array_equal, ravel, isnan, isinf import numexpr from numexpr import E, NumExpr, evaluate, disassemble, use_vml import unittest TestCase = unittest.TestCase double = numpy.double # Recommended minimum versions minimum_numpy_version = "1.6" class test_numexpr(TestCase): """Testing with 1 thread""" nthreads = 1 def setUp(self): numexpr.set_num_threads(self.nthreads) def test_simple(self): ex = 2.0 * E.a + 3.0 * E.b * E.c sig = [('a', double), ('b', double), ('c', double)] func = NumExpr(ex, signature=sig) x = func(array([1., 2, 3]), array([4., 5, 6]), array([7., 8, 9])) assert_array_equal(x, array([ 86., 124., 168.])) def test_simple_expr_small_array(self): func = NumExpr(E.a) x = arange(100.0) y = func(x) assert_array_equal(x, y) def test_simple_expr(self): func = NumExpr(E.a) x = arange(1e6) y = func(x) assert_array_equal(x, y) def test_rational_expr(self): func = NumExpr((E.a + 2.0*E.b) / (1 + E.a + 4*E.b*E.b)) a = arange(1e6) b = arange(1e6) * 0.1 x = (a + 2*b) / (1 + a + 4*b*b) y = func(a, b) assert_array_almost_equal(x, y) def test_reductions(self): # Check that they compile OK. 
assert_equal(disassemble( NumExpr("sum(x**2+2, axis=None)", [('x', double)])), [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), (b'add_ddd', b't3', b't3', b'c2[2.0]'), (b'sum_ddn', b'r0', b't3', None)]) assert_equal(disassemble( NumExpr("sum(x**2+2, axis=1)", [('x', double)])), [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), (b'add_ddd', b't3', b't3', b'c2[2.0]'), (b'sum_ddn', b'r0', b't3', 1)]) assert_equal(disassemble( NumExpr("prod(x**2+2, axis=2)", [('x', double)])), [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), (b'add_ddd', b't3', b't3', b'c2[2.0]'), (b'prod_ddn', b'r0', b't3', 2)]) # Check that full reductions work. x = zeros(1e5)+.01 # checks issue #41 assert_allclose(evaluate("sum(x+2,axis=None)"), sum(x+2,axis=None)) assert_allclose(evaluate("sum(x+2,axis=0)"), sum(x+2,axis=0)) assert_allclose(evaluate("prod(x,axis=0)"), prod(x,axis=0)) x = arange(10.0) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0)) assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x**2+2,axis=0)) x = arange(100.0) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0)) assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x-1,axis=0)) x = linspace(0.1,1.0,2000) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0)) assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x-1,axis=0)) # Check that reductions along an axis work y = arange(9.0).reshape(3,3) assert_allclose(evaluate("sum(y**2, axis=1)"), sum(y**2, axis=1)) assert_allclose(evaluate("sum(y**2, axis=0)"), sum(y**2, axis=0)) assert_allclose(evaluate("sum(y**2, axis=None)"), sum(y**2, axis=None)) assert_allclose(evaluate("prod(y**2, axis=1)"), prod(y**2, axis=1)) assert_allclose(evaluate("prod(y**2, axis=0)"), prod(y**2, axis=0)) assert_allclose(evaluate("prod(y**2, axis=None)"), prod(y**2, axis=None)) # Check integers x = arange(10.) 
x = x.astype(int) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0)) assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x**2+2,axis=0)) # Check longs x = x.astype(long) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0)) assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x**2+2,axis=0)) # Check complex x = x + .1j assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x**2+2,axis=0)) assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x-1,axis=0)) def test_in_place(self): x = arange(10000.).reshape(1000,10) evaluate("x + 3", out=x) assert_equal(x, arange(10000.).reshape(1000,10) + 3) y = arange(10) evaluate("(x - 3) * y + (x - 3)", out=x) assert_equal(x, arange(10000.).reshape(1000,10) * (arange(10) + 1)) def test_axis(self): y = arange(9.0).reshape(3,3) try: evaluate("sum(y, axis=2)") except ValueError: pass else: raise ValueError("should raise exception!") try: evaluate("sum(y, axis=-3)") except ValueError: pass else: raise ValueError("should raise exception!") try: # Negative axis are not supported evaluate("sum(y, axis=-1)") except ValueError: pass else: raise ValueError("should raise exception!") def test_r0_reuse(self): assert_equal(disassemble(NumExpr("x * x + 2", [('x', double)])), [(b'mul_ddd', b'r0', b'r1[x]', b'r1[x]'), (b'add_ddd', b'r0', b'r0', b'c2[2.0]')]) class test_numexpr2(test_numexpr): """Testing with 2 threads""" nthreads = 2 class test_evaluate(TestCase): def test_simple(self): a = array([1., 2., 3.]) b = array([4., 5., 6.]) c = array([7., 8., 9.]) x = evaluate("2*a + 3*b*c") assert_array_equal(x, array([ 86., 124., 168.])) def test_simple_expr_small_array(self): x = arange(100.0) y = evaluate("x") assert_array_equal(x, y) def test_simple_expr(self): x = arange(1e6) y = evaluate("x") assert_array_equal(x, y) # Test for issue #37 if sys.version_info[0] < 3: # In python 3 '/' perforns true division, not integer division. 
# Integer division '//' is still not suppoerted by numexpr def test_zero_div(self): x = arange(100, dtype='i4') y = evaluate("1/x") x2 = zeros(100, dtype='i4') x2[1] = 1 assert_array_equal(x2, y) # Test for issue #22 def test_true_div(self): x = arange(10, dtype='i4') assert_array_equal(evaluate("x/2"), x / 2) assert_array_equal(evaluate("x/2", truediv=False), x / 2) assert_array_equal(evaluate("x/2", truediv='auto'), x / 2) assert_array_equal(evaluate("x/2", truediv=True), x / 2.0) # PyTables uses __nonzero__ among ExpressionNode objects internally # so this should be commented out for the moment. See #24. # def test_boolean_operator(self): # x = arange(10, dtype='i4') # try: # evaluate("(x > 1) and (x < 9)") # except TypeError: # pass # else: # raise ValueError("should raise exception!") def test_rational_expr(self): a = arange(1e6) b = arange(1e6) * 0.1 x = (a + 2*b) / (1 + a + 4*b*b) y = evaluate("(a + 2*b) / (1 + a + 4*b*b)") assert_array_almost_equal(x, y) def test_complex_expr(self): def complex(a, b): c = zeros(a.shape, dtype=complex_) c.real = a c.imag = b return c a = arange(1e4) b = arange(1e4)**1e-5 z = a + 1j*b x = z.imag x = sin(complex(a, b)).real + z.imag y = evaluate("sin(complex(a, b)).real + z.imag") assert_array_almost_equal(x, y) def test_complex_strides(self): a = arange(100).reshape(10,10)[::2] b = arange(50).reshape(5,10) assert_array_equal(evaluate("a+b"), a+b) c = empty([10], dtype=[('c1', int32), ('c2', uint16)]) c['c1'] = arange(10) c['c2'].fill(0xaaaa) c1 = c['c1'] a0 = a[0] assert_array_equal(evaluate("c1"), c1) assert_array_equal(evaluate("a0+c1"), a0+c1) def test_broadcasting(self): a = arange(100).reshape(10,10)[::2] c = arange(10) d = arange(5).reshape(5,1) assert_array_equal(evaluate("a+c"), a+c) assert_array_equal(evaluate("a+d"), a+d) expr = NumExpr("2.0*a+3.0*c",[('a', double),('c', double)]) assert_array_equal(expr(a,c), 2.0*a+3.0*c) def test_all_scalar(self): a = 3. b = 4. 
assert_allclose(evaluate("a+b"), a+b) expr = NumExpr("2*a+3*b",[('a', double),('b', double)]) assert_equal(expr(a,b), 2*a+3*b) def test_run(self): a = arange(100).reshape(10,10)[::2] b = arange(10) expr = NumExpr("2*a+3*b",[('a', double),('b', double)]) assert_array_equal(expr(a,b), expr.run(a,b)) def test_illegal_value(self): a = arange(3) try: evaluate("a < [0, 0, 0]") except TypeError: pass else: self.fail() # Execution order set here so as to not use too many threads # during the rest of the execution. See #33 for details. def test_changing_nthreads_00_inc(self): a = linspace(-1, 1, 1e6) b = ((.25*a + .75)*a - 1.5)*a - 2 for nthreads in range(1,7): numexpr.set_num_threads(nthreads) c = evaluate("((.25*a + .75)*a - 1.5)*a - 2") assert_array_almost_equal(b, c) def test_changing_nthreads_01_dec(self): a = linspace(-1, 1, 1e6) b = ((.25*a + .75)*a - 1.5)*a - 2 for nthreads in range(6, 1, -1): numexpr.set_num_threads(nthreads) c = evaluate("((.25*a + .75)*a - 1.5)*a - 2") assert_array_almost_equal(b, c) tests = [ ('MISC', ['b*c+d*e', '2*a+3*b', '-a', 'sinh(a)', '2*a + (cos(3)+5)*sinh(cos(b))', '2*a + arctan2(a, b)', 'arcsin(0.5)', 'where(a != 0.0, 2, a)', 'where(a > 10, b < a, b > a)', 'where((a-10).real != 0.0, a, 2)', '0.25 * (a < 5) + 0.33 * (a >= 5)', 'cos(1+1)', '1+1', '1', 'cos(a2)', ])] optests = [] for op in list('+-*/%') + ['**']: optests.append("(a+1) %s (b+3)" % op) optests.append("3 %s (b+3)" % op) optests.append("(a+1) %s 4" % op) optests.append("2 %s (b+3)" % op) optests.append("(a+1) %s 2" % op) optests.append("(a+1) %s -1" % op) optests.append("(a+1) %s 0.5" % op) # Check divisions and modulus by zero (see ticket #107) optests.append("(a+1) %s 0" % op) tests.append(('OPERATIONS', optests)) cmptests = [] for op in ['<', '<=', '==', '>=', '>', '!=']: cmptests.append("a/2+5 %s b" % op) cmptests.append("a/2+5 %s 7" % op) cmptests.append("7 %s b" % op) cmptests.append("7.0 %s 5" % op) tests.append(('COMPARISONS', cmptests)) func1tests = [] for func in 
['copy', 'ones_like', 'sqrt', 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan', 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh', 'log', 'log1p', 'log10', 'exp', 'expm1', 'abs']: func1tests.append("a + %s(b+c)" % func) tests.append(('1_ARG_FUNCS', func1tests)) func2tests = [] for func in ['arctan2', 'fmod']: func2tests.append("a + %s(b+c, d+1)" % func) func2tests.append("a + %s(b+c, 1)" % func) func2tests.append("a + %s(1, d+1)" % func) tests.append(('2_ARG_FUNCS', func2tests)) powtests = [] # n = -1, 0.5, 2, 4 already handled in section "OPERATIONS" for n in (-7, -2.5, -1.5, -1.3, -.5, 0, 0.0, 1, 2.3, 2.5, 3): powtests.append("(a+1)**%s" % n) tests.append(('POW_TESTS', powtests)) def equal(a, b, exact): if array_equal(a, b): return True if hasattr(a, 'dtype') and a.dtype in ['f4','f8']: nnans = isnan(a).sum() if nnans > 0: # For results containing NaNs, just check that the number # of NaNs is the same in both arrays. This check could be # made more exhaustive, but checking element by element in # python space is very expensive in general. return nnans == isnan(b).sum() ninfs = isinf(a).sum() if ninfs > 0: # Ditto for Inf's return ninfs == isinf(b).sum() if exact: return (shape(a) == shape(b)) and alltrue(ravel(a) == ravel(b), axis=0) else: if hasattr(a, 'dtype') and a.dtype == 'f4': atol = 1e-5 # Relax precission for special opcodes, like fmod else: atol = 1e-8 return (shape(a) == shape(b) and allclose(ravel(a), ravel(b), atol=atol)) class Skip(Exception): pass def test_expressions(): test_no = [0] def make_test_method(a, a2, b, c, d, e, x, expr, test_scalar, dtype, optimization, exact, section): this_locals = locals() def method(): # We don't want to listen at RuntimeWarnings like # "overflows" or "divide by zero" in plain eval(). 
warnings.simplefilter("ignore") npval = eval(expr, globals(), this_locals) warnings.simplefilter("always") npval = eval(expr, globals(), this_locals) try: neval = evaluate(expr, local_dict=this_locals, optimization=optimization) assert equal(npval, neval, exact), """%r (test_scalar=%r, dtype=%r, optimization=%r, exact=%r, npval=%r (%r - %r)\n neval=%r (%r - %r))""" % (expr, test_scalar, dtype.__name__, optimization, exact, npval, type(npval), shape(npval), neval, type(neval), shape(neval)) except AssertionError: raise except NotImplementedError: print('%r not implemented for %s (scalar=%d, opt=%s)' % (expr, dtype.__name__, test_scalar, optimization)) except: print('numexpr error for expression %r' % (expr,)) raise method.description = ('test_expressions(%s, test_scalar=%r, ' 'dtype=%r, optimization=%r, exact=%r)') \ % (expr, test_scalar, dtype.__name__, optimization, exact) test_no[0] += 1 method.__name__ = 'test_scalar%d_%s_%s_%s_%04d' % (test_scalar, dtype.__name__, optimization.encode('ascii'), section.encode('ascii'), test_no[0]) return method x = None for test_scalar in (0, 1, 2): for dtype in (int, long, numpy.float32, double, complex): array_size = 100 a = arange(2*array_size, dtype=dtype)[::2] a2 = zeros([array_size, array_size], dtype=dtype) b = arange(array_size, dtype=dtype) / array_size c = arange(array_size, dtype=dtype) d = arange(array_size, dtype=dtype) e = arange(array_size, dtype=dtype) if dtype == complex: a = a.real for x in [a2, b, c, d, e]: x += 1j x *= 1+1j if test_scalar == 1: a = a[array_size // 2] if test_scalar == 2: b = b[array_size // 2] for optimization, exact in [ ('none', False), ('moderate', False), ('aggressive', False)]: for section_name, section_tests in tests: for expr in section_tests: if dtype == complex and ( '<' in expr or '>' in expr or '%' in expr or "arctan2" in expr or "fmod" in expr): # skip complex comparisons or functions not # defined in complex domain. 
continue if (dtype in (int, long) and test_scalar and expr == '(a+1) ** -1'): continue m = make_test_method(a, a2, b, c, d, e, x, expr, test_scalar, dtype, optimization, exact, section_name) yield m class test_int64(TestCase): def test_neg(self): a = array([2**31-1, 2**31, 2**32, 2**63-1], dtype=int64) res = evaluate('-a') assert_array_equal(res, [1-2**31, -(2**31), -(2**32), 1-2**63]) self.assertEqual(res.dtype.name, 'int64') class test_int32_int64(TestCase): if sys.version_info[0] < 2: # no long literals in python 3 def test_small_long(self): # Small longs should not be downgraded to ints. res = evaluate('42L') assert_array_equal(res, 42) self.assertEqual(res.dtype.name, 'int64') def test_small_int(self): # Small ints (32-bit ones) should not be promoted to longs. res = evaluate('2') assert_array_equal(res, 2) self.assertEqual(res.dtype.name, 'int32') def test_big_int(self): # Big ints should be promoted to longs. res = evaluate('2**40') assert_array_equal(res, 2**40) self.assertEqual(res.dtype.name, 'int64') def test_long_constant_promotion(self): int32array = arange(100, dtype='int32') itwo = numpy.int32(2) ltwo = numpy.int64(2) res = int32array * 2 res32 = evaluate('int32array * itwo') res64 = evaluate('int32array * ltwo') assert_array_equal(res, res32) assert_array_equal(res, res64) self.assertEqual(res32.dtype.name, 'int32') self.assertEqual(res64.dtype.name, 'int64') def test_int64_array_promotion(self): int32array = arange(100, dtype='int32') int64array = arange(100, dtype='int64') respy = int32array * int64array resnx = evaluate('int32array * int64array') assert_array_equal(respy, resnx) self.assertEqual(resnx.dtype.name, 'int64') class test_uint32_int64(TestCase): def test_small_uint32(self): # Small uint32 should not be downgraded to ints. 
a = numpy.uint32(42) res = evaluate('a') assert_array_equal(res, 42) self.assertEqual(res.dtype.name, 'int64') def test_uint32_constant_promotion(self): int32array = arange(100, dtype='int32') stwo = numpy.int32(2) utwo = numpy.uint32(2) res = int32array * utwo res32 = evaluate('int32array * stwo') res64 = evaluate('int32array * utwo') assert_array_equal(res, res32) assert_array_equal(res, res64) self.assertEqual(res32.dtype.name, 'int32') self.assertEqual(res64.dtype.name, 'int64') def test_int64_array_promotion(self): uint32array = arange(100, dtype='uint32') int64array = arange(100, dtype='int64') respy = uint32array * int64array resnx = evaluate('uint32array * int64array') assert_array_equal(respy, resnx) self.assertEqual(resnx.dtype.name, 'int64') class test_strings(TestCase): BLOCK_SIZE1 = 128 BLOCK_SIZE2 = 8 str_list1 = [b'foo', b'bar', b'', b' '] str_list2 = [b'foo', b'', b'x', b' '] str_nloops = len(str_list1) * (BLOCK_SIZE1 + BLOCK_SIZE2 + 1) str_array1 = array(str_list1 * str_nloops) str_array2 = array(str_list2 * str_nloops) str_constant = b'doodoo' def test_null_chars(self): str_list = [ b'\0\0\0', b'\0\0foo\0', b'\0\0foo\0b', b'\0\0foo\0b\0', b'foo\0', b'foo\0b', b'foo\0b\0', b'foo\0bar\0baz\0\0' ] for s in str_list: r = evaluate('s') self.assertEqual(s, r.tostring()) # check *all* stored data def test_compare_copy(self): sarr = self.str_array1 expr = 'sarr' res1 = eval(expr) res2 = evaluate(expr) assert_array_equal(res1, res2) def test_compare_array(self): sarr1 = self.str_array1 sarr2 = self.str_array2 expr = 'sarr1 >= sarr2' res1 = eval(expr) res2 = evaluate(expr) assert_array_equal(res1, res2) def test_compare_variable(self): sarr = self.str_array1 svar = self.str_constant expr = 'sarr >= svar' res1 = eval(expr) res2 = evaluate(expr) assert_array_equal(res1, res2) def test_compare_constant(self): sarr = self.str_array1 expr = 'sarr >= %r' % self.str_constant res1 = eval(expr) res2 = evaluate(expr) assert_array_equal(res1, res2) def 
test_add_string_array(self): sarr1 = self.str_array1 sarr2 = self.str_array2 expr = 'sarr1 + sarr2' self.assert_missing_op('add_sss', expr, locals()) def test_add_numeric_array(self): sarr = self.str_array1 narr = arange(len(sarr), dtype='int32') expr = 'sarr >= narr' self.assert_missing_op('ge_bsi', expr, locals()) def assert_missing_op(self, op, expr, local_dict): msg = "expected NotImplementedError regarding '%s'" % op try: evaluate(expr, local_dict) except NotImplementedError, nie: if "'%s'" % op not in nie.args[0]: self.fail(msg) else: self.fail(msg) def test_compare_prefix(self): # Check comparing two strings where one is a prefix of the # other. for s1, s2 in [ (b'foo', b'foobar'), (b'foo', b'foo\0bar'), (b'foo\0a', b'foo\0bar') ]: self.assertTrue(evaluate('s1 < s2')) self.assertTrue(evaluate('s1 <= s2')) self.assertTrue(evaluate('~(s1 == s2)')) self.assertTrue(evaluate('~(s1 >= s2)')) self.assertTrue(evaluate('~(s1 > s2)')) # Check for NumPy array-style semantics in string equality. s1, s2 = b'foo', b'foo\0\0' self.assertTrue(evaluate('s1 == s2')) # Case for testing selections in fields which are aligned but whose # data length is not an exact multiple of the length of the record. # The following test exposes the problem only in 32-bit machines, # because in 64-bit machines 'c2' is unaligned. However, this should # check most platforms where, while not unaligned, 'len(datatype) > # boundary_alignment' is fullfilled. class test_irregular_stride(TestCase): def test_select(self): f0 = arange(10, dtype=int32) f1 = arange(10, dtype=float64) irregular = rec.fromarrays([f0, f1]) f0 = irregular['f0'] f1 = irregular['f1'] i0 = evaluate('f0 < 5') i1 = evaluate('f1 < 5') assert_array_equal(f0[i0], arange(5, dtype=int32)) assert_array_equal(f1[i1], arange(5, dtype=float64)) # Cases for testing arrays with dimensions that can be zero. 
class test_zerodim(TestCase):
    # Arrays with a zero-length dimension: elementwise evaluation must
    # return an empty array of the promoted dtype (float64 here, since the
    # operands are int32 and float64) instead of failing.

    def test_zerodim1d(self):
        a0 = array([], dtype=int32)
        a1 = array([], dtype=float64)
        r0 = evaluate('a0 + a1')
        r1 = evaluate('a0 * a1')
        # Results compare equal to the empty float64 operand.
        assert_array_equal(r0, a1)
        assert_array_equal(r1, a1)

    def test_zerodim3d(self):
        # Same check with a 3-d shape whose first dimension is zero.
        a0 = array([], dtype=int32).reshape(0,2,4)
        a1 = array([], dtype=float64).reshape(0,2,4)
        r0 = evaluate('a0 + a1')
        r1 = evaluate('a0 * a1')
        assert_array_equal(r0, a1)
        assert_array_equal(r1, a1)

# Case test for threads
class test_threading(TestCase):
    def test_thread(self):
        # Evaluate from a worker thread; the assertion runs inside run(),
        # so a failure surfaces as an exception in that thread.
        import threading
        class ThreadTest(threading.Thread):
            def run(self):
                a = arange(3)
                assert_array_equal(evaluate('a**3'), array([0, 1, 8]))
        test = ThreadTest()
        test.start()

# The worker function for the subprocess (needs to be here because Windows
# has problems pickling nested functions with the multiprocess module :-/)
def _worker(qout = None):
    # Minimal evaluation exercised inside a child process; reports
    # completion through the optional queue.
    ra = numpy.arange(1e3)
    rows = evaluate('ra > 0')
    #print "Succeeded in evaluation!\n"
    if qout is not None:
        qout.put("Done")

# Case test for subprocesses (via multiprocessing module)
class test_subprocess(TestCase):
    def test_multiprocess(self):
        # Skip silently when multiprocessing is unavailable.
        try:
            import multiprocessing as mp
        except ImportError:
            return
        # Check for two threads at least
        numexpr.set_num_threads(2)
        #print "**** Running from main process:"
        _worker()
        #print "**** Running from subprocess:"
        qout = mp.Queue()
        ps = mp.Process(target=_worker, args=(qout,))
        ps.daemon = True
        ps.start()
        # Blocks until the child puts its "Done" marker on the queue.
        result = qout.get()
        #print result

def print_versions():
    """Print the versions of software that numexpr relies on."""
    if numpy.__version__ < minimum_numpy_version:
        print "*Warning*: NumPy version is lower than recommended: %s < %s" % \
              (numpy.__version__, minimum_numpy_version)
    print '-=' * 38
    print "Numexpr version: %s" % numexpr.__version__
    print "NumPy version: %s" % numpy.__version__
    print 'Python version: %s' % sys.version
    if os.name == 'posix':
        # os.uname() only exists on POSIX platforms.
        (sysname, nodename, release, version, machine) = os.uname()
        print 'Platform: %s-%s' % (sys.platform, machine)
    print "AMD/Intel CPU?
%s" % numexpr.is_cpu_amd_intel print "VML available? %s" % use_vml if use_vml: print "VML/MKL version: %s" % numexpr.get_vml_version() print ("Number of threads used by default: %d " "(out of %d detected cores)" % (numexpr.nthreads, numexpr.ncores)) print '-=' * 38 def test(): """ Run all the tests in the test suite. """ print_versions() return unittest.TextTestRunner().run(suite()) test.__test__ = False def suite(): import unittest import platform as pl theSuite = unittest.TestSuite() niter = 1 class TestExpressions(TestCase): pass def add_method(func): def method(self): return func() setattr(TestExpressions, func.__name__, method.__get__(None, TestExpressions)) for func in test_expressions(): add_method(func) for n in range(niter): theSuite.addTest(unittest.makeSuite(test_numexpr)) theSuite.addTest(unittest.makeSuite(test_numexpr2)) theSuite.addTest(unittest.makeSuite(test_evaluate)) theSuite.addTest(unittest.makeSuite(TestExpressions)) theSuite.addTest(unittest.makeSuite(test_int32_int64)) theSuite.addTest(unittest.makeSuite(test_uint32_int64)) theSuite.addTest(unittest.makeSuite(test_strings)) theSuite.addTest( unittest.makeSuite(test_irregular_stride) ) theSuite.addTest(unittest.makeSuite(test_zerodim)) # multiprocessing module is not supported on Hurd/kFreeBSD if (pl.system().lower() not in ('gnu', 'gnu/kfreebsd')): theSuite.addTest(unittest.makeSuite(test_subprocess)) # I need to put this test after test_subprocess because # if not, the test suite locks immediately before test_subproces. 
# This only happens with Windows, so I suspect of a subtle bad # interaction with threads and subprocess :-/ theSuite.addTest(unittest.makeSuite(test_threading)) return theSuite if __name__ == '__main__': print_versions() unittest.main(defaultTest = 'suite') # suite = suite() # unittest.TextTestRunner(verbosity=2).run(suite) numexpr-2.2.2/numexpr/tests/__init__.py0000644000175000001440000000067712132261472020634 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### from numexpr.tests.test_numexpr import test, print_versions if __name__ == '__main__': test() numexpr-2.2.2/numexpr/opcodes.hpp0000644000175000001440000001464412132261472017525 0ustar faltetusers00000000000000/********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ /* OPCODE(n, enum_name, exported, return_type, arg1_type, arg2_type, arg3_type) `exported` is NULL if the opcode shouldn't exported by the Python module. Types are Tb, Ti, Tl, Tf, Td, Tc, Ts, Tn, and T0; these symbols should be #defined to whatever is needed. (T0 is the no-such-arg type.) 
*/ OPCODE(0, OP_NOOP, "noop", T0, T0, T0, T0) OPCODE(1, OP_COPY_BB, "copy_bb", Tb, Tb, T0, T0) OPCODE(2, OP_INVERT_BB, "invert_bb", Tb, Tb, T0, T0) OPCODE(3, OP_AND_BBB, "and_bbb", Tb, Tb, Tb, T0) OPCODE(4, OP_OR_BBB, "or_bbb", Tb, Tb, Tb, T0) OPCODE(5, OP_EQ_BBB, "eq_bbb", Tb, Tb, Tb, T0) OPCODE(6, OP_NE_BBB, "ne_bbb", Tb, Tb, Tb, T0) OPCODE(7, OP_GT_BII, "gt_bii", Tb, Ti, Ti, T0) OPCODE(8, OP_GE_BII, "ge_bii", Tb, Ti, Ti, T0) OPCODE(9, OP_EQ_BII, "eq_bii", Tb, Ti, Ti, T0) OPCODE(10, OP_NE_BII, "ne_bii", Tb, Ti, Ti, T0) OPCODE(11, OP_GT_BLL, "gt_bll", Tb, Tl, Tl, T0) OPCODE(12, OP_GE_BLL, "ge_bll", Tb, Tl, Tl, T0) OPCODE(13, OP_EQ_BLL, "eq_bll", Tb, Tl, Tl, T0) OPCODE(14, OP_NE_BLL, "ne_bll", Tb, Tl, Tl, T0) OPCODE(15, OP_GT_BFF, "gt_bff", Tb, Tf, Tf, T0) OPCODE(16, OP_GE_BFF, "ge_bff", Tb, Tf, Tf, T0) OPCODE(17, OP_EQ_BFF, "eq_bff", Tb, Tf, Tf, T0) OPCODE(18, OP_NE_BFF, "ne_bff", Tb, Tf, Tf, T0) OPCODE(19, OP_GT_BDD, "gt_bdd", Tb, Td, Td, T0) OPCODE(20, OP_GE_BDD, "ge_bdd", Tb, Td, Td, T0) OPCODE(21, OP_EQ_BDD, "eq_bdd", Tb, Td, Td, T0) OPCODE(22, OP_NE_BDD, "ne_bdd", Tb, Td, Td, T0) OPCODE(23, OP_GT_BSS, "gt_bss", Tb, Ts, Ts, T0) OPCODE(24, OP_GE_BSS, "ge_bss", Tb, Ts, Ts, T0) OPCODE(25, OP_EQ_BSS, "eq_bss", Tb, Ts, Ts, T0) OPCODE(26, OP_NE_BSS, "ne_bss", Tb, Ts, Ts, T0) OPCODE(27, OP_CAST_IB, "cast_ib", Ti, Tb, T0, T0) OPCODE(28, OP_COPY_II, "copy_ii", Ti, Ti, T0, T0) OPCODE(29, OP_ONES_LIKE_II, "ones_like_ii", Ti, T0, T0, T0) OPCODE(30, OP_NEG_II, "neg_ii", Ti, Ti, T0, T0) OPCODE(31, OP_ADD_III, "add_iii", Ti, Ti, Ti, T0) OPCODE(32, OP_SUB_III, "sub_iii", Ti, Ti, Ti, T0) OPCODE(33, OP_MUL_III, "mul_iii", Ti, Ti, Ti, T0) OPCODE(34, OP_DIV_III, "div_iii", Ti, Ti, Ti, T0) OPCODE(35, OP_POW_III, "pow_iii", Ti, Ti, Ti, T0) OPCODE(36, OP_MOD_III, "mod_iii", Ti, Ti, Ti, T0) OPCODE(37, OP_WHERE_IBII, "where_ibii", Ti, Tb, Ti, Ti) OPCODE(38, OP_CAST_LI, "cast_li", Tl, Ti, T0, T0) OPCODE(39, OP_COPY_LL, "copy_ll", Tl, Tl, T0, T0) OPCODE(40, OP_ONES_LIKE_LL, 
"ones_like_ll", Tl, T0, T0, T0) OPCODE(41, OP_NEG_LL, "neg_ll", Tl, Tl, T0, T0) OPCODE(42, OP_ADD_LLL, "add_lll", Tl, Tl, Tl, T0) OPCODE(43, OP_SUB_LLL, "sub_lll", Tl, Tl, Tl, T0) OPCODE(44, OP_MUL_LLL, "mul_lll", Tl, Tl, Tl, T0) OPCODE(45, OP_DIV_LLL, "div_lll", Tl, Tl, Tl, T0) OPCODE(46, OP_POW_LLL, "pow_lll", Tl, Tl, Tl, T0) OPCODE(47, OP_MOD_LLL, "mod_lll", Tl, Tl, Tl, T0) OPCODE(48, OP_WHERE_LBLL, "where_lbll", Tl, Tb, Tl, Tl) OPCODE(49, OP_CAST_FI, "cast_fi", Tf, Ti, T0, T0) OPCODE(50, OP_CAST_FL, "cast_fl", Tf, Tl, T0, T0) OPCODE(51, OP_COPY_FF, "copy_ff", Tf, Tf, T0, T0) OPCODE(52, OP_ONES_LIKE_FF, "ones_like_ff", Tf, T0, T0, T0) OPCODE(53, OP_NEG_FF, "neg_ff", Tf, Tf, T0, T0) OPCODE(54, OP_ADD_FFF, "add_fff", Tf, Tf, Tf, T0) OPCODE(55, OP_SUB_FFF, "sub_fff", Tf, Tf, Tf, T0) OPCODE(56, OP_MUL_FFF, "mul_fff", Tf, Tf, Tf, T0) OPCODE(57, OP_DIV_FFF, "div_fff", Tf, Tf, Tf, T0) OPCODE(58, OP_POW_FFF, "pow_fff", Tf, Tf, Tf, T0) OPCODE(59, OP_MOD_FFF, "mod_fff", Tf, Tf, Tf, T0) OPCODE(60, OP_SQRT_FF, "sqrt_ff", Tf, Tf, T0, T0) OPCODE(61, OP_WHERE_FBFF, "where_fbff", Tf, Tb, Tf, Tf) OPCODE(62, OP_FUNC_FFN, "func_ffn", Tf, Tf, Tn, T0) OPCODE(63, OP_FUNC_FFFN, "func_fffn", Tf, Tf, Tf, Tn) OPCODE(64, OP_CAST_DI, "cast_di", Td, Ti, T0, T0) OPCODE(65, OP_CAST_DL, "cast_dl", Td, Tl, T0, T0) OPCODE(66, OP_CAST_DF, "cast_df", Td, Tf, T0, T0) OPCODE(67, OP_COPY_DD, "copy_dd", Td, Td, T0, T0) OPCODE(68, OP_ONES_LIKE_DD, "ones_like_dd", Td, T0, T0, T0) OPCODE(69, OP_NEG_DD, "neg_dd", Td, Td, T0, T0) OPCODE(70, OP_ADD_DDD, "add_ddd", Td, Td, Td, T0) OPCODE(71, OP_SUB_DDD, "sub_ddd", Td, Td, Td, T0) OPCODE(72, OP_MUL_DDD, "mul_ddd", Td, Td, Td, T0) OPCODE(73, OP_DIV_DDD, "div_ddd", Td, Td, Td, T0) OPCODE(74, OP_POW_DDD, "pow_ddd", Td, Td, Td, T0) OPCODE(75, OP_MOD_DDD, "mod_ddd", Td, Td, Td, T0) OPCODE(76, OP_SQRT_DD, "sqrt_dd", Td, Td, T0, T0) OPCODE(77, OP_WHERE_DBDD, "where_dbdd", Td, Tb, Td, Td) OPCODE(78, OP_FUNC_DDN, "func_ddn", Td, Td, Tn, T0) OPCODE(79, OP_FUNC_DDDN, 
"func_dddn", Td, Td, Td, Tn) OPCODE(80, OP_EQ_BCC, "eq_bcc", Tb, Tc, Tc, T0) OPCODE(81, OP_NE_BCC, "ne_bcc", Tb, Tc, Tc, T0) OPCODE(82, OP_CAST_CI, "cast_ci", Tc, Ti, T0, T0) OPCODE(83, OP_CAST_CL, "cast_cl", Tc, Tl, T0, T0) OPCODE(84, OP_CAST_CF, "cast_cf", Tc, Tf, T0, T0) OPCODE(85, OP_CAST_CD, "cast_cd", Tc, Td, T0, T0) OPCODE(86, OP_ONES_LIKE_CC, "ones_like_cc", Tc, T0, T0, T0) OPCODE(87, OP_COPY_CC, "copy_cc", Tc, Tc, T0, T0) OPCODE(88, OP_NEG_CC, "neg_cc", Tc, Tc, T0, T0) OPCODE(89, OP_ADD_CCC, "add_ccc", Tc, Tc, Tc, T0) OPCODE(90, OP_SUB_CCC, "sub_ccc", Tc, Tc, Tc, T0) OPCODE(91, OP_MUL_CCC, "mul_ccc", Tc, Tc, Tc, T0) OPCODE(92, OP_DIV_CCC, "div_ccc", Tc, Tc, Tc, T0) OPCODE(93, OP_WHERE_CBCC, "where_cbcc", Tc, Tb, Tc, Tc) OPCODE(94, OP_FUNC_CCN, "func_ccn", Tc, Tc, Tn, T0) OPCODE(95, OP_FUNC_CCCN, "func_cccn", Tc, Tc, Tc, Tn) OPCODE(96, OP_REAL_DC, "real_dc", Td, Tc, T0, T0) OPCODE(97, OP_IMAG_DC, "imag_dc", Td, Tc, T0, T0) OPCODE(98, OP_COMPLEX_CDD, "complex_cdd", Tc, Td, Td, T0) OPCODE(99, OP_COPY_SS, "copy_ss", Ts, Ts, T0, T0) OPCODE(100, OP_WHERE_BBBB, "where_bbbb", Tb, Tb, Tb, Tb) OPCODE(101, OP_REDUCTION, NULL, T0, T0, T0, T0) /* Last argument in a reduction is the axis of the array the reduction should be applied along. 
*/ OPCODE(102, OP_SUM, NULL, T0, T0, T0, T0) OPCODE(103, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) OPCODE(104, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) OPCODE(105, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) OPCODE(106, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) OPCODE(107, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) OPCODE(108, OP_PROD, NULL, T0, T0, T0, T0) OPCODE(109, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) OPCODE(110, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) OPCODE(111, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) OPCODE(112, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) OPCODE(113, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) /* Should be the last opcode */ OPCODE(114, OP_END, NULL, T0, T0, T0, T0) numexpr-2.2.2/numexpr/cpuinfo.py0000755000175000001440000005472312132261472017402 0ustar faltetusers00000000000000#!/usr/bin/env python ################################################################### # cpuinfo - Get information about CPU # # License: BSD # Author: Pearu Peterson # # See LICENSES/cpuinfo.txt for details about copyright and # rights to use. #################################################################### """ cpuinfo Copyright 2002 Pearu Peterson all rights reserved, Pearu Peterson Permission to use, modify, and distribute this software is given under the terms of the NumPy (BSD style) license. See LICENSE.txt that came with this distribution for specifics. NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. 
Pearu Peterson """ __all__ = ['cpu'] import sys, re, types import os import subprocess import warnings import platform def getoutput(cmd, successful_status=(0,), stacklevel=1): try: p = subprocess.Popen(cmd, stdout=subprocess.PIPE) output, _ = p.communicate() status = p.returncode except EnvironmentError, e: warnings.warn(str(e), UserWarning, stacklevel=stacklevel) return False, '' if os.WIFEXITED(status) and os.WEXITSTATUS(status) in successful_status: return True, output return False, output def command_info(successful_status=(0,), stacklevel=1, **kw): info = {} for key in kw: ok, output = getoutput(kw[key], successful_status=successful_status, stacklevel=stacklevel+1) if ok: info[key] = output.strip() return info def command_by_line(cmd, successful_status=(0,), stacklevel=1): ok, output = getoutput(cmd, successful_status=successful_status, stacklevel=stacklevel+1) if not ok: return # XXX: check output = output.decode('ascii') for line in output.splitlines(): yield line.strip() def key_value_from_command(cmd, sep, successful_status=(0,), stacklevel=1): d = {} for line in command_by_line(cmd, successful_status=successful_status, stacklevel=stacklevel+1): l = [s.strip() for s in line.split(sep, 1)] if len(l) == 2: d[l[0]] = l[1] return d class CPUInfoBase(object): """Holds CPU information and provides methods for requiring the availability of various CPU features. 
""" def _try_call(self,func): try: return func() except: pass def __getattr__(self,name): if not name.startswith('_'): if hasattr(self,'_'+name): attr = getattr(self,'_'+name) if type(attr) is types.MethodType: return lambda func=self._try_call,attr=attr : func(attr) else: return lambda : None raise AttributeError,name def _getNCPUs(self): return 1 def __get_nbits(self): abits = platform.architecture()[0] nbits = re.compile('(\d+)bit').search(abits).group(1) return nbits def _is_32bit(self): return self.__get_nbits() == '32' def _is_64bit(self): return self.__get_nbits() == '64' class LinuxCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = [ {} ] ok, output = getoutput(['uname', '-m']) if ok: info[0]['uname_m'] = output.strip() try: fo = open('/proc/cpuinfo') except EnvironmentError, e: warnings.warn(str(e), UserWarning) else: for line in fo: name_value = [s.strip() for s in line.split(':', 1)] if len(name_value) != 2: continue name, value = name_value if not info or name in info[-1]: # next processor info.append({}) info[-1][name] = value fo.close() self.__class__.info = info def _not_impl(self): pass # Athlon def _is_AMD(self): return self.info[0]['vendor_id']=='AuthenticAMD' def _is_AthlonK6_2(self): return self._is_AMD() and self.info[0]['model'] == '2' def _is_AthlonK6_3(self): return self._is_AMD() and self.info[0]['model'] == '3' def _is_AthlonK6(self): return re.match(r'.*?AMD-K6',self.info[0]['model name']) is not None def _is_AthlonK7(self): return re.match(r'.*?AMD-K7',self.info[0]['model name']) is not None def _is_AthlonMP(self): return re.match(r'.*?Athlon\(tm\) MP\b', self.info[0]['model name']) is not None def _is_AMD64(self): return self.is_AMD() and self.info[0]['family'] == '15' def _is_Athlon64(self): return re.match(r'.*?Athlon\(tm\) 64\b', self.info[0]['model name']) is not None def _is_AthlonHX(self): return re.match(r'.*?Athlon HX\b', self.info[0]['model name']) is not None def _is_Opteron(self): 
return re.match(r'.*?Opteron\b', self.info[0]['model name']) is not None def _is_Hammer(self): return re.match(r'.*?Hammer\b', self.info[0]['model name']) is not None # Alpha def _is_Alpha(self): return self.info[0]['cpu']=='Alpha' def _is_EV4(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV4' def _is_EV5(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV5' def _is_EV56(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV56' def _is_PCA56(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'PCA56' # Intel #XXX _is_i386 = _not_impl def _is_Intel(self): return self.info[0]['vendor_id']=='GenuineIntel' def _is_i486(self): return self.info[0]['cpu']=='i486' def _is_i586(self): return self.is_Intel() and self.info[0]['cpu family'] == '5' def _is_i686(self): return self.is_Intel() and self.info[0]['cpu family'] == '6' def _is_Celeron(self): return re.match(r'.*?Celeron', self.info[0]['model name']) is not None def _is_Pentium(self): return re.match(r'.*?Pentium', self.info[0]['model name']) is not None def _is_PentiumII(self): return re.match(r'.*?Pentium.*?II\b', self.info[0]['model name']) is not None def _is_PentiumPro(self): return re.match(r'.*?PentiumPro\b', self.info[0]['model name']) is not None def _is_PentiumMMX(self): return re.match(r'.*?Pentium.*?MMX\b', self.info[0]['model name']) is not None def _is_PentiumIII(self): return re.match(r'.*?Pentium.*?III\b', self.info[0]['model name']) is not None def _is_PentiumIV(self): return re.match(r'.*?Pentium.*?(IV|4)\b', self.info[0]['model name']) is not None def _is_PentiumM(self): return re.match(r'.*?Pentium.*?M\b', self.info[0]['model name']) is not None def _is_Prescott(self): return self.is_PentiumIV() and self.has_sse3() def _is_Nocona(self): return self.is_Intel() \ and (self.info[0]['cpu family'] == '6' \ or self.info[0]['cpu family'] == '15' ) \ and (self.has_sse3() and not self.has_ssse3())\ and re.match(r'.*?\blm\b',self.info[0]['flags']) is not 
None def _is_Core2(self): return self.is_64bit() and self.is_Intel() and \ re.match(r'.*?Core\(TM\)2\b', \ self.info[0]['model name']) is not None def _is_Itanium(self): return re.match(r'.*?Itanium\b', self.info[0]['family']) is not None def _is_XEON(self): return re.match(r'.*?XEON\b', self.info[0]['model name'],re.IGNORECASE) is not None _is_Xeon = _is_XEON # Varia def _is_singleCPU(self): return len(self.info) == 1 def _getNCPUs(self): return len(self.info) def _has_fdiv_bug(self): return self.info[0]['fdiv_bug']=='yes' def _has_f00f_bug(self): return self.info[0]['f00f_bug']=='yes' def _has_mmx(self): return re.match(r'.*?\bmmx\b',self.info[0]['flags']) is not None def _has_sse(self): return re.match(r'.*?\bsse\b',self.info[0]['flags']) is not None def _has_sse2(self): return re.match(r'.*?\bsse2\b',self.info[0]['flags']) is not None def _has_sse3(self): return re.match(r'.*?\bpni\b',self.info[0]['flags']) is not None def _has_ssse3(self): return re.match(r'.*?\bssse3\b',self.info[0]['flags']) is not None def _has_3dnow(self): return re.match(r'.*?\b3dnow\b',self.info[0]['flags']) is not None def _has_3dnowext(self): return re.match(r'.*?\b3dnowext\b',self.info[0]['flags']) is not None class IRIXCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = key_value_from_command('sysconf', sep=' ', successful_status=(0,1)) self.__class__.info = info def _not_impl(self): pass def _is_singleCPU(self): return self.info.get('NUM_PROCESSORS') == '1' def _getNCPUs(self): return int(self.info.get('NUM_PROCESSORS', 1)) def __cputype(self,n): return self.info.get('PROCESSORS').split()[0].lower() == 'r%s' % (n) def _is_r2000(self): return self.__cputype(2000) def _is_r3000(self): return self.__cputype(3000) def _is_r3900(self): return self.__cputype(3900) def _is_r4000(self): return self.__cputype(4000) def _is_r4100(self): return self.__cputype(4100) def _is_r4300(self): return self.__cputype(4300) def _is_r4400(self): return 
self.__cputype(4400) def _is_r4600(self): return self.__cputype(4600) def _is_r4650(self): return self.__cputype(4650) def _is_r5000(self): return self.__cputype(5000) def _is_r6000(self): return self.__cputype(6000) def _is_r8000(self): return self.__cputype(8000) def _is_r10000(self): return self.__cputype(10000) def _is_r12000(self): return self.__cputype(12000) def _is_rorion(self): return self.__cputype('orion') def get_ip(self): try: return self.info.get('MACHINE') except: pass def __machine(self,n): return self.info.get('MACHINE').lower() == 'ip%s' % (n) def _is_IP19(self): return self.__machine(19) def _is_IP20(self): return self.__machine(20) def _is_IP21(self): return self.__machine(21) def _is_IP22(self): return self.__machine(22) def _is_IP22_4k(self): return self.__machine(22) and self._is_r4000() def _is_IP22_5k(self): return self.__machine(22) and self._is_r5000() def _is_IP24(self): return self.__machine(24) def _is_IP25(self): return self.__machine(25) def _is_IP26(self): return self.__machine(26) def _is_IP27(self): return self.__machine(27) def _is_IP28(self): return self.__machine(28) def _is_IP30(self): return self.__machine(30) def _is_IP32(self): return self.__machine(32) def _is_IP32_5k(self): return self.__machine(32) and self._is_r5000() def _is_IP32_10k(self): return self.__machine(32) and self._is_r10000() class DarwinCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = command_info(arch='arch', machine='machine') info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') self.__class__.info = info def _not_impl(self): pass def _getNCPUs(self): return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) def _is_Power_Macintosh(self): return self.info['sysctl_hw']['hw.machine']=='Power Macintosh' def _is_i386(self): return self.info['arch']=='i386' def _is_ppc(self): return self.info['arch']=='ppc' def __machine(self,n): return self.info['machine'] == 'ppc%s'%n def _is_ppc601(self): return 
self.__machine(601) def _is_ppc602(self): return self.__machine(602) def _is_ppc603(self): return self.__machine(603) def _is_ppc603e(self): return self.__machine('603e') def _is_ppc604(self): return self.__machine(604) def _is_ppc604e(self): return self.__machine('604e') def _is_ppc620(self): return self.__machine(620) def _is_ppc630(self): return self.__machine(630) def _is_ppc740(self): return self.__machine(740) def _is_ppc7400(self): return self.__machine(7400) def _is_ppc7450(self): return self.__machine(7450) def _is_ppc750(self): return self.__machine(750) def _is_ppc403(self): return self.__machine(403) def _is_ppc505(self): return self.__machine(505) def _is_ppc801(self): return self.__machine(801) def _is_ppc821(self): return self.__machine(821) def _is_ppc823(self): return self.__machine(823) def _is_ppc860(self): return self.__machine(860) class SunOSCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = command_info(arch='arch', mach='mach', uname_i='uname_i', isainfo_b=['isainfo', '-b'], isainfo_n=['isainfo' ,'-n'], ) info['uname_X'] = key_value_from_command('uname -X', sep='=') for line in command_by_line(['psrinfo', '-v', '0']): m = re.match(r'\s*The (?P

[\w\d]+) processor operates at', line) if m: info['processor'] = m.group('p') break self.__class__.info = info def _not_impl(self): pass def _is_i386(self): return self.info['isainfo_n']=='i386' def _is_sparc(self): return self.info['isainfo_n']=='sparc' def _is_sparcv9(self): return self.info['isainfo_n']=='sparcv9' def _getNCPUs(self): return int(self.info['uname_X'].get('NumCPU', 1)) def _is_sun4(self): return self.info['arch']=='sun4' def _is_SUNW(self): return re.match(r'SUNW',self.info['uname_i']) is not None def _is_sparcstation5(self): return re.match(r'.*SPARCstation-5',self.info['uname_i']) is not None def _is_ultra1(self): return re.match(r'.*Ultra-1',self.info['uname_i']) is not None def _is_ultra250(self): return re.match(r'.*Ultra-250',self.info['uname_i']) is not None def _is_ultra2(self): return re.match(r'.*Ultra-2',self.info['uname_i']) is not None def _is_ultra30(self): return re.match(r'.*Ultra-30',self.info['uname_i']) is not None def _is_ultra4(self): return re.match(r'.*Ultra-4',self.info['uname_i']) is not None def _is_ultra5_10(self): return re.match(r'.*Ultra-5_10',self.info['uname_i']) is not None def _is_ultra5(self): return re.match(r'.*Ultra-5',self.info['uname_i']) is not None def _is_ultra60(self): return re.match(r'.*Ultra-60',self.info['uname_i']) is not None def _is_ultra80(self): return re.match(r'.*Ultra-80',self.info['uname_i']) is not None def _is_ultraenterprice(self): return re.match(r'.*Ultra-Enterprise',self.info['uname_i']) is not None def _is_ultraenterprice10k(self): return re.match(r'.*Ultra-Enterprise-10000',self.info['uname_i']) is not None def _is_sunfire(self): return re.match(r'.*Sun-Fire',self.info['uname_i']) is not None def _is_ultra(self): return re.match(r'.*Ultra',self.info['uname_i']) is not None def _is_cpusparcv7(self): return self.info['processor']=='sparcv7' def _is_cpusparcv8(self): return self.info['processor']=='sparcv8' def _is_cpusparcv9(self): return self.info['processor']=='sparcv9' class 
Win32CPUInfo(CPUInfoBase): info = None pkey = r"HARDWARE\DESCRIPTION\System\CentralProcessor" # XXX: what does the value of # HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0 # mean? def __init__(self): if self.info is not None: return info = [] try: #XXX: Bad style to use so long `try:...except:...`. Fix it! import _winreg prgx = re.compile(r"family\s+(?P\d+)\s+model\s+(?P\d+)"\ "\s+stepping\s+(?P\d+)",re.IGNORECASE) chnd=_winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) pnum=0 while 1: try: proc=_winreg.EnumKey(chnd,pnum) except _winreg.error: break else: pnum+=1 info.append({"Processor":proc}) phnd=_winreg.OpenKey(chnd,proc) pidx=0 while True: try: name,value,vtpe=_winreg.EnumValue(phnd,pidx) except _winreg.error: break else: pidx=pidx+1 info[-1][name]=value if name=="Identifier": srch=prgx.search(value) if srch: info[-1]["Family"]=int(srch.group("FML")) info[-1]["Model"]=int(srch.group("MDL")) info[-1]["Stepping"]=int(srch.group("STP")) except: print sys.exc_value,'(ignoring)' self.__class__.info = info def _not_impl(self): pass # Athlon def _is_AMD(self): return self.info[0]['VendorIdentifier']=='AuthenticAMD' def _is_Am486(self): return self.is_AMD() and self.info[0]['Family']==4 def _is_Am5x86(self): return self.is_AMD() and self.info[0]['Family']==4 def _is_AMDK5(self): return self.is_AMD() and self.info[0]['Family']==5 \ and self.info[0]['Model'] in [0,1,2,3] def _is_AMDK6(self): return self.is_AMD() and self.info[0]['Family']==5 \ and self.info[0]['Model'] in [6,7] def _is_AMDK6_2(self): return self.is_AMD() and self.info[0]['Family']==5 \ and self.info[0]['Model']==8 def _is_AMDK6_3(self): return self.is_AMD() and self.info[0]['Family']==5 \ and self.info[0]['Model']==9 def _is_AMDK7(self): return self.is_AMD() and self.info[0]['Family'] == 6 # To reliably distinguish between the different types of AMD64 chips # (Athlon64, Operton, Athlon64 X2, Semperon, Turion 64, etc.) 
would # require looking at the 'brand' from cpuid def _is_AMD64(self): return self.is_AMD() and self.info[0]['Family'] == 15 # Intel def _is_Intel(self): return self.info[0]['VendorIdentifier']=='GenuineIntel' def _is_i386(self): return self.info[0]['Family']==3 def _is_i486(self): return self.info[0]['Family']==4 def _is_i586(self): return self.is_Intel() and self.info[0]['Family']==5 def _is_i686(self): return self.is_Intel() and self.info[0]['Family']==6 def _is_Pentium(self): return self.is_Intel() and self.info[0]['Family']==5 def _is_PentiumMMX(self): return self.is_Intel() and self.info[0]['Family']==5 \ and self.info[0]['Model']==4 def _is_PentiumPro(self): return self.is_Intel() and self.info[0]['Family']==6 \ and self.info[0]['Model']==1 def _is_PentiumII(self): return self.is_Intel() and self.info[0]['Family']==6 \ and self.info[0]['Model'] in [3,5,6] def _is_PentiumIII(self): return self.is_Intel() and self.info[0]['Family']==6 \ and self.info[0]['Model'] in [7,8,9,10,11] def _is_PentiumIV(self): return self.is_Intel() and self.info[0]['Family']==15 def _is_PentiumM(self): return self.is_Intel() and self.info[0]['Family'] == 6 \ and self.info[0]['Model'] in [9, 13, 14] def _is_Core2(self): return self.is_Intel() and self.info[0]['Family'] == 6 \ and self.info[0]['Model'] in [15, 16, 17] # Varia def _is_singleCPU(self): return len(self.info) == 1 def _getNCPUs(self): return len(self.info) def _has_mmx(self): if self.is_Intel(): return (self.info[0]['Family']==5 and self.info[0]['Model']==4) \ or (self.info[0]['Family'] in [6,15]) elif self.is_AMD(): return self.info[0]['Family'] in [5,6,15] else: return False def _has_sse(self): if self.is_Intel(): return (self.info[0]['Family']==6 and \ self.info[0]['Model'] in [7,8,9,10,11]) \ or self.info[0]['Family']==15 elif self.is_AMD(): return (self.info[0]['Family']==6 and \ self.info[0]['Model'] in [6,7,8,10]) \ or self.info[0]['Family']==15 else: return False def _has_sse2(self): if self.is_Intel(): return 
self.is_Pentium4() or self.is_PentiumM() \ or self.is_Core2() elif self.is_AMD(): return self.is_AMD64() else: return False def _has_3dnow(self): return self.is_AMD() and self.info[0]['Family'] in [5,6,15] def _has_3dnowext(self): return self.is_AMD() and self.info[0]['Family'] in [6,15] if sys.platform.startswith('linux'): # variations: linux2,linux-i386 (any others?) cpuinfo = LinuxCPUInfo elif sys.platform.startswith('irix'): cpuinfo = IRIXCPUInfo elif sys.platform == 'darwin': cpuinfo = DarwinCPUInfo elif sys.platform.startswith('sunos'): cpuinfo = SunOSCPUInfo elif sys.platform.startswith('win32'): cpuinfo = Win32CPUInfo elif sys.platform.startswith('cygwin'): cpuinfo = LinuxCPUInfo #XXX: other OS's. Eg. use _winreg on Win32. Or os.uname on unices. else: cpuinfo = CPUInfoBase cpu = cpuinfo() if __name__ == "__main__": cpu.is_blaa() cpu.is_Intel() cpu.is_Alpha() print 'CPU information:', for name in dir(cpuinfo): if name[0]=='_' and name[1]!='_': r = getattr(cpu,name[1:])() if r: if r!=1: print '%s=%s' %(name[1:],r), else: print name[1:], print numexpr-2.2.2/numexpr/expressions.py0000644000175000001440000003742712221256772020326 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### __all__ = ['E'] import operator import sys import threading import numpy # Declare a double type that does not exist in Python space double = numpy.double # The default kind for undeclared variables default_kind = 'double' if sys.version_info[0] < 3: int_ = int long_ = long else: int_ = numpy.int32 long_ = numpy.int64 type_to_kind = {bool: 'bool', int_: 'int', long_: 'long', float: 'float', double: 'double', complex: 'complex', bytes: 'bytes'} kind_to_type = {'bool': bool, 'int': int_, 'long': long_, 'float': float, 'double': double, 'complex': complex, 'bytes': bytes} kind_rank = ['bool', 'int', 'long', 'float', 'double', 'complex', 'none'] scalar_constant_types = [bool, int_, long, float, double, complex, bytes] # Final corrections for Python 3 (mainly for PyTables needs) if sys.version_info[0] > 2: type_to_kind[str] = 'str' kind_to_type['str'] = str scalar_constant_types.append(str) scalar_constant_types = tuple(scalar_constant_types) from numexpr import interpreter class Expression(object): def __init__(self): object.__init__(self) def __getattr__(self, name): if name.startswith('_'): return self.__dict__[name] else: return VariableNode(name, default_kind) E = Expression() class Context(threading.local): initialized = False def __init__(self, dict_): if self.initialized: raise SystemError('__init__ called too many times') self.initialized = True self.__dict__.update(dict_) def get(self, value, default): return self.__dict__.get(value, default) def get_current_context(self): return self.__dict__ def set_new_context(self, dict_): self.__dict__.update(dict_) # This will be called each time the local object is used in a separate thread _context = Context({}) def get_optimization(): return _context.get('optimization', 'none') # helper functions for creating __magic__ methods def ophelper(f): def func(*args): args = list(args) for i, x in enumerate(args): if isConstant(x): args[i] = x = 
ConstantNode(x) if not isinstance(x, ExpressionNode): raise TypeError("unsupported object type: %s" % type(x)) return f(*args) func.__name__ = f.__name__ func.__doc__ = f.__doc__ func.__dict__.update(f.__dict__) return func def allConstantNodes(args): "returns True if args are all ConstantNodes." for x in args: if not isinstance(x, ConstantNode): return False return True def isConstant(ex): "Returns True if ex is a constant scalar of an allowed type." return isinstance(ex, scalar_constant_types) def commonKind(nodes): node_kinds = [node.astKind for node in nodes] str_count = node_kinds.count('bytes') + node_kinds.count('str') if 0 < str_count < len(node_kinds): # some args are strings, but not all raise TypeError("strings can only be operated with strings") if str_count > 0: # if there are some, all of them must be return 'bytes' n = -1 for x in nodes: n = max(n, kind_rank.index(x.astKind)) return kind_rank[n] max_int32 = 2147483647 min_int32 = -max_int32 - 1 def bestConstantType(x): # ``numpy.string_`` is a subclass of ``bytes`` if isinstance(x, (bytes, str)): return bytes # Numeric conversion to boolean values is not tried because # ``bool(1) == True`` (same for 0 and False), so 0 and 1 would be # interpreted as booleans when ``False`` and ``True`` are already # supported. if isinstance(x, (bool, numpy.bool_)): return bool # ``long`` objects are kept as is to allow the user to force # promotion of results by using long constants, e.g. by operating # a 32-bit array with a long (64-bit) constant. if isinstance(x, (long_, numpy.int64)): return long_ # ``double`` objects are kept as is to allow the user to force # promotion of results by using double constants, e.g. by operating # a float (32-bit) array with a double (64-bit) constant. 
if isinstance(x, double): return double if isinstance(x, (int, numpy.integer)): # Constants needing more than 32 bits are always # considered ``long``, *regardless of the platform*, so we # can clearly tell 32- and 64-bit constants apart. if not (min_int32 <= x <= max_int32): return long_ return int_ # The duality of float and double in Python avoids that we have to list # ``double`` too. for converter in float, complex: try: y = converter(x) except StandardError, err: continue if y == x: return converter def getKind(x): converter = bestConstantType(x) return type_to_kind[converter] def binop(opname, reversed=False, kind=None): # Getting the named method from self (after reversal) does not # always work (e.g. int constants do not have a __lt__ method). opfunc = getattr(operator, "__%s__" % opname) @ophelper def operation(self, other): if reversed: self, other = other, self if allConstantNodes([self, other]): return ConstantNode(opfunc(self.value, other.value)) else: return OpNode(opname, (self, other), kind=kind) return operation def func(func, minkind=None, maxkind=None): @ophelper def function(*args): if allConstantNodes(args): return ConstantNode(func(*[x.value for x in args])) kind = commonKind(args) if kind in ('int', 'long'): # Exception for following NumPy casting rules #FIXME: this is not always desirable. 
The following # functions which return ints (for int inputs) on numpy # but not on numexpr: copy, abs, fmod, ones_like kind = 'double' else: # Apply regular casting rules if minkind and kind_rank.index(minkind) > kind_rank.index(kind): kind = minkind if maxkind and kind_rank.index(maxkind) < kind_rank.index(kind): kind = maxkind return FuncNode(func.__name__, args, kind) return function @ophelper def where_func(a, b, c): if isinstance(a, ConstantNode): #FIXME: This prevents where(True, a, b) raise ValueError("too many dimensions") if allConstantNodes([a,b,c]): return ConstantNode(numpy.where(a, b, c)) return FuncNode('where', [a,b,c]) def encode_axis(axis): if isinstance(axis, ConstantNode): axis = axis.value if axis is None: axis = interpreter.allaxes else: if axis < 0: raise ValueError("negative axis are not supported") if axis > 254: raise ValueError("cannot encode axis") return RawNode(axis) def sum_func(a, axis=None): axis = encode_axis(axis) if isinstance(a, ConstantNode): return a if isinstance(a, (bool, int_, long_, float, double, complex)): a = ConstantNode(a) return FuncNode('sum', [a, axis], kind=a.astKind) def prod_func(a, axis=None): axis = encode_axis(axis) if isinstance(a, (bool, int_, long_, float, double, complex)): a = ConstantNode(a) if isinstance(a, ConstantNode): return a return FuncNode('prod', [a, axis], kind=a.astKind) @ophelper def div_op(a, b): if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and a.astKind in ('float', 'double', 'complex')): return OpNode('mul', [a, ConstantNode(1./b.value)]) return OpNode('div', [a,b]) @ophelper def truediv_op(a, b): if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and a.astKind in ('float', 'double', 'complex')): return OpNode('mul', [a, ConstantNode(1./b.value)]) kind = commonKind([a, b]) if kind in ('bool', 'int', 'long'): kind = 'double' return OpNode('div', [a, b], 
kind=kind) @ophelper def rtruediv_op(a, b): return truediv_op(b, a) @ophelper def pow_op(a, b): if allConstantNodes([a, b]): return ConstantNode(a**b) if isinstance(b, ConstantNode): x = b.value if get_optimization() == 'aggressive': RANGE = 50 # Approximate break even point with pow(x,y) # Optimize all integral and half integral powers in [-RANGE, RANGE] # Note: for complex numbers RANGE could be larger. if (int(2*x) == 2*x) and (-RANGE <= abs(x) <= RANGE): n = int_(abs(x)) ishalfpower = int_(abs(2*x)) % 2 def multiply(x, y): if x is None: return y return OpNode('mul', [x, y]) r = None p = a mask = 1 while True: if (n & mask): r = multiply(r, p) mask <<= 1 if mask > n: break p = OpNode('mul', [p,p]) if ishalfpower: kind = commonKind([a]) if kind in ('int', 'long'): kind = 'double' r = multiply(r, OpNode('sqrt', [a], kind)) if r is None: r = OpNode('ones_like', [a]) if x < 0: r = OpNode('div', [ConstantNode(1), r]) return r if get_optimization() in ('moderate', 'aggressive'): if x == -1: return OpNode('div', [ConstantNode(1),a]) if x == 0: return OpNode('ones_like', [a]) if x == 0.5: kind = a.astKind if kind in ('int', 'long'): kind = 'double' return FuncNode('sqrt', [a], kind=kind) if x == 1: return a if x == 2: return OpNode('mul', [a,a]) return OpNode('pow', [a,b]) # The functions and the minimum and maximum types accepted functions = { 'copy' : func(numpy.copy), 'ones_like' : func(numpy.ones_like), 'sqrt' : func(numpy.sqrt, 'float'), 'sin' : func(numpy.sin, 'float'), 'cos' : func(numpy.cos, 'float'), 'tan' : func(numpy.tan, 'float'), 'arcsin' : func(numpy.arcsin, 'float'), 'arccos' : func(numpy.arccos, 'float'), 'arctan' : func(numpy.arctan, 'float'), 'sinh' : func(numpy.sinh, 'float'), 'cosh' : func(numpy.cosh, 'float'), 'tanh' : func(numpy.tanh, 'float'), 'arcsinh' : func(numpy.arcsinh, 'float'), 'arccosh' : func(numpy.arccosh, 'float'), 'arctanh' : func(numpy.arctanh, 'float'), 'fmod' : func(numpy.fmod, 'float'), 'arctan2' : func(numpy.arctan2, 'float'), 
'log' : func(numpy.log, 'float'), 'log1p' : func(numpy.log1p, 'float'), 'log10' : func(numpy.log10, 'float'), 'exp' : func(numpy.exp, 'float'), 'expm1' : func(numpy.expm1, 'float'), 'abs': func(numpy.absolute, 'float'), 'where' : where_func, 'real' : func(numpy.real, 'double', 'double'), 'imag' : func(numpy.imag, 'double', 'double'), 'complex' : func(complex, 'complex'), 'sum' : sum_func, 'prod' : prod_func, } class ExpressionNode(object): """An object that represents a generic number object. This implements the number special methods so that we can keep track of how this object has been used. """ astType = 'generic' def __init__(self, value=None, kind=None, children=None): object.__init__(self) self.value = value if kind is None: kind = 'none' self.astKind = kind if children is None: self.children = () else: self.children = tuple(children) def get_real(self): if self.astType == 'constant': return ConstantNode(complex(self.value).real) return OpNode('real', (self,), 'double') real = property(get_real) def get_imag(self): if self.astType == 'constant': return ConstantNode(complex(self.value).imag) return OpNode('imag', (self,), 'double') imag = property(get_imag) def __str__(self): return '%s(%s, %s, %s)' % (self.__class__.__name__, self.value, self.astKind, self.children) def __repr__(self): return self.__str__() def __neg__(self): return OpNode('neg', (self,)) def __invert__(self): return OpNode('invert', (self,)) def __pos__(self): return self # The next check is commented out. This can be uncommented when # numexpr would drop compatibility with Python < 3. # See #24 for more info. # def __nonzero__(self): # raise TypeError("You can't use Python's standard boolean operators in " # "NumExpr expressions. 
You should use their bitwise " # "counterparts instead: '&' instead of 'and', " # "'|' instead of 'or', and '~' instead of 'not'.") __add__ = __radd__ = binop('add') __sub__ = binop('sub') __rsub__ = binop('sub', reversed=True) __mul__ = __rmul__ = binop('mul') if sys.version_info[0] < 3: __div__ = div_op __rdiv__ = binop('div', reversed=True) __truediv__ = truediv_op __rtruediv__ = rtruediv_op __pow__ = pow_op __rpow__ = binop('pow', reversed=True) __mod__ = binop('mod') __rmod__ = binop('mod', reversed=True) # boolean operations __and__ = binop('and', kind='bool') __or__ = binop('or', kind='bool') __gt__ = binop('gt', kind='bool') __ge__ = binop('ge', kind='bool') __eq__ = binop('eq', kind='bool') __ne__ = binop('ne', kind='bool') __lt__ = binop('gt', reversed=True, kind='bool') __le__ = binop('ge', reversed=True, kind='bool') class LeafNode(ExpressionNode): leafNode = True class VariableNode(LeafNode): astType = 'variable' def __init__(self, value=None, kind=None, children=None): LeafNode.__init__(self, value=value, kind=kind) class RawNode(object): """Used to pass raw integers to interpreter. For instance, for selecting what function to use in func1. Purposely don't inherit from ExpressionNode, since we don't wan't this to be used for anything but being walked. 
""" astType = 'raw' astKind = 'none' def __init__(self, value): self.value = value self.children = () def __str__(self): return 'RawNode(%s)' % (self.value,) __repr__ = __str__ class ConstantNode(LeafNode): astType = 'constant' def __init__(self, value=None, children=None): kind = getKind(value) # Python float constants are double precision by default if kind == 'float': kind = 'double' LeafNode.__init__(self, value=value, kind=kind) def __neg__(self): return ConstantNode(-self.value) def __invert__(self): return ConstantNode(~self.value) class OpNode(ExpressionNode): astType = 'op' def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) ExpressionNode.__init__(self, value=opcode, kind=kind, children=args) class FuncNode(OpNode): def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) OpNode.__init__(self, opcode, args, kind) numexpr-2.2.2/numexpr/numexpr_object.hpp0000644000175000001440000000205512132261472021106 0ustar faltetusers00000000000000#ifndef NUMEXPR_OBJECT_HPP #define NUMEXPR_OBJECT_HPP /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. 
**********************************************************************/ struct NumExprObject { PyObject_HEAD PyObject *signature; /* a python string */ PyObject *tempsig; PyObject *constsig; PyObject *fullsig; PyObject *program; /* a python string */ PyObject *constants; /* a tuple of int/float/complex */ PyObject *input_names; /* tuple of strings */ char **mem; /* pointers to registers */ char *rawmem; /* a chunks of raw memory for storing registers */ npy_intp *memsteps; npy_intp *memsizes; int rawmemsize; int n_inputs; int n_constants; int n_temps; }; extern PyTypeObject NumExprType; #endif // NUMEXPR_OBJECT_HPP numexpr-2.2.2/numexpr/module.hpp0000644000175000001440000000311212132261472017342 0ustar faltetusers00000000000000#ifndef NUMEXPR_MODULE_HPP #define NUMEXPR_MODULE_HPP // Deal with the clunky numpy import mechanism // by inverting the logic of the NO_IMPORT_ARRAY symbol. #define PY_ARRAY_UNIQUE_SYMBOL numexpr_ARRAY_API #ifndef DO_NUMPY_IMPORT_ARRAY # define NO_IMPORT_ARRAY #endif #include #include #include #include "numexpr_config.hpp" struct global_state { /* Global variables for threads */ int nthreads; /* number of desired threads in pool */ int init_threads_done; /* pool of threads initialized? */ int end_threads; /* should exisiting threads end? */ pthread_t threads[MAX_THREADS]; /* opaque structure for threads */ int tids[MAX_THREADS]; /* ID per each thread */ npy_intp gindex; /* global index for all threads */ int init_sentinels_done; /* sentinels initialized? */ int giveup; /* should parallel code giveup? */ int force_serial; /* force serial code instead of parallel? 
*/ int pid; /* the PID for this process */ /* Syncronization variables */ pthread_mutex_t count_mutex; int count_threads; pthread_mutex_t count_threads_mutex; pthread_cond_t count_threads_cv; global_state() { nthreads = 1; init_threads_done = 0; end_threads = 0; pid = 0; } }; extern global_state gs; int numexpr_set_nthreads(int nthreads_new); #endif // NUMEXPR_MODULE_HPP numexpr-2.2.2/numexpr/functions.hpp0000644000175000001440000001224312132261472020072 0ustar faltetusers00000000000000// -*- c-mode -*- /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ /* These #if blocks make it easier to query this file, without having to define every row function before #including it. */ #ifndef FUNC_FF #define ELIDE_FUNC_FF #define FUNC_FF(...) #endif FUNC_FF(FUNC_SQRT_FF, "sqrt_ff", sqrtf, sqrtf2, vsSqrt) FUNC_FF(FUNC_SIN_FF, "sin_ff", sinf, sinf2, vsSin) FUNC_FF(FUNC_COS_FF, "cos_ff", cosf, cosf2, vsCos) FUNC_FF(FUNC_TAN_FF, "tan_ff", tanf, tanf2, vsTan) FUNC_FF(FUNC_ARCSIN_FF, "arcsin_ff", asinf, asinf2, vsAsin) FUNC_FF(FUNC_ARCCOS_FF, "arccos_ff", acosf, acosf2, vsAcos) FUNC_FF(FUNC_ARCTAN_FF, "arctan_ff", atanf, atanf2, vsAtan) FUNC_FF(FUNC_SINH_FF, "sinh_ff", sinhf, sinhf2, vsSinh) FUNC_FF(FUNC_COSH_FF, "cosh_ff", coshf, coshf2, vsCosh) FUNC_FF(FUNC_TANH_FF, "tanh_ff", tanhf, tanhf2, vsTanh) FUNC_FF(FUNC_ARCSINH_FF, "arcsinh_ff", asinhf, asinhf2, vsAsinh) FUNC_FF(FUNC_ARCCOSH_FF, "arccosh_ff", acoshf, acoshf2, vsAcosh) FUNC_FF(FUNC_ARCTANH_FF, "arctanh_ff", atanhf, atanhf2, vsAtanh) FUNC_FF(FUNC_LOG_FF, "log_ff", logf, logf2, vsLn) FUNC_FF(FUNC_LOG1P_FF, "log1p_ff", log1pf, log1pf2, vsLog1p) FUNC_FF(FUNC_LOG10_FF, "log10_ff", log10f, log10f2, vsLog10) FUNC_FF(FUNC_EXP_FF, "exp_ff", expf, expf2, vsExp) 
FUNC_FF(FUNC_EXPM1_FF, "expm1_ff", expm1f, expm1f, vsExpm1) FUNC_FF(FUNC_ABS_FF, "absolute_ff", fabsf, fabsf2, vsAbs) FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FF #undef ELIDE_FUNC_FF #undef FUNC_FF #endif #ifndef FUNC_FFF #define ELIDE_FUNC_FFF #define FUNC_FFF(...) #endif FUNC_FFF(FUNC_FMOD_FFF, "fmod_fff", fmodf, fmodf2, vsfmod) FUNC_FFF(FUNC_ARCTAN2_FFF, "arctan2_fff", atan2f, atan2f2, vsAtan2) FUNC_FFF(FUNC_FFF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FFF #undef ELIDE_FUNC_FFF #undef FUNC_FFF #endif #ifndef FUNC_DD #define ELIDE_FUNC_DD #define FUNC_DD(...) #endif FUNC_DD(FUNC_SQRT_DD, "sqrt_dd", sqrt, vdSqrt) FUNC_DD(FUNC_SIN_DD, "sin_dd", sin, vdSin) FUNC_DD(FUNC_COS_DD, "cos_dd", cos, vdCos) FUNC_DD(FUNC_TAN_DD, "tan_dd", tan, vdTan) FUNC_DD(FUNC_ARCSIN_DD, "arcsin_dd", asin, vdAsin) FUNC_DD(FUNC_ARCCOS_DD, "arccos_dd", acos, vdAcos) FUNC_DD(FUNC_ARCTAN_DD, "arctan_dd", atan, vdAtan) FUNC_DD(FUNC_SINH_DD, "sinh_dd", sinh, vdSinh) FUNC_DD(FUNC_COSH_DD, "cosh_dd", cosh, vdCosh) FUNC_DD(FUNC_TANH_DD, "tanh_dd", tanh, vdTanh) FUNC_DD(FUNC_ARCSINH_DD, "arcsinh_dd", asinh, vdAsinh) FUNC_DD(FUNC_ARCCOSH_DD, "arccosh_dd", acosh, vdAcosh) FUNC_DD(FUNC_ARCTANH_DD, "arctanh_dd", atanh, vdAtanh) FUNC_DD(FUNC_LOG_DD, "log_dd", log, vdLn) FUNC_DD(FUNC_LOG1P_DD, "log1p_dd", log1p, vdLog1p) FUNC_DD(FUNC_LOG10_DD, "log10_dd", log10, vdLog10) FUNC_DD(FUNC_EXP_DD, "exp_dd", exp, vdExp) FUNC_DD(FUNC_EXPM1_DD, "expm1_dd", expm1, vdExpm1) FUNC_DD(FUNC_ABS_DD, "absolute_dd", fabs, vdAbs) FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DD #undef ELIDE_FUNC_DD #undef FUNC_DD #endif #ifndef FUNC_DDD #define ELIDE_FUNC_DDD #define FUNC_DDD(...) #endif FUNC_DDD(FUNC_FMOD_DDD, "fmod_ddd", fmod, vdfmod) FUNC_DDD(FUNC_ARCTAN2_DDD, "arctan2_ddd", atan2, vdAtan2) FUNC_DDD(FUNC_DDD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DDD #undef ELIDE_FUNC_DDD #undef FUNC_DDD #endif #ifndef FUNC_CC #define ELIDE_FUNC_CC #define FUNC_CC(...) 
#endif FUNC_CC(FUNC_SQRT_CC, "sqrt_cc", nc_sqrt, vzSqrt) FUNC_CC(FUNC_SIN_CC, "sin_cc", nc_sin, vzSin) FUNC_CC(FUNC_COS_CC, "cos_cc", nc_cos, vzCos) FUNC_CC(FUNC_TAN_CC, "tan_cc", nc_tan, vzTan) FUNC_CC(FUNC_ARCSIN_CC, "arcsin_cc", nc_asin, vzAsin) FUNC_CC(FUNC_ARCCOS_CC, "arccos_cc", nc_acos, vzAcos) FUNC_CC(FUNC_ARCTAN_CC, "arctan_cc", nc_atan, vzAtan) FUNC_CC(FUNC_SINH_CC, "sinh_cc", nc_sinh, vzSinh) FUNC_CC(FUNC_COSH_CC, "cosh_cc", nc_cosh, vzCosh) FUNC_CC(FUNC_TANH_CC, "tanh_cc", nc_tanh, vzTanh) FUNC_CC(FUNC_ARCSINH_CC, "arcsinh_cc", nc_asinh, vzAsinh) FUNC_CC(FUNC_ARCCOSH_CC, "arccosh_cc", nc_acosh, vzAcosh) FUNC_CC(FUNC_ARCTANH_CC, "arctanh_cc", nc_atanh, vzAtanh) FUNC_CC(FUNC_LOG_CC, "log_cc", nc_log, vzLn) FUNC_CC(FUNC_LOG1P_CC, "log1p_cc", nc_log1p, vzLog1p) FUNC_CC(FUNC_LOG10_CC, "log10_cc", nc_log10, vzLog10) FUNC_CC(FUNC_EXP_CC, "exp_cc", nc_exp, vzExp) FUNC_CC(FUNC_EXPM1_CC, "expm1_cc", nc_expm1, vzExpm1) FUNC_CC(FUNC_ABS_CC, "absolute_cc", nc_abs, vzAbs_) FUNC_CC(FUNC_CC_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_CC #undef ELIDE_FUNC_CC #undef FUNC_CC #endif #ifndef FUNC_CCC #define ELIDE_FUNC_CCC #define FUNC_CCC(...) #endif FUNC_CCC(FUNC_POW_CCC, "pow_ccc", nc_pow) FUNC_CCC(FUNC_CCC_LAST, NULL, NULL) #ifdef ELIDE_FUNC_CCC #undef ELIDE_FUNC_CCC #undef FUNC_CCC #endif numexpr-2.2.2/numexpr/interpreter.hpp0000644000175000001440000000466212132261472020433 0ustar faltetusers00000000000000#ifndef NUMEXPR_INTERPRETER_HPP #define NUMEXPR_INTERPRETER_HPP #include "numexpr_config.hpp" // Forward declaration struct NumExprObject; enum OpCodes { #define OPCODE(n, e, ...) e = n, #include "opcodes.hpp" #undef OPCODE }; enum FuncFFCodes { #define FUNC_FF(fop, ...) fop, #include "functions.hpp" #undef FUNC_FF }; enum FuncFFFCodes { #define FUNC_FFF(fop, ...) fop, #include "functions.hpp" #undef FUNC_FFF }; enum FuncDDCodes { #define FUNC_DD(fop, ...) fop, #include "functions.hpp" #undef FUNC_DD }; enum FuncDDDCodes { #define FUNC_DDD(fop, ...) 
fop, #include "functions.hpp" #undef FUNC_DDD }; enum FuncCCCodes { #define FUNC_CC(fop, ...) fop, #include "functions.hpp" #undef FUNC_CC }; enum FuncCCCCodes { #define FUNC_CCC(fop, ...) fop, #include "functions.hpp" #undef FUNC_CCC }; struct vm_params { int prog_len; unsigned char *program; int n_inputs; int n_constants; int n_temps; unsigned int r_end; char *output; char **inputs; char **mem; npy_intp *memsteps; npy_intp *memsizes; struct index_data *index_data; // Memory for output buffering. If output buffering is unneeded, // it contains NULL. char *out_buffer; }; // Structure for parameters in worker threads struct thread_data { npy_intp start; npy_intp vlen; npy_intp block_size; vm_params params; int ret_code; int *pc_error; char **errmsg; // One memsteps array per thread npy_intp *memsteps[MAX_THREADS]; // One iterator per thread */ NpyIter *iter[MAX_THREADS]; // When doing nested iteration for a reduction NpyIter *reduce_iter[MAX_THREADS]; // Flag indicating reduction is the outer loop instead of the inner bool reduction_outer_loop; // Flag indicating whether output buffering is needed bool need_output_buffering; }; // Global state which holds thread parameters extern thread_data th_params; PyObject *NumExpr_run(NumExprObject *self, PyObject *args, PyObject *kwds); char get_return_sig(PyObject* program); int check_program(NumExprObject *self); int get_temps_space(const vm_params& params, char **mem, size_t block_size); void free_temps_space(const vm_params& params, char **mem); int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps, const vm_params& params, int *pc_error, char **errmsg); #endif // NUMEXPR_INTERPRETER_HPPnumexpr-2.2.2/numexpr/interpreter.cpp0000644000175000001440000012312612221256772020431 0ustar faltetusers00000000000000/********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. 
License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ #include "module.hpp" #include #include #include #include #include #include "numexpr_config.hpp" #include "complex_functions.hpp" #include "interpreter.hpp" #include "numexpr_object.hpp" using namespace std; // Global state thread_data th_params; /* This file and interp_body should really be generated from a description of the opcodes -- there's too much repetition here for manually editing */ /* bit of a misnomer; includes the return value. */ #define NUMEXPR_MAX_ARGS 4 static char op_signature_table[][NUMEXPR_MAX_ARGS] = { #define Tb 'b' #define Ti 'i' #define Tl 'l' #define Tf 'f' #define Td 'd' #define Tc 'c' #define Ts 's' #define Tn 'n' #define T0 0 #define OPCODE(n, e, ex, rt, a1, a2, a3) {rt, a1, a2, a3}, #include "opcodes.hpp" #undef OPCODE #undef Tb #undef Ti #undef Tl #undef Tf #undef Td #undef Tc #undef Ts #undef Tn #undef T0 }; /* returns the sig of the nth op, '\0' if no more ops -1 on failure */ static int op_signature(int op, unsigned int n) { if (n >= NUMEXPR_MAX_ARGS) { return 0; } if (op < 0 || op > OP_END) { return -1; } return op_signature_table[op][n]; } /* To add a function to the lookup table, add to FUNC_CODES (first group is 1-arg functions, second is 2-arg functions), also to functions_f or functions_ff as appropriate. Finally, use add_func down below to add to funccodes. Functions with more arguments aren't implemented at present, but should be easy; just copy the 1- or 2-arg case. Some functions (for example, sqrt) are repeated in this table that are opcodes, but there's no problem with that as the compiler selects opcodes over functions, and this makes it easier to compare opcode vs. function speeds. */ typedef float (*FuncFFPtr)(float); #ifdef _WIN32 FuncFFPtr functions_ff[] = { #define FUNC_FF(fop, s, f, f_win32, ...) 
f_win32, #include "functions.hpp" #undef FUNC_FF }; #else FuncFFPtr functions_ff[] = { #define FUNC_FF(fop, s, f, ...) f, #include "functions.hpp" #undef FUNC_FF }; #endif #ifdef USE_VML typedef void (*FuncFFPtr_vml)(int, const float*, float*); FuncFFPtr_vml functions_ff_vml[] = { #define FUNC_FF(fop, s, f, f_win32, f_vml) f_vml, #include "functions.hpp" #undef FUNC_FF }; #endif typedef float (*FuncFFFPtr)(float, float); #ifdef _WIN32 FuncFFFPtr functions_fff[] = { #define FUNC_FFF(fop, s, f, f_win32, ...) f_win32, #include "functions.hpp" #undef FUNC_FFF }; #else FuncFFFPtr functions_fff[] = { #define FUNC_FFF(fop, s, f, ...) f, #include "functions.hpp" #undef FUNC_FFF }; #endif #ifdef USE_VML /* fmod not available in VML */ static void vsfmod(int n, const float* x1, const float* x2, float* dest) { int j; for(j=0; j < n; j++) { dest[j] = fmod(x1[j], x2[j]); }; }; typedef void (*FuncFFFPtr_vml)(int, const float*, const float*, float*); FuncFFFPtr_vml functions_fff_vml[] = { #define FUNC_FFF(fop, s, f, f_win32, f_vml) f_vml, #include "functions.hpp" #undef FUNC_FFF }; #endif typedef double (*FuncDDPtr)(double); FuncDDPtr functions_dd[] = { #define FUNC_DD(fop, s, f, ...) f, #include "functions.hpp" #undef FUNC_DD }; #ifdef USE_VML typedef void (*FuncDDPtr_vml)(int, const double*, double*); FuncDDPtr_vml functions_dd_vml[] = { #define FUNC_DD(fop, s, f, f_vml) f_vml, #include "functions.hpp" #undef FUNC_DD }; #endif typedef double (*FuncDDDPtr)(double, double); FuncDDDPtr functions_ddd[] = { #define FUNC_DDD(fop, s, f, ...) 
f, #include "functions.hpp" #undef FUNC_DDD }; #ifdef USE_VML /* fmod not available in VML */ static void vdfmod(int n, const double* x1, const double* x2, double* dest) { int j; for(j=0; j < n; j++) { dest[j] = fmod(x1[j], x2[j]); }; }; typedef void (*FuncDDDPtr_vml)(int, const double*, const double*, double*); FuncDDDPtr_vml functions_ddd_vml[] = { #define FUNC_DDD(fop, s, f, f_vml) f_vml, #include "functions.hpp" #undef FUNC_DDD }; #endif typedef void (*FuncCCPtr)(npy_cdouble*, npy_cdouble*); FuncCCPtr functions_cc[] = { #define FUNC_CC(fop, s, f, ...) f, #include "functions.hpp" #undef FUNC_CC }; #ifdef USE_VML /* complex expm1 not available in VML */ static void vzExpm1(int n, const MKL_Complex16* x1, MKL_Complex16* dest) { int j; vzExp(n, x1, dest); for (j=0; j= NPY_MAXDIMS) axis = NPY_MAXDIMS - axis; return axis; } int check_program(NumExprObject *self) { unsigned char *program; Py_ssize_t prog_len, n_buffers, n_inputs; int pc, arg, argloc, argno, sig; char *fullsig, *signature; if (PyBytes_AsStringAndSize(self->program, (char **)&program, &prog_len) < 0) { PyErr_Format(PyExc_RuntimeError, "invalid program: can't read program"); return -1; } if (prog_len % 4 != 0) { PyErr_Format(PyExc_RuntimeError, "invalid program: prog_len mod 4 != 0"); return -1; } if (PyBytes_AsStringAndSize(self->fullsig, (char **)&fullsig, &n_buffers) < 0) { PyErr_Format(PyExc_RuntimeError, "invalid program: can't read fullsig"); return -1; } if (PyBytes_AsStringAndSize(self->signature, (char **)&signature, &n_inputs) < 0) { PyErr_Format(PyExc_RuntimeError, "invalid program: can't read signature"); return -1; } if (n_buffers > 255) { PyErr_Format(PyExc_RuntimeError, "invalid program: too many buffers"); return -1; } for (pc = 0; pc < prog_len; pc += 4) { unsigned int op = program[pc]; if (op == OP_NOOP) { continue; } if ((op >= OP_REDUCTION) && pc != prog_len-4) { PyErr_Format(PyExc_RuntimeError, "invalid program: reduction operations must occur last"); return -1; } for (argno = 0; ; 
argno++) { sig = op_signature(op, argno); if (sig == -1) { PyErr_Format(PyExc_RuntimeError, "invalid program: illegal opcode at %i (%d)", pc, op); return -1; } if (sig == 0) break; if (argno < 3) { argloc = pc+argno+1; } if (argno >= 3) { if (pc + 1 >= prog_len) { PyErr_Format(PyExc_RuntimeError, "invalid program: double opcode (%c) at end (%i)", pc, sig); return -1; } argloc = pc+argno+2; } arg = program[argloc]; if (sig != 'n' && ((arg >= n_buffers) || (arg < 0))) { PyErr_Format(PyExc_RuntimeError, "invalid program: buffer out of range (%i) at %i", arg, argloc); return -1; } if (sig == 'n') { if (op == OP_FUNC_FFN) { if (arg < 0 || arg >= FUNC_FF_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_FFFN) { if (arg < 0 || arg >= FUNC_FFF_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_DDN) { if (arg < 0 || arg >= FUNC_DD_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_DDDN) { if (arg < 0 || arg >= FUNC_DDD_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_CCN) { if (arg < 0 || arg >= FUNC_CC_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_CCCN) { if (arg < 0 || arg >= FUNC_CCC_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op >= OP_REDUCTION) { ; } else { PyErr_Format(PyExc_RuntimeError, "invalid program: internal checker errror processing %i", argloc); return -1; } /* The next is to avoid problems with the ('i','l') duality, specially in 64-bit platforms */ } else if (((sig == 'l') && (fullsig[arg] == 
'i')) || ((sig == 'i') && (fullsig[arg] == 'l'))) { ; } else if (sig != fullsig[arg]) { PyErr_Format(PyExc_RuntimeError, "invalid : opcode signature doesn't match buffer (%c vs %c) at %i", sig, fullsig[arg], argloc); return -1; } } } return 0; } struct index_data { int count; int size; int findex; npy_intp *shape; npy_intp *strides; int *index; char *buffer; }; // BOUNDS_CHECK is used in interp_body.cpp #define DO_BOUNDS_CHECK 1 #if DO_BOUNDS_CHECK #define BOUNDS_CHECK(arg) if ((arg) >= params.r_end) { \ *pc_error = pc; \ return -2; \ } #else #define BOUNDS_CHECK(arg) #endif int stringcmp(const char *s1, const char *s2, npy_intp maxlen1, npy_intp maxlen2) { npy_intp maxlen, nextpos; /* Point to this when the end of a string is found, to simulate infinte trailing NUL characters. */ const char null = 0; maxlen = (maxlen1 > maxlen2) ? maxlen1 : maxlen2; for (nextpos = 1; nextpos <= maxlen; nextpos++) { if (*s1 < *s2) return -1; if (*s1 > *s2) return +1; s1 = (nextpos >= maxlen1) ? &null : s1+1; s2 = (nextpos >= maxlen2) ? 
&null : s2+1; } return 0; } /* Get space for VM temporary registers */ int get_temps_space(const vm_params& params, char **mem, size_t block_size) { int r, k = 1 + params.n_inputs + params.n_constants; for (r = k; r < k + params.n_temps; r++) { mem[r] = (char *)malloc(block_size * params.memsizes[r]); if (mem[r] == NULL) { return -1; } } return 0; } /* Free space for VM temporary registers */ void free_temps_space(const vm_params& params, char **mem) { int r, k = 1 + params.n_inputs + params.n_constants; for (r = k; r < k + params.n_temps; r++) { free(mem[r]); } } /* Serial/parallel task iterator version of the VM engine */ int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps, const vm_params& params, int *pc_error, char **errmsg) { char **mem = params.mem; NpyIter_IterNextFunc *iternext; npy_intp block_size, *size_ptr; char **iter_dataptr; npy_intp *iter_strides; iternext = NpyIter_GetIterNext(iter, errmsg); if (iternext == NULL) { return -1; } size_ptr = NpyIter_GetInnerLoopSizePtr(iter); iter_dataptr = NpyIter_GetDataPtrArray(iter); iter_strides = NpyIter_GetInnerStrideArray(iter); /* * First do all the blocks with a compile-time fixed size. * This makes a big difference (30-50% on some tests). 
*/ block_size = *size_ptr; while (block_size == BLOCK_SIZE1) { #define REDUCTION_INNER_LOOP #define BLOCK_SIZE BLOCK_SIZE1 #include "interp_body.cpp" #undef BLOCK_SIZE #undef REDUCTION_INNER_LOOP iternext(iter); block_size = *size_ptr; } /* Then finish off the rest */ if (block_size > 0) do { #define REDUCTION_INNER_LOOP #define BLOCK_SIZE block_size #include "interp_body.cpp" #undef BLOCK_SIZE #undef REDUCTION_INNER_LOOP } while (iternext(iter)); return 0; } static int vm_engine_iter_outer_reduce_task(NpyIter *iter, npy_intp *memsteps, const vm_params& params, int *pc_error, char **errmsg) { char **mem = params.mem; NpyIter_IterNextFunc *iternext; npy_intp block_size, *size_ptr; char **iter_dataptr; npy_intp *iter_strides; iternext = NpyIter_GetIterNext(iter, errmsg); if (iternext == NULL) { return -1; } size_ptr = NpyIter_GetInnerLoopSizePtr(iter); iter_dataptr = NpyIter_GetDataPtrArray(iter); iter_strides = NpyIter_GetInnerStrideArray(iter); /* * First do all the blocks with a compile-time fixed size. * This makes a big difference (30-50% on some tests). 
*/ block_size = *size_ptr; while (block_size == BLOCK_SIZE1) { #define BLOCK_SIZE BLOCK_SIZE1 #define NO_OUTPUT_BUFFERING // Because it's a reduction #include "interp_body.cpp" #undef NO_OUTPUT_BUFFERING #undef BLOCK_SIZE iternext(iter); block_size = *size_ptr; } /* Then finish off the rest */ if (block_size > 0) do { #define BLOCK_SIZE block_size #define NO_OUTPUT_BUFFERING // Because it's a reduction #include "interp_body.cpp" #undef NO_OUTPUT_BUFFERING #undef BLOCK_SIZE } while (iternext(iter)); return 0; } /* Parallel iterator version of VM engine */ static int vm_engine_iter_parallel(NpyIter *iter, const vm_params& params, bool need_output_buffering, int *pc_error, char **errmsg) { int i; npy_intp numblocks, taskfactor; if (errmsg == NULL) { return -1; } /* Populate parameters for worker threads */ NpyIter_GetIterIndexRange(iter, &th_params.start, &th_params.vlen); /* * Try to make it so each thread gets 16 tasks. This is a compromise * between 1 task per thread and one block per task. 
*/ taskfactor = 16*BLOCK_SIZE1*gs.nthreads; numblocks = (th_params.vlen - th_params.start + taskfactor - 1) / taskfactor; th_params.block_size = numblocks * BLOCK_SIZE1; th_params.params = params; th_params.need_output_buffering = need_output_buffering; th_params.ret_code = 0; th_params.pc_error = pc_error; th_params.errmsg = errmsg; th_params.iter[0] = iter; /* Make one copy for each additional thread */ for (i = 1; i < gs.nthreads; ++i) { th_params.iter[i] = NpyIter_Copy(iter); if (th_params.iter[i] == NULL) { --i; for (; i > 0; --i) { NpyIter_Deallocate(th_params.iter[i]); } return -1; } } th_params.memsteps[0] = params.memsteps; /* Make one copy of memsteps for each additional thread */ for (i = 1; i < gs.nthreads; ++i) { th_params.memsteps[i] = PyMem_New(npy_intp, 1 + params.n_inputs + params.n_constants + params.n_temps); if (th_params.memsteps[i] == NULL) { --i; for (; i > 0; --i) { PyMem_Del(th_params.memsteps[i]); } for (i = 0; i < gs.nthreads; ++i) { NpyIter_Deallocate(th_params.iter[i]); } return -1; } memcpy(th_params.memsteps[i], th_params.memsteps[0], sizeof(npy_intp) * (1 + params.n_inputs + params.n_constants + params.n_temps)); } Py_BEGIN_ALLOW_THREADS; /* Synchronization point for all threads (wait for initialization) */ pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads < gs.nthreads) { gs.count_threads++; pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } else { pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); /* Synchronization point for all threads (wait for finalization) */ pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads > 0) { gs.count_threads--; pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } else { pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); Py_END_ALLOW_THREADS; /* Deallocate all the iterator and memsteps copies */ for (i = 1; i < gs.nthreads; ++i) { 
NpyIter_Deallocate(th_params.iter[i]); PyMem_Del(th_params.memsteps[i]); } return th_params.ret_code; } static int run_interpreter(NumExprObject *self, NpyIter *iter, NpyIter *reduce_iter, bool reduction_outer_loop, bool need_output_buffering, int *pc_error) { int r; Py_ssize_t plen; vm_params params; char *errmsg = NULL; *pc_error = -1; if (PyBytes_AsStringAndSize(self->program, (char **)&(params.program), &plen) < 0) { return -1; } params.prog_len = (int)plen; params.output = NULL; params.inputs = NULL; params.index_data = NULL; params.n_inputs = self->n_inputs; params.n_constants = self->n_constants; params.n_temps = self->n_temps; params.mem = self->mem; params.memsteps = self->memsteps; params.memsizes = self->memsizes; params.r_end = (int)PyBytes_Size(self->fullsig); params.out_buffer = NULL; if ((gs.nthreads == 1) || gs.force_serial) { // Can do it as one "task" if (reduce_iter == NULL) { // Allocate memory for output buffering if needed vector out_buffer(need_output_buffering ? (self->memsizes[0] * BLOCK_SIZE1) : 0); params.out_buffer = need_output_buffering ? 
&out_buffer[0] : NULL; // Reset the iterator to allocate its buffers if(NpyIter_Reset(iter, NULL) != NPY_SUCCEED) { return -1; } get_temps_space(params, params.mem, BLOCK_SIZE1); Py_BEGIN_ALLOW_THREADS; r = vm_engine_iter_task(iter, params.memsteps, params, pc_error, &errmsg); Py_END_ALLOW_THREADS; free_temps_space(params, params.mem); } else { if (reduction_outer_loop) { char **dataptr; NpyIter_IterNextFunc *iternext; dataptr = NpyIter_GetDataPtrArray(reduce_iter); iternext = NpyIter_GetIterNext(reduce_iter, NULL); if (iternext == NULL) { return -1; } get_temps_space(params, params.mem, BLOCK_SIZE1); Py_BEGIN_ALLOW_THREADS; do { r = NpyIter_ResetBasePointers(iter, dataptr, &errmsg); if (r >= 0) { r = vm_engine_iter_outer_reduce_task(iter, params.memsteps, params, pc_error, &errmsg); } if (r < 0) { break; } } while (iternext(reduce_iter)); Py_END_ALLOW_THREADS; free_temps_space(params, params.mem); } else { char **dataptr; NpyIter_IterNextFunc *iternext; dataptr = NpyIter_GetDataPtrArray(iter); iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { return -1; } get_temps_space(params, params.mem, BLOCK_SIZE1); Py_BEGIN_ALLOW_THREADS; do { r = NpyIter_ResetBasePointers(reduce_iter, dataptr, &errmsg); if (r >= 0) { r = vm_engine_iter_task(reduce_iter, params.memsteps, params, pc_error, &errmsg); } if (r < 0) { break; } } while (iternext(iter)); Py_END_ALLOW_THREADS; free_temps_space(params, params.mem); } } } else { if (reduce_iter == NULL) { r = vm_engine_iter_parallel(iter, params, need_output_buffering, pc_error, &errmsg); } else { errmsg = "Parallel engine doesn't support reduction yet"; r = -1; } } if (r < 0 && errmsg != NULL) { PyErr_SetString(PyExc_RuntimeError, errmsg); } return 0; } static int run_interpreter_const(NumExprObject *self, char *output, int *pc_error) { vm_params params; Py_ssize_t plen; char **mem; npy_intp *memsteps; *pc_error = -1; if (PyBytes_AsStringAndSize(self->program, (char **)&(params.program), &plen) < 0) { return -1; } 
if (self->n_inputs != 0) { return -1; } params.prog_len = (int)plen; params.output = output; params.inputs = NULL; params.index_data = NULL; params.n_inputs = self->n_inputs; params.n_constants = self->n_constants; params.n_temps = self->n_temps; params.mem = self->mem; memsteps = self->memsteps; params.memsizes = self->memsizes; params.r_end = (int)PyBytes_Size(self->fullsig); mem = params.mem; get_temps_space(params, mem, 1); #define SINGLE_ITEM_CONST_LOOP #define BLOCK_SIZE 1 #define NO_OUTPUT_BUFFERING // Because it's constant #include "interp_body.cpp" #undef NO_OUTPUT_BUFFERING #undef BLOCK_SIZE #undef SINGLE_ITEM_CONST_LOOP free_temps_space(params, mem); return 0; } PyObject * NumExpr_run(NumExprObject *self, PyObject *args, PyObject *kwds) { PyArrayObject *operands[NPY_MAXARGS]; PyArray_Descr *dtypes[NPY_MAXARGS], **dtypes_tmp; PyObject *tmp, *ret; npy_uint32 op_flags[NPY_MAXARGS]; NPY_CASTING casting = NPY_SAFE_CASTING; NPY_ORDER order = NPY_KEEPORDER; unsigned int i, n_inputs; int r, pc_error = 0; int reduction_axis = -1; npy_intp reduction_size = 1; int ex_uses_vml = 0, is_reduction = 0; bool reduction_outer_loop = false, need_output_buffering = false; // To specify axes when doing a reduction int op_axes_values[NPY_MAXARGS][NPY_MAXDIMS], op_axes_reduction_values[NPY_MAXARGS]; int *op_axes_ptrs[NPY_MAXDIMS]; int oa_ndim = 0; int **op_axes = NULL; NpyIter *iter = NULL, *reduce_iter = NULL; // Check whether we need to restart threads if (!gs.init_threads_done || gs.pid != getpid()) { numexpr_set_nthreads(gs.nthreads); } // Don't force serial mode by default gs.force_serial = 0; // Check whether there's a reduction as the final step is_reduction = last_opcode(self->program) > OP_REDUCTION; n_inputs = (int)PyTuple_Size(args); if (PyBytes_Size(self->signature) != n_inputs) { return PyErr_Format(PyExc_ValueError, "number of inputs doesn't match program"); } else if (n_inputs+1 > NPY_MAXARGS) { return PyErr_Format(PyExc_ValueError, "too many inputs"); } 
memset(operands, 0, sizeof(operands)); memset(dtypes, 0, sizeof(dtypes)); if (kwds) { tmp = PyDict_GetItemString(kwds, "casting"); // borrowed ref if (tmp != NULL && !PyArray_CastingConverter(tmp, &casting)) { return NULL; } tmp = PyDict_GetItemString(kwds, "order"); // borrowed ref if (tmp != NULL && !PyArray_OrderConverter(tmp, &order)) { return NULL; } tmp = PyDict_GetItemString(kwds, "ex_uses_vml"); // borrowed ref if (tmp == NULL) { return PyErr_Format(PyExc_ValueError, "ex_uses_vml parameter is required"); } if (tmp == Py_True) { ex_uses_vml = 1; } // borrowed ref operands[0] = (PyArrayObject *)PyDict_GetItemString(kwds, "out"); if (operands[0] != NULL) { if ((PyObject *)operands[0] == Py_None) { operands[0] = NULL; } else if (!PyArray_Check(operands[0])) { return PyErr_Format(PyExc_ValueError, "out keyword parameter is not an array"); } else { Py_INCREF(operands[0]); } } } for (i = 0; i < n_inputs; i++) { PyObject *o = PyTuple_GET_ITEM(args, i); // borrowed ref PyObject *a; char c = PyBytes_AS_STRING(self->signature)[i]; int typecode = typecode_from_char(c); // Convert it if it's not an array if (!PyArray_Check(o)) { if (typecode == -1) goto fail; a = PyArray_FROM_OTF(o, typecode, NPY_NOTSWAPPED); } else { Py_INCREF(o); a = o; } operands[i+1] = (PyArrayObject *)a; dtypes[i+1] = PyArray_DescrFromType(typecode); if (operands[0] != NULL) { // Check for the case where "out" is one of the inputs // TODO: Probably should deal with the general overlap case, // but NumPy ufuncs don't do that yet either. if (PyArray_DATA(operands[0]) == PyArray_DATA(operands[i+1])) { need_output_buffering = true; } } if (operands[i+1] == NULL || dtypes[i+1] == NULL) { goto fail; } op_flags[i+1] = NPY_ITER_READONLY| #ifdef USE_VML (ex_uses_vml ? (NPY_ITER_CONTIG|NPY_ITER_ALIGNED) : 0)| #endif #ifndef USE_UNALIGNED_ACCESS NPY_ITER_ALIGNED| #endif NPY_ITER_NBO ; } if (is_reduction) { // A reduction can not result in a string, // so we don't need to worry about item sizes here. 
char retsig = get_return_sig(self->program); reduction_axis = get_reduction_axis(self->program); // Need to set up op_axes for the non-reduction part if (reduction_axis != 255) { // Get the number of broadcast dimensions for (i = 0; i < n_inputs; ++i) { int ndim = PyArray_NDIM(operands[i+1]); if (ndim > oa_ndim) { oa_ndim = ndim; } } if (reduction_axis < 0 || reduction_axis >= oa_ndim) { PyErr_Format(PyExc_ValueError, "reduction axis is out of bounds"); goto fail; } // Fill in the op_axes op_axes_ptrs[0] = NULL; op_axes_reduction_values[0] = -1; for (i = 0; i < n_inputs; ++i) { int j = 0, idim, ndim = PyArray_NDIM(operands[i+1]); for (idim = 0; idim < oa_ndim-ndim; ++idim) { if (idim != reduction_axis) { op_axes_values[i+1][j++] = -1; } else { op_axes_reduction_values[i+1] = -1; } } for (idim = oa_ndim-ndim; idim < oa_ndim; ++idim) { if (idim != reduction_axis) { op_axes_values[i+1][j++] = idim-(oa_ndim-ndim); } else { npy_intp size = PyArray_DIM(operands[i+1], idim-(oa_ndim-ndim)); if (size > reduction_size) { reduction_size = size; } op_axes_reduction_values[i+1] = idim-(oa_ndim-ndim); } } op_axes_ptrs[i+1] = op_axes_values[i+1]; } // op_axes has one less than the broadcast dimensions --oa_ndim; if (oa_ndim > 0) { op_axes = op_axes_ptrs; } else { reduction_size = 1; } } // A full reduction can be done without nested iteration if (oa_ndim == 0) { if (operands[0] == NULL) { npy_intp dim = 1; operands[0] = (PyArrayObject *)PyArray_SimpleNew(0, &dim, typecode_from_char(retsig)); if (!operands[0]) goto fail; } else if (PyArray_SIZE(operands[0]) != 1) { PyErr_Format(PyExc_ValueError, "out argument must have size 1 for a full reduction"); goto fail; } } dtypes[0] = PyArray_DescrFromType(typecode_from_char(retsig)); op_flags[0] = NPY_ITER_READWRITE| NPY_ITER_ALLOCATE| // Copy, because it can't buffer the reduction NPY_ITER_UPDATEIFCOPY| NPY_ITER_NBO| #ifndef USE_UNALIGNED_ACCESS NPY_ITER_ALIGNED| #endif (oa_ndim == 0 ? 
0 : NPY_ITER_NO_BROADCAST); } else { char retsig = get_return_sig(self->program); if (retsig != 's') { dtypes[0] = PyArray_DescrFromType(typecode_from_char(retsig)); } else { /* Since the *only* supported operation returning a string * is a copy, the size of returned strings * can be directly gotten from the first (and only) * input/constant/temporary. */ if (n_inputs > 0) { // input, like in 'a' where a -> 'foo' dtypes[0] = PyArray_DESCR(operands[1]); Py_INCREF(dtypes[0]); } else { // constant, like in '"foo"' dtypes[0] = PyArray_DescrNewFromType(PyArray_STRING); dtypes[0]->elsize = (int)self->memsizes[1]; } // no string temporaries, so no third case } if (dtypes[0] == NULL) { goto fail; } op_flags[0] = NPY_ITER_WRITEONLY| NPY_ITER_ALLOCATE| NPY_ITER_CONTIG| NPY_ITER_NBO| #ifndef USE_UNALIGNED_ACCESS NPY_ITER_ALIGNED| #endif NPY_ITER_NO_BROADCAST; } // Check for empty arrays in expression if (n_inputs > 0) { char retsig = get_return_sig(self->program); // Check length for all inputs int zeroi, zerolen = 0; for (i=0; i < n_inputs; i++) { if (PyArray_SIZE(operands[i+1]) == 0) { zerolen = 1; zeroi = i+1; break; } } if (zerolen != 0) { // Allocate the output int ndim = PyArray_NDIM(operands[zeroi]); npy_intp *dims = PyArray_DIMS(operands[zeroi]); operands[0] = (PyArrayObject *)PyArray_SimpleNew(ndim, dims, typecode_from_char(retsig)); if (operands[0] == NULL) { goto fail; } ret = (PyObject *)operands[0]; Py_INCREF(ret); goto cleanup_and_exit; } } /* A case with a single constant output */ if (n_inputs == 0) { char retsig = get_return_sig(self->program); /* Allocate the output */ if (operands[0] == NULL) { npy_intp dim = 1; operands[0] = (PyArrayObject *)PyArray_SimpleNew(0, &dim, typecode_from_char(retsig)); if (operands[0] == NULL) { goto fail; } } else { PyArrayObject *a; if (PyArray_SIZE(operands[0]) != 1) { PyErr_SetString(PyExc_ValueError, "output for a constant expression must have size 1"); goto fail; } else if (!PyArray_ISWRITEABLE(operands[0])) { 
PyErr_SetString(PyExc_ValueError, "output is not writeable"); goto fail; } Py_INCREF(dtypes[0]); a = (PyArrayObject *)PyArray_FromArray(operands[0], dtypes[0], NPY_ALIGNED|NPY_UPDATEIFCOPY); if (a == NULL) { goto fail; } Py_DECREF(operands[0]); operands[0] = a; } r = run_interpreter_const(self, PyArray_BYTES(operands[0]), &pc_error); ret = (PyObject *)operands[0]; Py_INCREF(ret); goto cleanup_and_exit; } /* Allocate the iterator or nested iterators */ if (reduction_size == 1) { /* When there's no reduction, reduction_size is 1 as well */ iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_BUFFERED| NPY_ITER_REDUCE_OK| NPY_ITER_RANGED| NPY_ITER_DELAY_BUFALLOC| NPY_ITER_EXTERNAL_LOOP, order, casting, op_flags, dtypes, 0, NULL, NULL, BLOCK_SIZE1); if (iter == NULL) { goto fail; } } else { npy_uint32 op_flags_outer[NPY_MAXDIMS]; /* The outer loop is unbuffered */ op_flags_outer[0] = NPY_ITER_READWRITE| NPY_ITER_ALLOCATE| NPY_ITER_NO_BROADCAST; for (i = 0; i < n_inputs; ++i) { op_flags_outer[i+1] = NPY_ITER_READONLY; } /* Arbitrary threshold for which is the inner loop...benchmark? 
*/ if (reduction_size < 64) { reduction_outer_loop = true; iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_BUFFERED| NPY_ITER_RANGED| NPY_ITER_DELAY_BUFALLOC| NPY_ITER_EXTERNAL_LOOP, order, casting, op_flags, dtypes, oa_ndim, op_axes, NULL, BLOCK_SIZE1); if (iter == NULL) { goto fail; } /* If the output was allocated, get it for the second iterator */ if (operands[0] == NULL) { operands[0] = NpyIter_GetOperandArray(iter)[0]; Py_INCREF(operands[0]); } op_axes[0] = &op_axes_reduction_values[0]; for (i = 0; i < n_inputs; ++i) { op_axes[i+1] = &op_axes_reduction_values[i+1]; } op_flags_outer[0] &= ~NPY_ITER_NO_BROADCAST; reduce_iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_REDUCE_OK, order, casting, op_flags_outer, NULL, 1, op_axes, NULL, 0); if (reduce_iter == NULL) { goto fail; } } else { PyArray_Descr *dtypes_outer[NPY_MAXDIMS]; /* If the output is being allocated, need to specify its dtype */ dtypes_outer[0] = dtypes[0]; for (i = 0; i < n_inputs; ++i) { dtypes_outer[i+1] = NULL; } iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_RANGED, order, casting, op_flags_outer, dtypes_outer, oa_ndim, op_axes, NULL, 0); if (iter == NULL) { goto fail; } /* If the output was allocated, get it for the second iterator */ if (operands[0] == NULL) { operands[0] = NpyIter_GetOperandArray(iter)[0]; Py_INCREF(operands[0]); } op_axes[0] = &op_axes_reduction_values[0]; for (i = 0; i < n_inputs; ++i) { op_axes[i+1] = &op_axes_reduction_values[i+1]; } op_flags[0] &= ~NPY_ITER_NO_BROADCAST; reduce_iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_BUFFERED| NPY_ITER_REDUCE_OK| NPY_ITER_DELAY_BUFALLOC| NPY_ITER_EXTERNAL_LOOP, order, casting, op_flags, dtypes, 1, op_axes, NULL, BLOCK_SIZE1); if (reduce_iter == NULL) { goto fail; } } } /* Initialize the output to the reduction unit */ if (is_reduction) { PyArrayObject *a = NpyIter_GetOperandArray(iter)[0]; if (last_opcode(self->program) >= OP_SUM && last_opcode(self->program) < OP_PROD) { PyObject *zero = 
PyLong_FromLong(0); PyArray_FillWithScalar(a, zero); Py_DECREF(zero); } else { PyObject *one = PyLong_FromLong(1); PyArray_FillWithScalar(a, one); Py_DECREF(one); } } /* Get the sizes of all the operands */ dtypes_tmp = NpyIter_GetDescrArray(iter); for (i = 0; i < n_inputs+1; ++i) { self->memsizes[i] = dtypes_tmp[i]->elsize; } /* For small calculations, just use 1 thread */ if (NpyIter_GetIterSize(iter) < 2*BLOCK_SIZE1) { gs.force_serial = 1; } /* Reductions do not support parallel execution yet */ if (is_reduction) { gs.force_serial = 1; } r = run_interpreter(self, iter, reduce_iter, reduction_outer_loop, need_output_buffering, &pc_error); if (r < 0) { if (r == -1) { if (!PyErr_Occurred()) { PyErr_SetString(PyExc_RuntimeError, "an error occurred while running the program"); } } else if (r == -2) { PyErr_Format(PyExc_RuntimeError, "bad argument at pc=%d", pc_error); } else if (r == -3) { PyErr_Format(PyExc_RuntimeError, "bad opcode at pc=%d", pc_error); } else { PyErr_SetString(PyExc_RuntimeError, "unknown error occurred while running the program"); } goto fail; } /* Get the output from the iterator */ ret = (PyObject *)NpyIter_GetOperandArray(iter)[0]; Py_INCREF(ret); NpyIter_Deallocate(iter); if (reduce_iter != NULL) { NpyIter_Deallocate(reduce_iter); } cleanup_and_exit: for (i = 0; i < n_inputs+1; i++) { Py_XDECREF(operands[i]); Py_XDECREF(dtypes[i]); } return ret; fail: for (i = 0; i < n_inputs+1; i++) { Py_XDECREF(operands[i]); Py_XDECREF(dtypes[i]); } if (iter != NULL) { NpyIter_Deallocate(iter); } if (reduce_iter != NULL) { NpyIter_Deallocate(reduce_iter); } return NULL; } /* Local Variables: c-basic-offset: 4 End: */ numexpr-2.2.2/numexpr/__init__.py0000644000175000001440000000444412207601660017466 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. 
# # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### """ Numexpr is a fast numerical expression evaluator for NumPy. With it, expressions that operate on arrays (like "3*a+4*b") are accelerated and use less memory than doing the same calculation in Python. See: http://code.google.com/p/numexpr/ for more info about it. """ from __config__ import show as show_config, get_info if get_info('mkl'): use_vml = True else: use_vml = False from cpuinfo import cpu if cpu.is_AMD() or cpu.is_Intel(): is_cpu_amd_intel = True else: is_cpu_amd_intel = False import os, os.path import platform from numexpr.expressions import E from numexpr.necompiler import NumExpr, disassemble, evaluate from numexpr.tests import test, print_versions from numexpr.utils import ( get_vml_version, set_vml_accuracy_mode, set_vml_num_threads, set_num_threads, detect_number_of_cores) # Detect the number of cores ncores = detect_number_of_cores() # Initialize the number of threads to be used # If this is modified, please update the note in: # https://code.google.com/p/numexpr/wiki/UsersGuide?ts=1377763727&updated=UsersGuide#General_routines try: nthreads = int(os.environ['NUMEXPR_NUM_THREADS']) except KeyError: nthreads = ncores # Check that we don't activate too many threads at the same time. # 8 seems a sensible value. if nthreads > 8: nthreads = 8 # Check that we don't surpass the MAX_THREADS in interpreter.cpp if nthreads > 4096: nthreads = 4096 if 'sparc' in platform.machine(): import warnings warnings.warn('The number of threads have been set to 1 because problems related ' 'to threading have been reported on some sparc machine. 
' 'The number of threads can be changes using the "set_num_threads" ' 'function.') set_num_threads(1) else: set_num_threads(nthreads) # The default for VML is 1 thread (see #39) set_vml_num_threads(1) import version dirname = os.path.dirname(__file__) __version__ = version.version numexpr-2.2.2/numexpr/utils.py0000644000175000001440000001022212132261472017056 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### import os import subprocess from numexpr.interpreter import _set_num_threads from numexpr import use_vml if use_vml: from numexpr.interpreter import ( _get_vml_version, _set_vml_accuracy_mode, _set_vml_num_threads) def get_vml_version(): """Get the VML/MKL library version.""" if use_vml: return _get_vml_version() else: return None def set_vml_accuracy_mode(mode): """ Set the accuracy mode for VML operations. The `mode` parameter can take the values: - 'high': high accuracy mode (HA), <1 least significant bit - 'low': low accuracy mode (LA), typically 1-2 least significant bits - 'fast': enhanced performance mode (EP) - None: mode settings are ignored This call is equivalent to the `vmlSetMode()` in the VML library. See: http://www.intel.com/software/products/mkl/docs/webhelp/vml/vml_DataTypesAccuracyModes.html for more info on the accuracy modes. Returns old accuracy settings. 
""" if use_vml: acc_dict = {None: 0, 'low': 1, 'high': 2, 'fast': 3} acc_reverse_dict = {1: 'low', 2: 'high', 3: 'fast'} if mode not in acc_dict.keys(): raise ValueError( "mode argument must be one of: None, 'high', 'low', 'fast'") retval = _set_vml_accuracy_mode(acc_dict.get(mode, 0)) return acc_reverse_dict.get(retval) else: return None def set_vml_num_threads(nthreads): """ Suggests a maximum number of threads to be used in VML operations. This function is equivalent to the call `mkl_domain_set_num_threads(nthreads, MKL_VML)` in the MKL library. See: http://www.intel.com/software/products/mkl/docs/webhelp/support/functn_mkl_domain_set_num_threads.html for more info about it. """ if use_vml: _set_vml_num_threads(nthreads) def set_num_threads(nthreads): """ Sets a number of threads to be used in operations. Returns the previous setting for the number of threads. During initialization time Numexpr sets this number to the number of detected cores in the system (see `detect_number_of_cores()`). If you are using Intel's VML, you may want to use `set_vml_num_threads(nthreads)` to perform the parallel job with VML instead. However, you should get very similar performance with VML-optimized functions, and VML's parallelizer cannot deal with common expresions like `(x+1)*(x-2)`, while Numexpr's one can. """ old_nthreads = _set_num_threads(nthreads) return old_nthreads def detect_number_of_cores(): """ Detects the number of cores on a system. Cribbed from pp. 
""" # Linux, Unix and MacOS: if hasattr(os, "sysconf"): if "SC_NPROCESSORS_ONLN" in os.sysconf_names: # Linux & Unix: ncpus = os.sysconf("SC_NPROCESSORS_ONLN") if isinstance(ncpus, int) and ncpus > 0: return ncpus else: # OSX: return int(subprocess.check_output(["sysctl", "-n", "hw.ncpu"])) # Windows: if os.environ.has_key("NUMBER_OF_PROCESSORS"): ncpus = int(os.environ["NUMBER_OF_PROCESSORS"]); if ncpus > 0: return ncpus return 1 # Default class CacheDict(dict): """ A dictionary that prevents itself from growing too much. """ def __init__(self, maxentries): self.maxentries = maxentries super(CacheDict, self).__init__(self) def __setitem__(self, key, value): # Protection against growing the cache too much if len(self) > self.maxentries: # Remove a 10% of (arbitrary) elements from the cache entries_to_remove = self.maxentries // 10 for k in self.keys()[:entries_to_remove]: super(CacheDict, self).__delitem__(k) super(CacheDict, self).__setitem__(key, value) numexpr-2.2.2/numexpr/necompiler.py0000644000175000001440000006232312221256772020072 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### import __future__ import sys import numpy from numexpr import interpreter, expressions, use_vml, is_cpu_amd_intel from numexpr.utils import CacheDict # Declare a double type that does not exist in Python space double = numpy.double if sys.version_info[0] < 3: int_ = int long_ = long else: int_ = numpy.int32 long_ = numpy.int64 typecode_to_kind = {'b': 'bool', 'i': 'int', 'l': 'long', 'f': 'float', 'd': 'double', 'c': 'complex', 's': 'bytes', 'n' : 'none'} kind_to_typecode = {'bool': 'b', 'int': 'i', 'long': 'l', 'float': 'f', 'double': 'd', 'complex': 'c', 'bytes': 's', 'none' : 'n'} type_to_typecode = {bool: 'b', int_: 'i', long_:'l', float:'f', double: 'd', complex: 'c', bytes: 's'} type_to_kind = expressions.type_to_kind kind_to_type = expressions.kind_to_type default_type = kind_to_type[expressions.default_kind] # Final addtions for Python 3 (mainly for PyTables needs) if sys.version_info[0] > 2: typecode_to_kind['s'] = 'str' kind_to_typecode['str'] = 's' type_to_typecode[str] = 's' scalar_constant_kinds = kind_to_typecode.keys() class ASTNode(object): """Abstract Syntax Tree node. Members: astType -- type of node (op, constant, variable, raw, or alias) astKind -- the type of the result (bool, float, etc.) value -- value associated with this node. An opcode, numerical value, a variable name, etc. children -- the children below this node reg -- the register assigned to the result for this node. 
""" cmpnames = ['astType', 'astKind', 'value', 'children'] def __init__(self, astType='generic', astKind='unknown', value=None, children=()): object.__init__(self) self.astType = astType self.astKind = astKind self.value = value self.children = tuple(children) self.reg = None def __eq__(self, other): if self.astType == 'alias': self = self.value if other.astType == 'alias': other = other.value if not isinstance(other, ASTNode): return False for name in self.cmpnames: if getattr(self, name) != getattr(other, name): return False return True def __hash__(self): if self.astType == 'alias': self = self.value return hash((self.astType, self.astKind, self.value, self.children)) def __str__(self): return 'AST(%s, %s, %s, %s, %s)' % (self.astType, self.astKind, self.value, self.children, self.reg) def __repr__(self): return '' % id(self) def key(self): return (self.astType, self.astKind, self.value, self.children) def typecode(self): return kind_to_typecode[self.astKind] def postorderWalk(self): for c in self.children: for w in c.postorderWalk(): yield w yield self def allOf(self, *astTypes): astTypes = set(astTypes) for w in self.postorderWalk(): if w.astType in astTypes: yield w def expressionToAST(ex): """Take an expression tree made out of expressions.ExpressionNode, and convert to an AST tree. This is necessary as ExpressionNode overrides many methods to act like a number. """ return ASTNode(ex.astType, ex.astKind, ex.value, [expressionToAST(c) for c in ex.children]) def sigPerms(s): """Generate all possible signatures derived by upcasting the given signature. """ codes = 'bilfdc' if not s: yield '' elif s[0] in codes: start = codes.index(s[0]) for x in codes[start:]: for y in sigPerms(s[1:]): yield x + y elif s[0] == 's': # numbers shall not be cast to strings for y in sigPerms(s[1:]): yield 's' + y else: yield s def typeCompileAst(ast): """Assign appropiate types to each node in the AST. 
Will convert opcodes and functions to appropiate upcast version, and add "cast" ops if needed. """ children = list(ast.children) if ast.astType == 'op': retsig = ast.typecode() basesig = ''.join(x.typecode() for x in list(ast.children)) # Find some operation that will work on an acceptable casting of args. for sig in sigPerms(basesig): value = (ast.value + '_' + retsig + sig).encode('ascii') if value in interpreter.opcodes: break else: for sig in sigPerms(basesig): funcname = (ast.value + '_' + retsig + sig).encode('ascii') if funcname in interpreter.funccodes: value = ('func_%sn' % (retsig+sig)).encode('ascii') children += [ASTNode('raw', 'none', interpreter.funccodes[funcname])] break else: raise NotImplementedError( "couldn't find matching opcode for '%s'" % (ast.value + '_' + retsig+basesig)) # First just cast constants, then cast variables if necessary: for i, (have, want) in enumerate(zip(basesig, sig)): if have != want: kind = typecode_to_kind[want] if children[i].astType == 'constant': children[i] = ASTNode('constant', kind, children[i].value) else: opname = "cast" children[i] = ASTNode('op', kind, opname, [children[i]]) else: value = ast.value children = ast.children return ASTNode(ast.astType, ast.astKind, value, [typeCompileAst(c) for c in children]) class Register(object): """Abstraction for a register in the VM. Members: node -- the AST node this corresponds to temporary -- True if this isn't an input or output immediate -- not a register, but an immediate value n -- the physical register number. None if no number assigned yet. 
""" def __init__(self, astnode, temporary=False): self.node = astnode self.temporary = temporary self.immediate = False self.n = None def __str__(self): if self.temporary: name = 'Temporary' else: name = 'Register' return '%s(%s, %s, %s)' % (name, self.node.astType, self.node.astKind, self.n,) def __repr__(self): return self.__str__() class Immediate(Register): """Representation of an immediate (integer) operand, instead of a register. """ def __init__(self, astnode): Register.__init__(self, astnode) self.immediate = True def __str__(self): return 'Immediate(%d)' % (self.node.value,) def stringToExpression(s, types, context): """Given a string, convert it to a tree of ExpressionNode's. """ old_ctx = expressions._context.get_current_context() try: expressions._context.set_new_context(context) # first compile to a code object to determine the names if context.get('truediv', False): flags = __future__.division.compiler_flag else: flags = 0 c = compile(s, '', 'eval', flags) # make VariableNode's for the names names = {} for name in c.co_names: if name == "None": names[name] = None elif name == "True": names[name] = True elif name == "False": names[name] = False else: t = types.get(name, default_type) names[name] = expressions.VariableNode(name, type_to_kind[t]) names.update(expressions.functions) # now build the expression ex = eval(c, names) if expressions.isConstant(ex): ex = expressions.ConstantNode(ex, expressions.getKind(ex)) elif not isinstance(ex, expressions.ExpressionNode): raise TypeError("unsupported expression type: %s" % type(ex)) finally: expressions._context.set_new_context(old_ctx) return ex def isReduction(ast): return ast.value.startswith(b'sum_') or ast.value.startswith(b'prod_') def getInputOrder(ast, input_order=None): """Derive the input order of the variables in an expression. 
""" variables = {} for a in ast.allOf('variable'): variables[a.value] = a variable_names = set(variables.keys()) if input_order: if variable_names != set(input_order): raise ValueError( "input names (%s) don't match those found in expression (%s)" % (input_order, variable_names)) ordered_names = input_order else: ordered_names = list(variable_names) ordered_names.sort() ordered_variables = [variables[v] for v in ordered_names] return ordered_variables def convertConstantToKind(x, kind): # Exception for 'float' types that will return the NumPy float32 type if kind == 'float': return numpy.float32(x) return kind_to_type[kind](x) def getConstants(ast): const_map = {} for a in ast.allOf('constant'): const_map[(a.astKind, a.value)] = a ordered_constants = const_map.keys() ordered_constants.sort() constants_order = [const_map[v] for v in ordered_constants] constants = [convertConstantToKind(a.value, a.astKind) for a in constants_order] return constants_order, constants def sortNodesByOrder(nodes, order): order_map = {} for i, (_, v, _) in enumerate(order): order_map[v] = i dec_nodes = [(order_map[n.value], n) for n in nodes] dec_nodes.sort() return [a[1] for a in dec_nodes] def assignLeafRegisters(inodes, registerMaker): """Assign new registers to each of the leaf nodes. """ leafRegisters = {} for node in inodes: key = node.key() if key in leafRegisters: node.reg = leafRegisters[key] else: node.reg = leafRegisters[key] = registerMaker(node) def assignBranchRegisters(inodes, registerMaker): """Assign temporary registers to each of the branch nodes. """ for node in inodes: node.reg = registerMaker(node, temporary=True) def collapseDuplicateSubtrees(ast): """Common subexpression elimination. 
""" seen = {} aliases = [] for a in ast.allOf('op'): if a in seen: target = seen[a] a.astType = 'alias' a.value = target a.children = () aliases.append(a) else: seen[a] = a # Set values and registers so optimizeTemporariesAllocation # doesn't get confused for a in aliases: while a.value.astType == 'alias': a.value = a.value.value return aliases def optimizeTemporariesAllocation(ast): """Attempt to minimize the number of temporaries needed, by reusing old ones. """ nodes = [n for n in ast.postorderWalk() if n.reg.temporary] users_of = dict((n.reg, set()) for n in nodes) node_regs = dict((n, set(c.reg for c in n.children if c.reg.temporary)) for n in nodes) if nodes and nodes[-1] is not ast: nodes_to_check = nodes + [ast] else: nodes_to_check = nodes for n in nodes_to_check: for c in n.children: if c.reg.temporary: users_of[c.reg].add(n) unused = dict([(tc, set()) for tc in scalar_constant_kinds]) for n in nodes: for c in n.children: reg = c.reg if reg.temporary: users = users_of[reg] users.discard(n) if not users: unused[reg.node.astKind].add(reg) if unused[n.astKind]: reg = unused[n.astKind].pop() users_of[reg] = users_of[n.reg] n.reg = reg def setOrderedRegisterNumbers(order, start): """Given an order of nodes, assign register numbers. """ for i, node in enumerate(order): node.reg.n = start + i return start + len(order) def setRegisterNumbersForTemporaries(ast, start): """Assign register numbers for temporary registers, keeping track of aliases and handling immediate operands. """ seen = 0 signature = '' aliases = [] for node in ast.postorderWalk(): if node.astType == 'alias': aliases.append(node) node = node.value if node.reg.immediate: node.reg.n = node.value continue reg = node.reg if reg.n is None: reg.n = start + seen seen += 1 signature += reg.node.typecode() for node in aliases: node.reg = node.value.reg return start + seen, signature def convertASTtoThreeAddrForm(ast): """Convert an AST to a three address form. 
Three address form is (op, reg1, reg2, reg3), where reg1 is the destination of the result of the instruction. I suppose this should be called three register form, but three address form is found in compiler theory. """ return [(node.value, node.reg) + tuple([c.reg for c in node.children]) for node in ast.allOf('op')] def compileThreeAddrForm(program): """Given a three address form of the program, compile it a string that the VM understands. """ def nToChr(reg): if reg is None: return b'\xff' elif reg.n < 0: raise ValueError("negative value for register number %s" % reg.n) else: if sys.version_info[0] < 3: return chr(reg.n) else: # int.to_bytes is not available in Python < 3.2 #return reg.n.to_bytes(1, sys.byteorder) return bytes([reg.n]) def quadrupleToString(opcode, store, a1=None, a2=None): cop = chr(interpreter.opcodes[opcode]).encode('ascii') cs = nToChr(store) ca1 = nToChr(a1) ca2 = nToChr(a2) return cop + cs + ca1 + ca2 def toString(args): while len(args) < 4: args += (None,) opcode, store, a1, a2 = args[:4] s = quadrupleToString(opcode, store, a1, a2) l = [s] args = args[4:] while args: s = quadrupleToString(b'noop', *args[:3]) l.append(s) args = args[3:] return b''.join(l) prog_str = b''.join([toString(t) for t in program]) return prog_str context_info = [ ('optimization', ('none', 'moderate', 'aggressive'), 'aggressive'), ('truediv', (False, True, 'auto'), 'auto') ] def getContext(kwargs, frame_depth=1): d = kwargs.copy() context = {} for name, allowed, default in context_info: value = d.pop(name, default) if value in allowed: context[name] = value else: raise ValueError("'%s' must be one of %s" % (name, allowed)) if d: raise ValueError("Unknown keyword argument '%s'" % d.popitem()[0]) if context['truediv'] == 'auto': caller_globals = sys._getframe(frame_depth + 1).f_globals context['truediv'] = \ caller_globals.get('division', None) == __future__.division return context def precompile(ex, signature=(), context={}): """Compile the expression to an 
intermediate form. """ types = dict(signature) input_order = [name for (name, type_) in signature] if isinstance(ex, (str, unicode)): ex = stringToExpression(ex, types, context) # the AST is like the expression, but the node objects don't have # any odd interpretations ast = expressionToAST(ex) if ex.astType != 'op': ast = ASTNode('op', value='copy', astKind=ex.astKind, children=(ast,)) ast = typeCompileAst(ast) aliases = collapseDuplicateSubtrees(ast) assignLeafRegisters(ast.allOf('raw'), Immediate) assignLeafRegisters(ast.allOf('variable', 'constant'), Register) assignBranchRegisters(ast.allOf('op'), Register) # assign registers for aliases for a in aliases: a.reg = a.value.reg input_order = getInputOrder(ast, input_order) constants_order, constants = getConstants(ast) if isReduction(ast): ast.reg.temporary = False optimizeTemporariesAllocation(ast) ast.reg.temporary = False r_output = 0 ast.reg.n = 0 r_inputs = r_output + 1 r_constants = setOrderedRegisterNumbers(input_order, r_inputs) r_temps = setOrderedRegisterNumbers(constants_order, r_constants) r_end, tempsig = setRegisterNumbersForTemporaries(ast, r_temps) threeAddrProgram = convertASTtoThreeAddrForm(ast) input_names = tuple([a.value for a in input_order]) signature = ''.join(type_to_typecode[types.get(x, default_type)] for x in input_names) return threeAddrProgram, signature, tempsig, constants, input_names # Note that the copy_args() is just to guarantee compatibility # with PyTables < 3.0. See #115 for details. def NumExpr(ex, signature=(), copy_args=(), **kwargs): """ Compile an expression built using E. variables to a function. ex can also be specified as a string "2*a+3*b". The order of the input variables and their types can be specified using the signature parameter, which is a list of (name, type) pairs. Returns a `NumExpr` object containing the compiled function. 
""" # NumExpr can be called either directly by the end-user, in which case # kwargs need to be sanitized by getContext, or by evaluate, # in which case kwargs are in already sanitized. # In that case frame_depth is wrong (it should be 2) but it doesn't matter # since it will not be used (because truediv='auto' has already been # translated to either True or False). context = getContext(kwargs, frame_depth=1) threeAddrProgram, inputsig, tempsig, constants, input_names = \ precompile(ex, signature, context) program = compileThreeAddrForm(threeAddrProgram) return interpreter.NumExpr(inputsig.encode('ascii'), tempsig.encode('ascii'), program, constants, input_names) def disassemble(nex): """ Given a NumExpr object, return a list which is the program disassembled. """ rev_opcodes = {} for op in interpreter.opcodes: rev_opcodes[interpreter.opcodes[op]] = op r_constants = 1 + len(nex.signature) r_temps = r_constants + len(nex.constants) def getArg(pc, offset): if sys.version_info[0] < 3: arg = ord(nex.program[pc+offset]) op = rev_opcodes.get(ord(nex.program[pc])) else: arg = nex.program[pc+offset] op = rev_opcodes.get(nex.program[pc]) try: code = op.split(b'_')[1][offset-1] except IndexError: return None if sys.version_info[0] > 2: # int.to_bytes is not available in Python < 3.2 #code = code.to_bytes(1, sys.byteorder) code = bytes([code]) if arg == 255: return None if code != b'n': if arg == 0: return b'r0' elif arg < r_constants: return ('r%d[%s]' % (arg, nex.input_names[arg-1])).encode('ascii') elif arg < r_temps: return ('c%d[%s]' % (arg, nex.constants[arg - r_constants])).encode('ascii') else: return ('t%d' % (arg,)).encode('ascii') else: return arg source = [] for pc in range(0, len(nex.program), 4): if sys.version_info[0] < 3: op = rev_opcodes.get(ord(nex.program[pc])) else: op = rev_opcodes.get(nex.program[pc]) dest = getArg(pc, 1) arg1 = getArg(pc, 2) arg2 = getArg(pc, 3) source.append( (op, dest, arg1, arg2) ) return source def getType(a): kind = a.dtype.kind if 
kind == 'b': return bool if kind in 'iu': if a.dtype.itemsize > 4: return long_ # ``long`` is for integers of more than 32 bits if kind == 'u' and a.dtype.itemsize == 4: return long_ # use ``long`` here as an ``int`` is not enough return int_ if kind == 'f': if a.dtype.itemsize > 4: return double # ``double`` is for floats of more than 32 bits return float if kind == 'c': return complex if kind == 'S': return bytes raise ValueError("unkown type %s" % a.dtype.name) def getExprNames(text, context): ex = stringToExpression(text, {}, context) ast = expressionToAST(ex) input_order = getInputOrder(ast, None) #try to figure out if vml operations are used by expression if not use_vml: ex_uses_vml = False else: for node in ast.postorderWalk(): if node.astType == 'op' \ and node.value in ['sin', 'cos', 'exp', 'log', 'expm1', 'log1p', 'pow', 'div', 'sqrt', 'inv', 'sinh', 'cosh', 'tanh', 'arcsin', 'arccos', 'arctan', 'arccosh', 'arcsinh', 'arctanh', 'arctan2', 'abs']: ex_uses_vml = True break else: ex_uses_vml = False return [a.value for a in input_order], ex_uses_vml # Dictionaries for caching variable names and compiled expressions _names_cache = CacheDict(256) _numexpr_cache = CacheDict(256) def evaluate(ex, local_dict=None, global_dict=None, out=None, order='K', casting='safe', **kwargs): """Evaluate a simple array expression element-wise, using the new iterator. ex is a string forming an expression, like "2*a+3*b". The values for "a" and "b" will by default be taken from the calling function's frame (through use of sys._getframe()). Alternatively, they can be specifed using the 'local_dict' or 'global_dict' arguments. Parameters ---------- local_dict : dictionary, optional A dictionary that replaces the local operands in current frame. global_dict : dictionary, optional A dictionary that replaces the global operands in current frame. out : NumPy array, optional An existing array where the outcome is going to be stored. 
Care is required so that this array has the same shape and type than the actual outcome of the computation. Useful for avoiding unnecessary new array allocations. order : {'C', 'F', 'A', or 'K'}, optional Controls the iteration order for operands. 'C' means C order, 'F' means Fortran order, 'A' means 'F' order if all the arrays are Fortran contiguous, 'C' order otherwise, and 'K' means as close to the order the array elements appear in memory as possible. For efficient computations, typically 'K'eep order (the default) is desired. casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional Controls what kind of data casting may occur when making a copy or buffering. Setting this to 'unsafe' is not recommended, as it can adversely affect accumulations. * 'no' means the data types should not be cast at all. * 'equiv' means only byte-order changes are allowed. * 'safe' means only casts which can preserve values are allowed. * 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. """ if not isinstance(ex, (str, unicode)): raise ValueError("must specify expression as a string") # Get the names for this expression context = getContext(kwargs, frame_depth=1) expr_key = (ex, tuple(sorted(context.items()))) if expr_key not in _names_cache: _names_cache[expr_key] = getExprNames(ex, context) names, ex_uses_vml = _names_cache[expr_key] # Get the arguments based on the names. call_frame = sys._getframe(1) if local_dict is None: local_dict = call_frame.f_locals if global_dict is None: global_dict = call_frame.f_globals arguments = [] for name in names: try: a = local_dict[name] except KeyError: a = global_dict[name] arguments.append(numpy.asarray(a)) # Create a signature signature = [(name, getType(arg)) for (name, arg) in zip(names, arguments)] # Look up numexpr if possible. 
numexpr_key = expr_key + (tuple(signature),) try: compiled_ex = _numexpr_cache[numexpr_key] except KeyError: compiled_ex = _numexpr_cache[numexpr_key] = \ NumExpr(ex, signature, **context) kwargs = {'out': out, 'order': order, 'casting': casting, 'ex_uses_vml': ex_uses_vml} return compiled_ex(*arguments, **kwargs) numexpr-2.2.2/numexpr/win32/0000755000175000001440000000000012221261175016310 5ustar faltetusers00000000000000numexpr-2.2.2/numexpr/win32/pthread.c0000644000175000001440000001565512206676467020140 0ustar faltetusers00000000000000/* * Code for simulating pthreads API on Windows. This is Git-specific, * but it is enough for Numexpr needs too. * * Copyright (C) 2009 Andrzej K. Haczewski * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * DISCLAIMER: The implementation is Git-specific, it is subset of original * Pthreads API, without lots of other features that Git doesn't use. 
* Git also makes sure that the passed arguments are valid, so there's * no need for double-checking. */ #include "pthread.h" #include #include #include #include #include void die(const char *err, ...) { printf("%s", err); exit(-1); } static unsigned __stdcall win32_start_routine(void *arg) { pthread_t *thread = arg; thread->arg = thread->start_routine(thread->arg); return 0; } int pthread_create(pthread_t *thread, const void *unused, void *(*start_routine)(void*), void *arg) { thread->arg = arg; thread->start_routine = start_routine; thread->handle = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine, thread, 0, NULL); if (!thread->handle) return errno; else return 0; } int win32_pthread_join(pthread_t *thread, void **value_ptr) { DWORD result = WaitForSingleObject(thread->handle, INFINITE); switch (result) { case WAIT_OBJECT_0: if (value_ptr) *value_ptr = thread->arg; return 0; case WAIT_ABANDONED: return EINVAL; default: return GetLastError(); } } int pthread_cond_init(pthread_cond_t *cond, const void *unused) { cond->waiters = 0; cond->was_broadcast = 0; InitializeCriticalSection(&cond->waiters_lock); cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); if (!cond->sema) die("CreateSemaphore() failed"); cond->continue_broadcast = CreateEvent(NULL, /* security */ FALSE, /* auto-reset */ FALSE, /* not signaled */ NULL); /* name */ if (!cond->continue_broadcast) die("CreateEvent() failed"); return 0; } int pthread_cond_destroy(pthread_cond_t *cond) { CloseHandle(cond->sema); CloseHandle(cond->continue_broadcast); DeleteCriticalSection(&cond->waiters_lock); return 0; } int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex) { int last_waiter; EnterCriticalSection(&cond->waiters_lock); cond->waiters++; LeaveCriticalSection(&cond->waiters_lock); /* * Unlock external mutex and wait for signal. * NOTE: we've held mutex locked long enough to increment * waiters count above, so there's no problem with * leaving mutex unlocked before we wait on semaphore. 
*/ LeaveCriticalSection(mutex); /* let's wait - ignore return value */ WaitForSingleObject(cond->sema, INFINITE); /* * Decrease waiters count. If we are the last waiter, then we must * notify the broadcasting thread that it can continue. * But if we continued due to cond_signal, we do not have to do that * because the signaling thread knows that only one waiter continued. */ EnterCriticalSection(&cond->waiters_lock); cond->waiters--; last_waiter = cond->was_broadcast && cond->waiters == 0; LeaveCriticalSection(&cond->waiters_lock); if (last_waiter) { /* * cond_broadcast was issued while mutex was held. This means * that all other waiters have continued, but are contending * for the mutex at the end of this function because the * broadcasting thread did not leave cond_broadcast, yet. * (This is so that it can be sure that each waiter has * consumed exactly one slice of the semaphor.) * The last waiter must tell the broadcasting thread that it * can go on. */ SetEvent(cond->continue_broadcast); /* * Now we go on to contend with all other waiters for * the mutex. Auf in den Kampf! */ } /* lock external mutex again */ EnterCriticalSection(mutex); return 0; } /* * IMPORTANT: This implementation requires that pthread_cond_signal * is called while the mutex is held that is used in the corresponding * pthread_cond_wait calls! */ int pthread_cond_signal(pthread_cond_t *cond) { int have_waiters; EnterCriticalSection(&cond->waiters_lock); have_waiters = cond->waiters > 0; LeaveCriticalSection(&cond->waiters_lock); /* * Signal only when there are waiters */ if (have_waiters) return ReleaseSemaphore(cond->sema, 1, NULL) ? 0 : GetLastError(); else return 0; } /* * DOUBLY IMPORTANT: This implementation requires that pthread_cond_broadcast * is called while the mutex is held that is used in the corresponding * pthread_cond_wait calls! 
*/ int pthread_cond_broadcast(pthread_cond_t *cond) { EnterCriticalSection(&cond->waiters_lock); if ((cond->was_broadcast = cond->waiters > 0)) { /* wake up all waiters */ ReleaseSemaphore(cond->sema, cond->waiters, NULL); LeaveCriticalSection(&cond->waiters_lock); /* * At this point all waiters continue. Each one takes its * slice of the semaphor. Now it's our turn to wait: Since * the external mutex is held, no thread can leave cond_wait, * yet. For this reason, we can be sure that no thread gets * a chance to eat *more* than one slice. OTOH, it means * that the last waiter must send us a wake-up. */ WaitForSingleObject(cond->continue_broadcast, INFINITE); /* * Since the external mutex is held, no thread can enter * cond_wait, and, hence, it is safe to reset this flag * without cond->waiters_lock held. */ cond->was_broadcast = 0; } else { LeaveCriticalSection(&cond->waiters_lock); } return 0; } numexpr-2.2.2/numexpr/win32/pthread.h0000644000175000001440000000641312206676467020135 0ustar faltetusers00000000000000/* * Code for simulating pthreads API on Windows. This is Git-specific, * but it is enough for Numexpr needs too. * * Copyright (C) 2009 Andrzej K. Haczewski * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * DISCLAIMER: The implementation is Git-specific, it is subset of original * Pthreads API, without lots of other features that Git doesn't use. * Git also makes sure that the passed arguments are valid, so there's * no need for double-checking. */ #ifndef PTHREAD_H #define PTHREAD_H #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include #ifdef __cplusplus extern "C" { #endif /* * Defines that adapt Windows API threads to pthreads API */ #define pthread_mutex_t CRITICAL_SECTION #define pthread_mutex_init(a,b) InitializeCriticalSection((a)) #define pthread_mutex_destroy(a) DeleteCriticalSection((a)) #define pthread_mutex_lock EnterCriticalSection #define pthread_mutex_unlock LeaveCriticalSection /* * Implement simple condition variable for Windows threads, based on ACE * implementation. 
* * See original implementation: http://bit.ly/1vkDjo * ACE homepage: http://www.cse.wustl.edu/~schmidt/ACE.html * See also: http://www.cse.wustl.edu/~schmidt/win32-cv-1.html */ typedef struct { LONG waiters; int was_broadcast; CRITICAL_SECTION waiters_lock; HANDLE sema; HANDLE continue_broadcast; } pthread_cond_t; extern int pthread_cond_init(pthread_cond_t *cond, const void *unused); extern int pthread_cond_destroy(pthread_cond_t *cond); extern int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex); extern int pthread_cond_signal(pthread_cond_t *cond); extern int pthread_cond_broadcast(pthread_cond_t *cond); /* * Simple thread creation implementation using pthread API */ typedef struct { HANDLE handle; void *(*start_routine)(void*); void *arg; } pthread_t; extern int pthread_create(pthread_t *thread, const void *unused, void *(*start_routine)(void*), void *arg); /* * To avoid the need of copying a struct, we use small macro wrapper to pass * pointer to win32_pthread_join instead. */ #define pthread_join(a, b) win32_pthread_join(&(a), (b)) extern int win32_pthread_join(pthread_t *thread, void **value_ptr); #ifdef __cplusplus } // extern "C" #endif #endif /* PTHREAD_H */ numexpr-2.2.2/numexpr/version.py0000644000175000001440000000151612221260713017405 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### version='2.2.2' release=True if not release: version += '.dev' import os svn_version_file = os.path.join(os.path.dirname(__file__), '__svn_version__.py') if os.path.isfile(svn_version_file): import imp svn = imp.load_module('numexpr.__svn_version__', open(svn_version_file), svn_version_file, ('.py','U',1)) version += svn.version numexpr-2.2.2/numexpr/interp_body.cpp0000644000175000001440000004634512206710034020400 0ustar faltetusers00000000000000/********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ { #define VEC_LOOP(expr) for(j = 0; j < BLOCK_SIZE; j++) { \ expr; \ } #define VEC_ARG0(expr) \ BOUNDS_CHECK(store_in); \ { \ char *dest = mem[store_in]; \ VEC_LOOP(expr); \ } break #define VEC_ARG1(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ npy_intp ss1 = params.memsizes[arg1]; \ npy_intp sb1 = memsteps[arg1]; \ /* nowarns is defined and used so as to \ avoid compiler warnings about unused \ variables */ \ npy_intp nowarns = ss1+sb1+*x1; \ nowarns += 1; \ VEC_LOOP(expr); \ } break #define VEC_ARG2(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ BOUNDS_CHECK(arg2); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ npy_intp ss1 = params.memsizes[arg1]; \ npy_intp sb1 = memsteps[arg1]; \ /* nowarns is defined and used so as to \ avoid compiler warnings about unused \ variables */ \ npy_intp nowarns = ss1+sb1+*x1; \ char *x2 = mem[arg2]; \ npy_intp ss2 = params.memsizes[arg2]; \ npy_intp sb2 = memsteps[arg2]; \ nowarns += ss2+sb2+*x2; \ VEC_LOOP(expr); \ } break #define VEC_ARG3(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ BOUNDS_CHECK(arg2); \ 
BOUNDS_CHECK(arg3); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ npy_intp ss1 = params.memsizes[arg1]; \ npy_intp sb1 = memsteps[arg1]; \ /* nowarns is defined and used so as to \ avoid compiler warnings about unused \ variables */ \ npy_intp nowarns = ss1+sb1+*x1; \ char *x2 = mem[arg2]; \ npy_intp ss2 = params.memsizes[arg2]; \ npy_intp sb2 = memsteps[arg2]; \ char *x3 = mem[arg3]; \ npy_intp ss3 = params.memsizes[arg3]; \ npy_intp sb3 = memsteps[arg3]; \ nowarns += ss2+sb2+*x2; \ nowarns += ss3+sb3+*x3; \ VEC_LOOP(expr); \ } break #define VEC_ARG1_VML(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ expr; \ } break #define VEC_ARG2_VML(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ BOUNDS_CHECK(arg2); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ char *x2 = mem[arg2]; \ expr; \ } break #define VEC_ARG3_VML(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ BOUNDS_CHECK(arg2); \ BOUNDS_CHECK(arg3); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ char *x2 = mem[arg2]; \ char *x3 = mem[arg3]; \ expr; \ } break int pc; unsigned int j; // set up pointers to next block of inputs and outputs #ifdef SINGLE_ITEM_CONST_LOOP mem[0] = params.output; #else // SINGLE_ITEM_CONST_LOOP // use the iterator's inner loop data memcpy(mem, iter_dataptr, (1+params.n_inputs)*sizeof(char*)); # ifndef NO_OUTPUT_BUFFERING // if output buffering is necessary, first write to the buffer if(params.out_buffer != NULL) { mem[0] = params.out_buffer; } # endif // NO_OUTPUT_BUFFERING memcpy(memsteps, iter_strides, (1+params.n_inputs)*sizeof(npy_intp)); #endif // SINGLE_ITEM_CONST_LOOP // WARNING: From now on, only do references to mem[arg[123]] // & memsteps[arg[123]] inside the VEC_ARG[123] macros, // or you will risk accessing invalid addresses. 
for (pc = 0; pc < params.prog_len; pc += 4) { unsigned char op = params.program[pc]; unsigned int store_in = params.program[pc+1]; unsigned int arg1 = params.program[pc+2]; unsigned int arg2 = params.program[pc+3]; #define arg3 params.program[pc+5] // Iterator reduce macros #ifdef REDUCTION_INNER_LOOP // Reduce is the inner loop #define i_reduce *(int *)dest #define l_reduce *(long long *)dest #define f_reduce *(float *)dest #define d_reduce *(double *)dest #define cr_reduce *(double *)dest #define ci_reduce *((double *)dest+1) #else /* Reduce is the outer loop */ #define i_reduce i_dest #define l_reduce l_dest #define f_reduce f_dest #define d_reduce d_dest #define cr_reduce cr_dest #define ci_reduce ci_dest #endif #define b_dest ((char *)dest)[j] #define i_dest ((int *)dest)[j] #define l_dest ((long long *)dest)[j] #define f_dest ((float *)dest)[j] #define d_dest ((double *)dest)[j] #define cr_dest ((double *)dest)[2*j] #define ci_dest ((double *)dest)[2*j+1] #define s_dest ((char *)dest + j*memsteps[store_in]) #define b1 ((char *)(x1+j*sb1))[0] #define i1 ((int *)(x1+j*sb1))[0] #define l1 ((long long *)(x1+j*sb1))[0] #define f1 ((float *)(x1+j*sb1))[0] #define d1 ((double *)(x1+j*sb1))[0] #define c1r ((double *)(x1+j*sb1))[0] #define c1i ((double *)(x1+j*sb1))[1] #define s1 ((char *)x1+j*sb1) #define b2 ((char *)(x2+j*sb2))[0] #define i2 ((int *)(x2+j*sb2))[0] #define l2 ((long long *)(x2+j*sb2))[0] #define f2 ((float *)(x2+j*sb2))[0] #define d2 ((double *)(x2+j*sb2))[0] #define c2r ((double *)(x2+j*sb2))[0] #define c2i ((double *)(x2+j*sb2))[1] #define s2 ((char *)x2+j*sb2) #define b3 ((char *)(x3+j*sb3))[0] #define i3 ((int *)(x3+j*sb3))[0] #define l3 ((long long *)(x3+j*sb3))[0] #define f3 ((float *)(x3+j*sb3))[0] #define d3 ((double *)(x3+j*sb3))[0] #define c3r ((double *)(x3+j*sb3))[0] #define c3i ((double *)(x3+j*sb3))[1] #define s3 ((char *)x3+j*sb3) /* Some temporaries */ double da, db; npy_cdouble ca, cb; switch (op) { case OP_NOOP: break; case 
OP_COPY_BB: VEC_ARG1(b_dest = b1); case OP_COPY_SS: VEC_ARG1(memcpy(s_dest, s1, ss1)); /* The next versions of copy opcodes can cope with unaligned data even on platforms that crash while accessing it (like the Sparc architecture under Solaris). */ case OP_COPY_II: VEC_ARG1(memcpy(&i_dest, s1, sizeof(int))); case OP_COPY_LL: VEC_ARG1(memcpy(&l_dest, s1, sizeof(long long))); case OP_COPY_FF: VEC_ARG1(memcpy(&f_dest, s1, sizeof(float))); case OP_COPY_DD: VEC_ARG1(memcpy(&d_dest, s1, sizeof(double))); case OP_COPY_CC: VEC_ARG1(memcpy(&cr_dest, s1, sizeof(double)*2)); /* Bool */ case OP_INVERT_BB: VEC_ARG1(b_dest = !b1); case OP_AND_BBB: VEC_ARG2(b_dest = (b1 && b2)); case OP_OR_BBB: VEC_ARG2(b_dest = (b1 || b2)); case OP_EQ_BBB: VEC_ARG2(b_dest = (b1 == b2)); case OP_NE_BBB: VEC_ARG2(b_dest = (b1 != b2)); case OP_WHERE_BBBB: VEC_ARG3(b_dest = b1 ? b2 : b3); /* Comparisons */ case OP_GT_BII: VEC_ARG2(b_dest = (i1 > i2)); case OP_GE_BII: VEC_ARG2(b_dest = (i1 >= i2)); case OP_EQ_BII: VEC_ARG2(b_dest = (i1 == i2)); case OP_NE_BII: VEC_ARG2(b_dest = (i1 != i2)); case OP_GT_BLL: VEC_ARG2(b_dest = (l1 > l2)); case OP_GE_BLL: VEC_ARG2(b_dest = (l1 >= l2)); case OP_EQ_BLL: VEC_ARG2(b_dest = (l1 == l2)); case OP_NE_BLL: VEC_ARG2(b_dest = (l1 != l2)); case OP_GT_BFF: VEC_ARG2(b_dest = (f1 > f2)); case OP_GE_BFF: VEC_ARG2(b_dest = (f1 >= f2)); case OP_EQ_BFF: VEC_ARG2(b_dest = (f1 == f2)); case OP_NE_BFF: VEC_ARG2(b_dest = (f1 != f2)); case OP_GT_BDD: VEC_ARG2(b_dest = (d1 > d2)); case OP_GE_BDD: VEC_ARG2(b_dest = (d1 >= d2)); case OP_EQ_BDD: VEC_ARG2(b_dest = (d1 == d2)); case OP_NE_BDD: VEC_ARG2(b_dest = (d1 != d2)); case OP_GT_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) > 0)); case OP_GE_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) >= 0)); case OP_EQ_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) == 0)); case OP_NE_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) != 0)); /* Int */ case OP_CAST_IB: VEC_ARG1(i_dest = (int)(b1)); case OP_ONES_LIKE_II: 
VEC_ARG0(i_dest = 1); case OP_NEG_II: VEC_ARG1(i_dest = -i1); case OP_ADD_III: VEC_ARG2(i_dest = i1 + i2); case OP_SUB_III: VEC_ARG2(i_dest = i1 - i2); case OP_MUL_III: VEC_ARG2(i_dest = i1 * i2); case OP_DIV_III: VEC_ARG2(i_dest = i2 ? (i1 / i2) : 0); case OP_POW_III: VEC_ARG2(i_dest = (i2 < 0) ? (1 / i1) : (int)pow((double)i1, i2)); case OP_MOD_III: VEC_ARG2(i_dest = i2 ? (i1 % i2) : 0); case OP_WHERE_IBII: VEC_ARG3(i_dest = b1 ? i2 : i3); /* Long */ case OP_CAST_LI: VEC_ARG1(l_dest = (long long)(i1)); case OP_ONES_LIKE_LL: VEC_ARG0(l_dest = 1); case OP_NEG_LL: VEC_ARG1(l_dest = -l1); case OP_ADD_LLL: VEC_ARG2(l_dest = l1 + l2); case OP_SUB_LLL: VEC_ARG2(l_dest = l1 - l2); case OP_MUL_LLL: VEC_ARG2(l_dest = l1 * l2); case OP_DIV_LLL: VEC_ARG2(l_dest = l2 ? (l1 / l2) : 0); case OP_POW_LLL: VEC_ARG2(l_dest = (l2 < 0) ? (1 / l1) : (long long)pow((long double)l1, (long double)l2)); case OP_MOD_LLL: VEC_ARG2(l_dest = l2 ? (l1 % l2) : 0); case OP_WHERE_LBLL: VEC_ARG3(l_dest = b1 ? l2 : l3); /* Float */ case OP_CAST_FI: VEC_ARG1(f_dest = (float)(i1)); case OP_CAST_FL: VEC_ARG1(f_dest = (float)(l1)); case OP_ONES_LIKE_FF: VEC_ARG0(f_dest = 1.0); case OP_NEG_FF: VEC_ARG1(f_dest = -f1); case OP_ADD_FFF: VEC_ARG2(f_dest = f1 + f2); case OP_SUB_FFF: VEC_ARG2(f_dest = f1 - f2); case OP_MUL_FFF: VEC_ARG2(f_dest = f1 * f2); case OP_DIV_FFF: #ifdef USE_VML VEC_ARG2_VML(vsDiv(BLOCK_SIZE, (float*)x1, (float*)x2, (float*)dest)); #else VEC_ARG2(f_dest = f1 / f2); #endif case OP_POW_FFF: #ifdef USE_VML VEC_ARG2_VML(vsPow(BLOCK_SIZE, (float*)x1, (float*)x2, (float*)dest)); #else VEC_ARG2(f_dest = powf(f1, f2)); #endif case OP_MOD_FFF: VEC_ARG2(f_dest = f1 - floorf(f1/f2) * f2); case OP_SQRT_FF: #ifdef USE_VML VEC_ARG1_VML(vsSqrt(BLOCK_SIZE, (float*)x1, (float*)dest)); #else VEC_ARG1(f_dest = sqrtf(f1)); #endif case OP_WHERE_FBFF: VEC_ARG3(f_dest = b1 ? 
f2 : f3); case OP_FUNC_FFN: #ifdef USE_VML VEC_ARG1_VML(functions_ff_vml[arg2](BLOCK_SIZE, (float*)x1, (float*)dest)); #else VEC_ARG1(f_dest = functions_ff[arg2](f1)); #endif case OP_FUNC_FFFN: #ifdef USE_VML VEC_ARG2_VML(functions_fff_vml[arg3](BLOCK_SIZE, (float*)x1, (float*)x2, (float*)dest)); #else VEC_ARG2(f_dest = functions_fff[arg3](f1, f2)); #endif /* Double */ case OP_CAST_DI: VEC_ARG1(d_dest = (double)(i1)); case OP_CAST_DL: VEC_ARG1(d_dest = (double)(l1)); case OP_CAST_DF: VEC_ARG1(d_dest = (double)(f1)); case OP_ONES_LIKE_DD: VEC_ARG0(d_dest = 1.0); case OP_NEG_DD: VEC_ARG1(d_dest = -d1); case OP_ADD_DDD: VEC_ARG2(d_dest = d1 + d2); case OP_SUB_DDD: VEC_ARG2(d_dest = d1 - d2); case OP_MUL_DDD: VEC_ARG2(d_dest = d1 * d2); case OP_DIV_DDD: #ifdef USE_VML VEC_ARG2_VML(vdDiv(BLOCK_SIZE, (double*)x1, (double*)x2, (double*)dest)); #else VEC_ARG2(d_dest = d1 / d2); #endif case OP_POW_DDD: #ifdef USE_VML VEC_ARG2_VML(vdPow(BLOCK_SIZE, (double*)x1, (double*)x2, (double*)dest)); #else VEC_ARG2(d_dest = pow(d1, d2)); #endif case OP_MOD_DDD: VEC_ARG2(d_dest = d1 - floor(d1/d2) * d2); case OP_SQRT_DD: #ifdef USE_VML VEC_ARG1_VML(vdSqrt(BLOCK_SIZE, (double*)x1, (double*)dest)); #else VEC_ARG1(d_dest = sqrt(d1)); #endif case OP_WHERE_DBDD: VEC_ARG3(d_dest = b1 ? 
d2 : d3); case OP_FUNC_DDN: #ifdef USE_VML VEC_ARG1_VML(functions_dd_vml[arg2](BLOCK_SIZE, (double*)x1, (double*)dest)); #else VEC_ARG1(d_dest = functions_dd[arg2](d1)); #endif case OP_FUNC_DDDN: #ifdef USE_VML VEC_ARG2_VML(functions_ddd_vml[arg3](BLOCK_SIZE, (double*)x1, (double*)x2, (double*)dest)); #else VEC_ARG2(d_dest = functions_ddd[arg3](d1, d2)); #endif /* Complex */ case OP_CAST_CI: VEC_ARG1(cr_dest = (double)(i1); ci_dest = 0); case OP_CAST_CL: VEC_ARG1(cr_dest = (double)(l1); ci_dest = 0); case OP_CAST_CF: VEC_ARG1(cr_dest = f1; ci_dest = 0); case OP_CAST_CD: VEC_ARG1(cr_dest = d1; ci_dest = 0); case OP_ONES_LIKE_CC: VEC_ARG0(cr_dest = 1; ci_dest = 0); case OP_NEG_CC: VEC_ARG1(cr_dest = -c1r; ci_dest = -c1i); case OP_ADD_CCC: VEC_ARG2(cr_dest = c1r + c2r; ci_dest = c1i + c2i); case OP_SUB_CCC: VEC_ARG2(cr_dest = c1r - c2r; ci_dest = c1i - c2i); case OP_MUL_CCC: VEC_ARG2(da = c1r*c2r - c1i*c2i; ci_dest = c1r*c2i + c1i*c2r; cr_dest = da); case OP_DIV_CCC: #ifdef USE_VMLXXX /* VML complex division is slower */ VEC_ARG2_VML(vzDiv(BLOCK_SIZE, (const MKL_Complex16*)x1, (const MKL_Complex16*)x2, (MKL_Complex16*)dest)); #else VEC_ARG2(da = c2r*c2r + c2i*c2i; db = (c1r*c2r + c1i*c2i) / da; ci_dest = (c1i*c2r - c1r*c2i) / da; cr_dest = db); #endif case OP_EQ_BCC: VEC_ARG2(b_dest = (c1r == c2r && c1i == c2i)); case OP_NE_BCC: VEC_ARG2(b_dest = (c1r != c2r || c1i != c2i)); case OP_WHERE_CBCC: VEC_ARG3(cr_dest = b1 ? c2r : c3r; ci_dest = b1 ? 
c2i : c3i); case OP_FUNC_CCN: #ifdef USE_VML VEC_ARG1_VML(functions_cc_vml[arg2](BLOCK_SIZE, (const MKL_Complex16*)x1, (MKL_Complex16*)dest)); #else VEC_ARG1(ca.real = c1r; ca.imag = c1i; functions_cc[arg2](&ca, &ca); cr_dest = ca.real; ci_dest = ca.imag); #endif case OP_FUNC_CCCN: VEC_ARG2(ca.real = c1r; ca.imag = c1i; cb.real = c2r; cb.imag = c2i; functions_ccc[arg3](&ca, &cb, &ca); cr_dest = ca.real; ci_dest = ca.imag); case OP_REAL_DC: VEC_ARG1(d_dest = c1r); case OP_IMAG_DC: VEC_ARG1(d_dest = c1i); case OP_COMPLEX_CDD: VEC_ARG2(cr_dest = d1; ci_dest = d2); /* Reductions */ case OP_SUM_IIN: VEC_ARG1(i_reduce += i1); case OP_SUM_LLN: VEC_ARG1(l_reduce += l1); case OP_SUM_FFN: VEC_ARG1(f_reduce += f1); case OP_SUM_DDN: VEC_ARG1(d_reduce += d1); case OP_SUM_CCN: VEC_ARG1(cr_reduce += c1r; ci_reduce += c1i); case OP_PROD_IIN: VEC_ARG1(i_reduce *= i1); case OP_PROD_LLN: VEC_ARG1(l_reduce *= l1); case OP_PROD_FFN: VEC_ARG1(f_reduce *= f1); case OP_PROD_DDN: VEC_ARG1(d_reduce *= d1); case OP_PROD_CCN: VEC_ARG1(da = cr_reduce*c1r - ci_reduce*c1i; ci_reduce = cr_reduce*c1i + ci_reduce*c1r; cr_reduce = da); default: *pc_error = pc; return -3; break; } } #ifndef NO_OUTPUT_BUFFERING // If output buffering was necessary, copy the buffer to the output if(params.out_buffer != NULL) { memcpy(iter_dataptr[0], params.out_buffer, params.memsizes[0] * BLOCK_SIZE); } #endif // NO_OUTPUT_BUFFERING #undef VEC_LOOP #undef VEC_ARG1 #undef VEC_ARG2 #undef VEC_ARG3 #undef i_reduce #undef l_reduce #undef f_reduce #undef d_reduce #undef cr_reduce #undef ci_reduce #undef b_dest #undef i_dest #undef l_dest #undef f_dest #undef d_dest #undef cr_dest #undef ci_dest #undef s_dest #undef b1 #undef i1 #undef l1 #undef f1 #undef d1 #undef c1r #undef c1i #undef s1 #undef b2 #undef i2 #undef l2 #undef f2 #undef d2 #undef c2r #undef c2i #undef s2 #undef b3 #undef i3 #undef l3 #undef f3 #undef d3 #undef c3r #undef c3i #undef s3 } /* Local Variables: c-basic-offset: 4 End: */ 
numexpr-2.2.2/numexpr/numexpr_config.hpp0000644000175000001440000000321312132261472021102 0ustar faltetusers00000000000000#ifndef NUMEXPR_CONFIG_HPP #define NUMEXPR_CONFIG_HPP // x86 platform works with unaligned reads and writes // MW: I have seen exceptions to this when the compiler chooses to use aligned SSE #if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64)) # define USE_UNALIGNED_ACCESS 1 #endif #ifdef USE_VML /* The values below have been tuned for a nowadays Core2 processor */ /* Note: with VML functions a larger block size (e.g. 4096) allows to make use * of the automatic multithreading capabilities of the VML library */ #define BLOCK_SIZE1 4096 #define BLOCK_SIZE2 32 #else /* The values below have been tuned for a nowadays Core2 processor */ /* Note: without VML available a smaller block size is best, specially * for the strided and unaligned cases. Recent implementation of * multithreading make it clear that larger block sizes benefit * performance (although it seems like we don't need very large sizes * like VML yet). */ #define BLOCK_SIZE1 1024 #define BLOCK_SIZE2 16 #endif /* The maximum number of threads (for some static arrays). * Choose this large enough for most monsters out there. Keep in sync this with the number in __init__.py. 
*/ #define MAX_THREADS 4096 #if defined(_WIN32) #include "win32/pthread.h" #include #define getpid _getpid #else #include #include "unistd.h" #endif #ifdef SCIPY_MKL_H #define USE_VML #endif #ifdef USE_VML #include "mkl_vml.h" #include "mkl_service.h" #endif #ifdef _WIN32 #ifndef __MINGW32__ #include "missing_posix_functions.hpp" #endif #include "msvc_function_stubs.hpp" #endif #endif // NUMEXPR_CONFIG_HPPnumexpr-2.2.2/numexpr/msvc_function_stubs.hpp0000644000175000001440000000614112132261472022157 0ustar faltetusers00000000000000#ifndef NUMEXPR_MSVC_FUNCTION_STUBS_HPP #define NUMEXPR_MSVC_FUNCTION_STUBS_HPP /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ /* Declare stub functions for MSVC. It turns out that single precision definitions in are actually #define'd and are not usable as function pointers :-/ */ #if _MSC_VER < 1400 // 1310 == MSVC 7.1 /* Apparently, single precision functions are not included in MSVC 7.1 */ #define sqrtf(x) ((float)sqrt((double)(x))) #define sinf(x) ((float)sin((double)(x))) #define cosf(x) ((float)cos((double)(x))) #define tanf(x) ((float)tan((double)(x))) #define asinf(x) ((float)asin((double)(x))) #define acosf(x) ((float)acos((double)(x))) #define atanf(x) ((float)atan((double)(x))) #define sinhf(x) ((float)sinh((double)(x))) #define coshf(x) ((float)cosh((double)(x))) #define tanhf(x) ((float)tanh((double)(x))) #define asinhf(x) ((float)asinh((double)(x))) #define acoshf(x) ((float)acosh((double)(x))) #define atanhf(x) ((float)atanh((double)(x))) #define logf(x) ((float)log((double)(x))) #define log1pf(x) ((float)log1p((double)(x))) #define log10f(x) ((float)log10((double)(x))) #define expf(x) ((float)exp((double)(x))) #define expm1f(x) ((float)expm1((double)(x))) #define 
fabsf(x) ((float)fabs((double)(x))) #define fmodf(x, y) ((float)fmod((double)(x), (double)(y))) #define atan2f(x, y) ((float)atan2((double)(x), (double)(y))) /* The next are directly called from interp_body.cpp */ #define powf(x, y) ((float)pow((double)(x), (double)(y))) #define floorf(x) ((float)floor((double)(x))) #endif // _MSC_VER < 1400 /* Now the actual stubs */ inline float sqrtf2(float x) { return sqrtf(x); } inline float sinf2(float x) { return sinf(x); } inline float cosf2(float x) { return cosf(x); } inline float tanf2(float x) { return tanf(x); } inline float asinf2(float x) { return asinf(x); } inline float acosf2(float x) { return acosf(x); } inline float atanf2(float x) { return atanf(x); } inline float sinhf2(float x) { return sinhf(x); } inline float coshf2(float x) { return coshf(x); } inline float tanhf2(float x) { return tanhf(x); } inline float asinhf2(float x) { return asinhf(x); } inline float acoshf2(float x) { return acoshf(x); } inline float atanhf2(float x) { return atanhf(x); } inline float logf2(float x) { return logf(x); } inline float log1pf2(float x) { return log1pf(x); } inline float log10f2(float x) { return log10f(x); } inline float expf2(float x) { return expf(x); } inline float expm1f2(float x) { return expm1f(x); } inline float fabsf2(float x) { return fabsf(x); } inline float fmodf2(float x, float y) { return fmodf(x, y); } inline float atan2f2(float x, float y) { return atan2f(x, y); } #endif // NUMEXPR_MSVC_FUNCTION_STUBS_HPP numexpr-2.2.2/numexpr/missing_posix_functions.hpp0000644000175000001440000000363612132261472023053 0ustar faltetusers00000000000000#ifndef NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP #define NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. 
**********************************************************************/ /* These functions are not included in some non-POSIX compilers, like MSVC 7.1 */ /* Double precision versions */ inline double log1p(double x) { double u = 1.0 + x; if (u == 1.0) { return x; } else { return log(u) * x / (u-1.0); } } inline double expm1(double x) { double u = exp(x); if (u == 1.0) { return x; } else if (u-1.0 == -1.0) { return -1; } else { return (u-1.0) * x/log(u); } } inline double asinh(double xx) { double x, d; int sign; if (xx < 0.0) { sign = -1; x = -xx; } else { sign = 1; x = xx; } if (x > 1e8) { d = x; } else { d = sqrt(x*x + 1.0); } return sign*log1p(x*(1.0 + x/(d+1.0))); } inline double acosh(double x) { return 2*log(sqrt((x+1.0)/2)+sqrt((x-1.0)/2)); } inline double atanh(double x) { /* This definition is different from that in NumPy 1.3 and follows the convention of MatLab. This will allow for double checking both approaches. */ return 0.5*log((1.0+x)/(1.0-x)); } /* Single precision versions */ inline float log1pf(float x) { return (float) log1p((double)x); } inline float expm1f(float x) { return (float) expm1((double)x); } inline float asinhf(float x) { return (float) asinh((double)x); } inline float acoshf(float x) { return (float) acosh((double)x); } inline float atanhf(float x) { return (float) atanh((double)x); } #endif // NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP numexpr-2.2.2/numexpr.egg-info/0000755000175000001440000000000012221261175017040 5ustar faltetusers00000000000000numexpr-2.2.2/numexpr.egg-info/not-zip-safe0000644000175000001440000000000112132261500021257 0ustar faltetusers00000000000000 numexpr-2.2.2/numexpr.egg-info/SOURCES.txt0000644000175000001440000000206312221261175020725 0ustar faltetusers00000000000000ANNOUNCE.txt AUTHORS.txt CMakeLists.txt INSTALL.txt LICENSE.txt MANIFEST.in README.txt RELEASE_NOTES.txt setup.py site.cfg.example ./numexpr/tests/__init__.py ./numexpr/tests/test_numexpr.py bench/boolean_timing.py bench/issue-36.py bench/issue-47.py 
bench/multidim.py bench/poly.py bench/timing.py bench/unaligned-simple.py bench/varying-expr.py bench/vml_timing.py numexpr/__init__.py numexpr/complex_functions.hpp numexpr/cpuinfo.py numexpr/expressions.py numexpr/functions.hpp numexpr/interp_body.cpp numexpr/interpreter.cpp numexpr/interpreter.hpp numexpr/missing_posix_functions.hpp numexpr/module.cpp numexpr/module.hpp numexpr/msvc_function_stubs.hpp numexpr/necompiler.py numexpr/numexpr_config.hpp numexpr/numexpr_object.cpp numexpr/numexpr_object.hpp numexpr/opcodes.hpp numexpr/utils.py numexpr/version.py numexpr.egg-info/PKG-INFO numexpr.egg-info/SOURCES.txt numexpr.egg-info/dependency_links.txt numexpr.egg-info/not-zip-safe numexpr.egg-info/top_level.txt numexpr/tests/__init__.py numexpr/tests/test_numexpr.py numexpr/win32/pthread.c numexpr/win32/pthread.hnumexpr-2.2.2/numexpr.egg-info/PKG-INFO0000644000175000001440000000047012221261175020136 0ustar faltetusers00000000000000Metadata-Version: 1.0 Name: numexpr Version: 2.2.2 Summary: Fast numerical expression evaluator for NumPy Home-page: http://code.google.com/p/numexpr/ Author: David M. Cooke, Francesc Alted and others Author-email: david.m.cooke@gmail.com, faltet@pytables.org License: MIT Description: UNKNOWN Platform: UNKNOWN numexpr-2.2.2/numexpr.egg-info/dependency_links.txt0000644000175000001440000000000112221261175023106 0ustar faltetusers00000000000000 numexpr-2.2.2/numexpr.egg-info/top_level.txt0000644000175000001440000000001012221261175021561 0ustar faltetusers00000000000000numexpr numexpr-2.2.2/INSTALL.txt0000644000175000001440000000305212132261472015520 0ustar faltetusers00000000000000These are instructions for installing Numexpr on Unix systems. For Windows, it is best to install it from binaries. However, you should note that, for the time being, we cannot provide Windows binaries with VML support. Building ======== This version of `Numexpr` requires Python 2.6 or greater, and NumPy 1.6 or greater. 
It's built in the standard Python way: $ python setup.py build $ python setup.py install You can test `numexpr` with: $ python -c "import numexpr; numexpr.test()" Enabling Intel's VML support ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ numexpr includes support for Intel's VML library. This allows for better performance on Intel architectures, mainly when evaluating transcendental functions (trigonometrical, exponential...). It also enables numexpr using several CPU cores. If you have Intel's MKL (the library that embeds VML), just copy the `site.cfg.example` that comes in the distribution to `site.cfg` and edit the latter giving proper directions on how to find your MKL libraries in your system. After doing this, you can proceed with the usual building instructions listed above. Pay attention to the messages during the building process in order to know whether MKL has been detected or not. Finally, you can check the speed-ups on your machine by running the `bench/vml_timing.py` script (you can play with different parameters to the `set_vml_accuracy_mode()` and `set_vml_num_threads()` functions in the script so as to see how it would affect performance). .. Local Variables: .. mode: text .. coding: utf-8 .. fill-column: 70 .. End: numexpr-2.2.2/MANIFEST.in0000644000175000001440000000027012136462720015411 0ustar faltetusers00000000000000include MANIFEST.in include *.txt *.cfg site.cfg.example recursive-include numexpr *.cpp *.hpp *.py exclude numexpr/__config__.py exclude RELEASING.txt recursive-include bench *.py numexpr-2.2.2/setup.cfg0000644000175000001440000000007312221261175015471 0ustar faltetusers00000000000000[egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 numexpr-2.2.2/CMakeLists.txt0000644000175000001440000000643112132261472016415 0ustar faltetusers00000000000000# WARNING! EXPERIMENTAL! Use setup.py if you're not familiar with cmake. 
# # We recommend that you create a separate directory for the build, # so that the build files aren't mixed in with the source files. # e.g. # $ mkdir build-cmake # $ cd build-cmake # $ cmake .. # $ make # # MacOSX Notes: # On MacOSX, it may default to 64-bit, even if your Python is 32-bit. # The linker will give NO WARNING, and the resulting .so file will # fail to load in Python. To fix this, run cmake as follows, # assuming you're in /build-cmake/: # cmake -DCMAKE_OSX_ARCHITECTURES=i386 .. # To debug this issue, you can compare "lipo -info `which python`" # with "lipo -info numexpr.so". They should have the same platform info. # # Further problems on OS X appear to be related to EPD Python. CMake's # default detection may be detecting the wrong python to link against. # # Windows Notes: # Python 2.7 is built with Visual C++ 9 (aka 2008). This is the one # you should pick when running cmake-gui. Be sure to switch the build # configuration from Debug to Release (or RelWithDebInfo) in Visual Studio. project(numexpr) cmake_minimum_required(VERSION 2.8) # Force the default build type to be Release, because a Debug # build doesn't work properly with the default Python build if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." 
FORCE) endif() set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}) find_package(PythonInterp REQUIRED) find_package(PythonLibsNew REQUIRED) find_package(NumPy REQUIRED) # Default install location for Python packages if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(CMAKE_INSTALL_PREFIX "${PYTHON_SITE_PACKAGES}" CACHE STRING "Choose the Python module directory (default site-packages)" FORCE) endif() # Require version >= 1.6 if(NUMPY_VERSION_DECIMAL LESS 10600) message(FATAL_ERROR, "NumExpr requires NumPy >= 1.6") endif() include_directories( ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIRS} ) set(numexpr_SRC numexpr/interpreter.cpp numexpr/module.cpp numexpr/numexpr_object.cpp numexpr/complex_functions.hpp numexpr/functions.hpp numexpr/interpreter.hpp numexpr/module.hpp numexpr/missing_posix_functions.hpp numexpr/msvc_function_stubs.hpp numexpr/numexpr_config.hpp numexpr/numexpr_object.hpp numexpr/opcodes.hpp ) if(CMAKE_HOST_WIN32) set(numexpr_SRC ${numexpr_SRC} numexpr/win32/pthread.c ) endif() python_add_module(interpreter ${numexpr_SRC}) # Generate __config__.py. This is a dummy placeholder, as I # don't know why it's here. file(WRITE "${PROJECT_BINARY_DIR}/__config__.py" "# This file is generated by a CMakeFiles.txt configuration\n" "__all__ = ['get_info','show']\n" "def get_info(name):\n" " return None\n" "def show():\n" " print('someone called show()')\n") # Install all the Python scripts install(DIRECTORY numexpr DESTINATION "${CMAKE_INSTALL_PREFIX}" FILES_MATCHING PATTERN "*.py") # Install __config__.py install(FILES "${PROJECT_BINARY_DIR}/__config__.py" DESTINATION "${CMAKE_INSTALL_PREFIX}/numexpr") # Install the module install(TARGETS interpreter DESTINATION "${CMAKE_INSTALL_PREFIX}/numexpr") numexpr-2.2.2/README.txt0000644000175000001440000002452212132261472015354 0ustar faltetusers00000000000000What it is Numexpr? =================== Numexpr is a fast numerical expression evaluator for NumPy. 
With it, expressions that operate on arrays (like "3*a+4*b") are accelerated and use less memory than doing the same calculation in Python. In addition, its multi-threaded capabilities can make use of all your cores, which may accelerate computations, most specially if they are not memory-bounded (e.g. those using transcendental functions). Last but not least, numexpr can make use of Intel's VML (Vector Math Library, normally integrated in its Math Kernel Library, or MKL). This allows further acceleration of transcendent expressions. Examples of use =============== >>> import numpy as np >>> import numexpr as ne >>> a = np.arange(1e6) # Choose large arrays >>> b = np.arange(1e6) >>> ne.evaluate("a + 1") # a simple expression array([ 1.00000000e+00, 2.00000000e+00, 3.00000000e+00, ..., 9.99998000e+05, 9.99999000e+05, 1.00000000e+06]) >>> ne.evaluate('a*b-4.1*a > 2.5*b') # a more complex one array([False, False, False, ..., True, True, True], dtype=bool) >>> ne.evaluate("sin(a) + arcsinh(a/b)") # you can also use functions array([ NaN, 1.72284457, 1.79067101, ..., 1.09567006, 0.17523598, -0.09597844]) >>> s = np.array(['abba', 'abbb', 'abbcdef']) >>> ne.evaluate("'abba' == s") # string arrays are supported too array([ True, False, False], dtype=bool) Datatypes supported internally ============================== Numexpr operates internally only with the following types: * 8-bit boolean (bool) * 32-bit signed integer (int or int32) * 64-bit signed integer (long or int64) * 32-bit single-precision floating point number (float or float32) * 64-bit, double-precision floating point number (double or float64) * 2x64-bit, double-precision complex number (complex or complex128) * Raw string of bytes (str) If the arrays in the expression does not match any of these types, they will be upcasted to one of the above types (following the usual type inference rules, see below). 
Have this in mind when doing estimations about the memory consumption during the computation of your expressions. Also, the types in Numexpr conditions are somewhat stricter than those of Python. For instance, the only valid constants for booleans are `True` and `False`, and they are never automatically cast to integers. Casting rules ============= Casting rules in Numexpr follow closely those of NumPy. However, for implementation reasons, there are some known exceptions to this rule, namely: * When an array with type `int8`, `uint8`, `int16` or `uint16` is used inside Numexpr, it is internally upcasted to an `int` (or `int32` in NumPy notation). * When an array with type `uint32` is used inside Numexpr, it is internally upcasted to a `long` (or `int64` in NumPy notation). * A floating point function (e.g. `sin`) acting on `int8` or `int16` types returns a `float64` type, instead of the `float32` that is returned by NumPy functions. This is mainly due to the absence of native `int8` or `int16` types in Numexpr. * In operations implying a scalar and an array, the normal rules of casting are used in Numexpr, in contrast with NumPy, where array types takes priority. For example, if 'a' is an array of type `float32` and 'b' is an scalar of type `float64` (or Python `float` type, which is equivalent), then 'a*b' returns a `float64` in Numexpr, but a `float32` in NumPy (i.e. array operands take priority in determining the result type). If you need to keep the result a `float32`, be sure you use a `float32` scalar too. Supported operators =================== Numexpr supports the set of operators listed below: * Logical operators: &, |, ~ * Comparison operators: <, <=, ==, !=, >=, > * Unary arithmetic operators: - * Binary arithmetic operators: +, -, *, /, **, % Supported functions =================== The next are the current supported set: * where(bool, number1, number2): number Number1 if the bool condition is true, number2 otherwise. 
* {sin,cos,tan}(float|complex): float|complex Trigonometric sine, cosine or tangent. * {arcsin,arccos,arctan}(float|complex): float|complex Trigonometric inverse sine, cosine or tangent. * arctan2(float1, float2): float Trigonometric inverse tangent of float1/float2. * {sinh,cosh,tanh}(float|complex): float|complex Hyperbolic sine, cosine or tangent. * {arcsinh,arccosh,arctanh}(float|complex): float|complex Hyperbolic inverse sine, cosine or tangent. * {log,log10,log1p}(float|complex): float|complex Natural, base-10 and log(1+x) logarithms. * {exp,expm1}(float|complex): float|complex Exponential and exponential minus one. * sqrt(float|complex): float|complex Square root. * abs(float|complex): float|complex Absolute value. * {real,imag}(complex): float Real or imaginary part of complex. * complex(float, float): complex Complex from real and imaginary parts. .. Notes: + `abs()` for complex inputs returns a ``complex`` output too. This is a departure from NumPy where a ``float`` is returned instead. However, Numexpr is not flexible enough yet so as to allow this to happen. Meanwhile, if you want to mimic NumPy behaviour, you may want to select the real part via the ``real`` function (e.g. "real(abs(cplx))") or via the ``real`` selector (e.g. "abs(cplx).real"). More functions can be added if you need them. Supported reduction operations ============================== The next are the current supported set: * sum(number, axis=None): Sum of array elements over a given axis. Negative axis are not supported. * prod(number, axis=None): Product of array elements over a given axis. Negative axis are not supported. General routines ================ * evaluate(expression, local_dict=None, global_dict=None, out=None, order='K', casting='safe', **kwargs): Evaluate a simple array expression element-wise. See docstrings for more info on parameters. Also, see examples above. * test(): Run all the tests in the test suite. 
* print_versions(): Print the versions of software that numexpr relies on. * set_num_threads(nthreads): Sets a number of threads to be used in operations. Returns the previous setting for the number of threads. During initialization time Numexpr sets this number to the number of detected cores in the system (see `detect_number_of_cores()`). If you are using Intel's VML, you may want to use `set_vml_num_threads(nthreads)` to perform the parallel job with VML instead. However, you should get very similar performance with VML-optimized functions, and VML's parallelizer cannot deal with common expresions like `(x+1)*(x-2)`, while Numexpr's one can. * detect_number_of_cores(): Detects the number of cores in the system. Intel's VML specific support routines ===================================== When compiled with Intel's VML (Vector Math Library), you will be able to use some additional functions for controlling its use. These are: * set_vml_accuracy_mode(mode): Set the accuracy for VML operations. The `mode` parameter can take the values: - 'low': Equivalent to VML_LA - low accuracy VML functions are called - 'high': Equivalent to VML_HA - high accuracy VML functions are called - 'fast': Equivalent to VML_EP - enhanced performance VML functions are called It returns the previous mode. This call is equivalent to the `vmlSetMode()` in the VML library. See: http://www.intel.com/software/products/mkl/docs/webhelp/vml/vml_DataTypesAccuracyModes.html for more info on the accuracy modes. * set_vml_num_threads(nthreads): Suggests a maximum number of threads to be used in VML operations. This function is equivalent to the call `mkl_domain_set_num_threads(nthreads, MKL_VML)` in the MKL library. See: http://www.intel.com/software/products/mkl/docs/webhelp/support/functn_mkl_domain_set_num_threads.html for more info about it. * get_vml_version(): Get the VML/MKL library version. How Numexpr can achieve such a high performance? 
================================================ The main reason why Numexpr achieves better performance than NumPy (or even than plain C code) is that it avoids the creation of whole temporaries for keeping intermediate results, so saving memory bandwidth (the main bottleneck in many computations in nowadays computers). Due to this, it works best with arrays that are large enough (typically larger than processor caches). Briefly, it works as follows. Numexpr parses the expression into its own op-codes, that will be used by the integrated computing virtual machine. Then, the array operands are split in small chunks (that easily fit in the cache of the CPU) and passed to the virtual machine. Then, the computational phase starts, and the virtual machine applies the op-code operations for each chunk, saving the outcome in the resulting array. It is worth noting that all the temporaries and constants in the expression are kept in the same small chunk sizes than the operand ones, avoiding additional memory (and most specially, memory bandwidth) waste. The result is that Numexpr can get the most of your machine computing capabilities for array-wise computations. Just to give you an idea of its performance, common speed-ups with regard to NumPy are usually between 0.95x (for very simple expressions, like ’a + 1’) and 4x (for relatively complex ones, like 'a*b-4.1*a > 2.5*b'), although much higher speed-ups can be achieved (up to 15x can be seen in not too esoteric expressions) because this depends on the kind of the operations and how many operands participates in the expression. Of course, Numexpr will perform better (in comparison with NumPy) with larger matrices, i.e. typically those that does not fit in the cache of your CPU. In order to get a better idea on the different speed-ups that can be achieved for your own platform, you may want to run the benchmarks in the directory bench/. 
See more info about how Numexpr works in: http://code.google.com/p/numexpr/wiki/Overview Authors ======= See AUTHORS.txt License ======= Numexpr is distributed under the MIT license (see LICENSE.txt file). .. Local Variables: .. mode: text .. coding: utf-8 .. fill-column: 70 .. End: numexpr-2.2.2/PKG-INFO0000644000175000001440000000047012221261175014746 0ustar faltetusers00000000000000Metadata-Version: 1.0 Name: numexpr Version: 2.2.2 Summary: Fast numerical expression evaluator for NumPy Home-page: http://code.google.com/p/numexpr/ Author: David M. Cooke, Francesc Alted and others Author-email: david.m.cooke@gmail.com, faltet@pytables.org License: MIT Description: UNKNOWN Platform: UNKNOWN numexpr-2.2.2/RELEASE_NOTES.txt0000644000175000001440000002763412221260713016452 0ustar faltetusers00000000000000====================================== Release notes for Numexpr 2.2 series ====================================== Changes from 2.2.1 to 2.2.2 =========================== * The `copy_args` argument of `NumExpr` function has been brought back. This has been mainly necessary for compatibility with PyTables < 3.0, which I decided to continue to support. Fixed #115. * The `__nonzero__` method in `ExpressionNode` class has been commented out. This is also for compatibility with PyTables < 3.0. See #24 for details. * Fixed the type of some parameters in the C extension so that s390 architecture compiles. Fixes #116. Thank to Antonio Valentino for reporting and the patch. Changes from 2.2 to 2.2.1 ========================= * Fixes a secondary effect of "from numpy.testing import `*`", where division is imported now too, so only then necessary functions from there are imported now. Thanks to Christoph Gohlke for the patch. Changes from 2.1 to 2.2 ======================= * [LICENSE] Fixed a problem with the license of the numexpr/win32/pthread.{c,h} files emulating pthreads on Windows platforms. 
After persmission from the original authors is granted, these files adopt the MIT license and can be redistributed without problems. See issue #109 for details (https://code.google.com/p/numexpr/issues/detail?id=110). * [ENH] Improved the algorithm to decide the initial number of threads to be used. This was necessary because by default, numexpr was using a number of threads equal to the detected number of cores, and this can be just too much for moder systems where this number can be too high (and counterporductive for performance in many cases). Now, the 'NUMEXPR_NUM_THREADS' environment variable is honored, and in case this is not present, a maximum number of *8* threads are setup initially. The new algorithm is fully described in the Users Guide now in the note of 'General routines' section: https://code.google.com/p/numexpr/wiki/UsersGuide#General_routines. Closes #110. * [ENH] numexpr.test() returns `TestResult` instead of None now. Closes #111. * [FIX] Modulus with zero with integers no longer crashes the interpreter. It nows puts a zero in the result. Fixes #107. * [API CLEAN] Removed `copy_args` argument of `evaluate`. This should only be used by old versions of PyTables (< 3.0). * [DOC] Documented the `optimization` and `truediv` flags of `evaluate` in Users Guide (https://code.google.com/p/numexpr/wiki/UsersGuide). Changes from 2.0.1 to 2.1 =========================== * Dropped compatibility with Python < 2.6. * Improve compatibiity with Python 3: - switch from PyString to PyBytes API (requires Python >= 2.6). - fixed incompatibilities regarding the int/long API - use the Py_TYPE macro - use the PyVarObject_HEAD_INIT macro instead of PyObject_HEAD_INIT * Fixed several issues with different platforms not supporting multithreading or subprocess properly (see tickets #75 and #77). * Now, when trying to use pure Python boolean operators, 'and', 'or' and 'not', an error is issued suggesting that '&', '|' and '~' should be used instead (fixes #24). 
Changes from 2.0 to 2.0.1 ========================= * Added compatibility with Python 2.5 (2.4 is definitely not supported anymore). * `numexpr.evaluate` is fully documented now, in particular the new `out`, `order` and `casting` parameters. * Reduction operations are fully documented now. * Negative axis in reductions are not supported (they have never been actually), and a `ValueError` will be raised if they are used. Changes from 1.x series to 2.0 ============================== - Added support for the new iterator object in NumPy 1.6 and later. This allows for better performance with operations that implies broadcast operations, fortran-ordered or non-native byte orderings. Performance for other scenarios is preserved (except for very small arrays). - Division in numexpr is consistent now with Python/NumPy. Fixes #22 and #58. - Constants like "2." or "2.0" must be evaluated as float, not integer. Fixes #59. - `evaluate()` function has received a new parameter `out` for storing the result in already allocated arrays. This is very useful when dealing with large arrays, and a allocating new space for keeping the result is not acceptable. Closes #56. - Maximum number of threads raised from 256 to 4096. Machines with a higher number of cores will still be able to import numexpr, but limited to 4096 (which is an absurdly high number already). Changes from 1.4.1 to 1.4.2 =========================== - Multithreaded operation is disabled for small arrays (< 32 KB). This allows to remove the overhead of multithreading for such a small arrays. Closes #36. - Dividing int arrays by zero gives a 0 as result now (and not a floating point exception anymore. This behaviour mimics NumPy. Thanks to Gaëtan de Menten for the fix. Closes #37. - When compiled with VML support, the number of threads is set to 1 for VML core, and to the number of cores for the native pthreads implementation. This leads to much better performance. Closes #39. 
- Fixed different issues with reduction operations (`sum`, `prod`). The problem is that the threaded code does not work well for broadcasting or reduction operations. Now, the serial code is used in those cases. Closes #41. - Optimization of "compilation phase" through a better hash. This can lead up to a 25% of improvement when operating with variable expressions over small arrays. Thanks to Gaëtan de Menten for the patch. Closes #43. - The ``set_num_threads`` now returns the number of previous thread setting, as stated in the docstrings. Changes from 1.4 to 1.4.1 ========================= - Mingw32 can also work with pthreads compatibility code for win32. Fixes #31. - Fixed a problem that used to happen when running Numexpr with threads in subprocesses. It seems that threads needs to be initialized whenever a subprocess is created. Fixes #33. - The GIL (Global Interpreter Lock) is released during computations. This should allow for better resource usage for multithreaded apps. Fixes #35. Changes from 1.3.1 to 1.4 ========================= - Added support for multi-threading in pure C. This is to avoid the GIL and allows to squeeze the best performance in both multi-core machines. - David Cooke contributed a thorough refactorization of the opcode machinery for the virtual machine. With this, it is really easy to add more opcodes. See: http://code.google.com/p/numexpr/issues/detail?id=28 as an example. - Added a couple of opcodes to VM: where_bbbb and cast_ib. The first allow to get boolean arrays out of the `where` function. The second allows to cast a boolean array into an integer one. Thanks to gdementen for his contribution. - Fix negation of `int64` numbers. Closes #25. - Using a `npy_intp` datatype (instead of plain `int`) so as to be able to manage arrays larger than 2 GB. Changes from 1.3 to 1.3.1 ========================= - Due to an oversight, ``uint32`` types were not properly supported. That has been solved. Fixes #19. 
- Function `abs` for computing the absolute value added. However, it does not strictly follow NumPy conventions. See ``README.txt`` or website docs for more info on this. Thanks to Pauli Virtanen for the patch. Fixes #20. Changes from 1.2 to 1.3 ======================= - A new type called internally `float` has been implemented so as to be able to work natively with single-precision floating points. This prevents the silent upcast to `double` types that was taking place in previous versions, so allowing both an improved performance and an optimal usage of memory for the single-precision computations. However, the casting rules for floating point types slightly differs from those of NumPy. See: http://code.google.com/p/numexpr/wiki/Overview or the README.txt file for more info on this issue. - Support for Python 2.6 added. - When linking with the MKL, added a '-rpath' option to the link step so that the paths to MKL libraries are automatically included into the runtime library search path of the final package (i.e. the user won't need to update its LD_LIBRARY_PATH or LD_RUN_PATH environment variables anymore). Fixes #16. Changes from 1.1.1 to 1.2 ========================= - Support for Intel's VML (Vector Math Library) added, normally included in Intel's MKL (Math Kernel Library). In addition, when the VML support is on, several processors can be used in parallel (see the new `set_vml_num_threads()` function). With that, the computations of transcendental functions can be accelerated quite a few. For example, typical speed-ups when using one single core for contiguous arrays are 3x with peaks of 7.5x (for the pow() function). When using 2 cores the speed-ups are around 4x and 14x respectively. Closes #9. - Some new VML-related functions have been added: * set_vml_accuracy_mode(mode): Set the accuracy for VML operations. * set_vml_num_threads(nthreads): Suggests a maximum number of threads to be used in VML operations. 
* get_vml_version(): Get the VML/MKL library version. See the README.txt for more info about them. - In order to easily allow the detection of the MKL, the setup.py has been updated to use the numpy.distutils. So, if you are already used to link NumPy/SciPy with MKL, then you will find that giving VML support to numexpr works almost the same. - A new `print_versions()` function has been made available. This allows to quickly print the versions on which numexpr is based on. Very handy for issue reporting purposes. - The `numexpr.numexpr` compiler function has been renamed to `numexpr.NumExpr` in order to avoid name collisions with the name of the package (!). This function is mainly for internal use, so you should not need to upgrade your existing numexpr scripts. Changes from 1.1 to 1.1.1 ========================= - The case for multidimensional array operands is properly accelerated now. Added a new benchmark (based on a script provided by Andrew Collette, thanks!) for easily testing this case in the future. Closes #12. - Added a fix to avoid the caches in numexpr to grow too much. The dictionary caches are kept now always with less than 256 entries. Closes #11. - The VERSION file is correctly copied now (it was not present for the 1.1 tar file, I don't know exactly why). Closes #8. Changes from 1.0 to 1.1 ======================= - Numexpr can work now in threaded environments. Fixes #2. - The test suite can be run programmatically by using ``numexpr.test()``. - Support a more complete set of functions for expressions (including those that are not supported by MSVC 7.1 compiler, like the inverse hyperbolic or log1p and expm1 functions. The complete list now is: * where(bool, number1, number2): number Number1 if the bool condition is true, number2 otherwise. * {sin,cos,tan}(float|complex): float|complex Trigonometric sinus, cosinus or tangent. * {arcsin,arccos,arctan}(float|complex): float|complex Trigonometric inverse sinus, cosinus or tangent. 
* arctan2(float1, float2): float Trigonometric inverse tangent of float1/float2. * {sinh,cosh,tanh}(float|complex): float|complex Hyperbolic sinus, cosinus or tangent. * {arcsinh,arccosh,arctanh}(float|complex): float|complex Hyperbolic inverse sinus, cosinus or tangent. * {log,log10,log1p}(float|complex): float|complex Natural, base-10 and log(1+x) logarithms. * {exp,expm1}(float|complex): float|complex Exponential and exponential minus one. * sqrt(float|complex): float|complex Square root. * {real,imag}(complex): float Real or imaginary part of complex. * complex(float, float): complex Complex from real and imaginary parts. .. Local Variables: .. mode: rst .. coding: utf-8 .. fill-column: 70 .. End: numexpr-2.2.2/ANNOUNCE.txt0000644000175000001440000000332312221260713015615 0ustar faltetusers00000000000000========================== Announcing Numexpr 2.2.2 ========================== What's new ========== This is mainly a release for compatibility with PyTables < 3.0, which I decided to continue to support. For details, see: https://code.google.com/p/numexpr/issues/detail?id=115 Also, added a fix so that s390 architecture compiles. Fixes #116. Thanks to Antonio Valentino for reporting and for the patch. In case you want to know more in detail what has changed in this version, see: http://code.google.com/p/numexpr/wiki/ReleaseNotes or have a look at RELEASE_NOTES.txt in the tarball. What it is? =========== Numexpr is a fast numerical expression evaluator for NumPy. With it, expressions that operate on arrays (like "3*a+4*b") are accelerated and use less memory than doing the same calculation in Python. It wears multi-threaded capabilities, as well as support for Intel's VML library (included in Intel MKL), which allows an extremely fast evaluation of transcendental functions (sin, cos, tan, exp, log...) while squeezing the last drop of performance out of your multi-core processors. 
Its only dependency is NumPy (MKL is optional), so it works well as an easy-to-deploy, easy-to-use, computational kernel for projects that don't want to adopt other solutions that require more heavy dependencies. Where I can find Numexpr? ========================= The project is hosted at Google code in: http://code.google.com/p/numexpr/ You can get the packages from PyPI as well: http://pypi.python.org/pypi/numexpr Share your experience ===================== Let us know of any bugs, suggestions, gripes, kudos, etc. you may have. Enjoy data! .. Local Variables: .. mode: rst .. coding: utf-8 .. fill-column: 70 .. End: numexpr-2.2.2/site.cfg.example0000644000175000001440000000353312136457700016741 0ustar faltetusers00000000000000# if setup does not find the MKL or does not use the configuration, copy this # file to "site.cfg" and edit the paths according to your installation of the # Intel MKL. # Note: some versions of MKL need to be linked to gfortran if compiled with # GNU C compiler. Uncomment next line if you get an error like "undefined # symbol: _gfortran_malloc" # # Note2: Some Fedora users reported that they had to install a # compatible version of the gfortran lib. See: # http://code.google.com/p/numexpr/issues/detail?id=15 # for more info. 
[DEFAULT] #libraries = gfortran [mkl] # Example for the MKL included in Intel C 11.0 compiler # (you may need a recent NumPy version for being able to search libraries # in different directories at a time) #library_dirs = /opt/intel/Compiler/11.0/074/mkl/lib/em64t/:/opt/intel/Compiler/11.0/074/lib/intel64 #include_dirs = /opt/intel/Compiler/11.0/074/mkl/include/ #mkl_libs = mkl_solver_ilp64, mkl_intel_ilp64, mkl_intel_thread, mkl_core, iomp5 # This seems to work for MKL 11 with processors with AVX (Sandy Bridge and above) for Linux #library_dirs = /opt/intel/composerxe/mkl/lib/intel64:/opt/intel/composer_xe_2013.3.163/compiler/lib/intel64 #include_dirs = /opt/intel/composerxe/mkl/include/ #mkl_libs = mkl_intel_lp64, mkl_gf_lp64, mkl_intel_thread, mkl_core, mkl_blas95_lp64, mkl_lapack95_lp64, mkl_avx, mkl_vml_avx, mkl_rt, iomp5 ## Example for using MKL 10.0 #library_dirs = /opt/intel/mkl/10.0.2.018/lib/em64t #include_dirs = /opt/intel/mkl/10.0.2.018/include # Example for using MKL 10.2 for Windows 64-bit #include_dirs = \Program Files\Intel\MKL\10.2.5.035\include #library_dirs = \Program Files\Intel\MKL\10.2.5.035\em64t\lib #mkl_libs = mkl_solver_ilp64, mkl_core, mkl_intel_thread, mkl_intel_ilp64, libiomp5md # The next works too, but for LP64 arithmetic #mkl_libs = mkl_core, mkl_intel_thread, mkl_intel_lp64, libiomp5md numexpr-2.2.2/AUTHORS.txt0000644000175000001440000000126112132261472015537 0ustar faltetusers00000000000000Numexpr was initially written by David Cooke, and extended to more types by Tim Hochberg. Francesc Alted contributed support for booleans and simple-precision floating point types, efficient strided and unaligned array operations and multi-threading code. Ivan Vilata contributed support for strings. Gregor Thalhammer implemented the support for Intel VML (Vector Math Library). 
Mark Wiebe added support for the new iterator in NumPy, which allows for better performance in more scenarios (like broadcasting, fortran-ordered or non-native byte orderings). Gaëtan de Menten contributed important bug fixes and speed enhancements. Antonio Valentino contributed the port to Python 3. numexpr-2.2.2/bench/0000755000175000001440000000000012221261175014727 5ustar faltetusers00000000000000numexpr-2.2.2/bench/issue-36.py0000644000175000001440000000152612132261472016664 0ustar faltetusers00000000000000# Small benchmark to get the even point where the threading code # performs better than the serial code. See issue #36 for details. import numpy as np import numexpr as ne from numpy.testing import assert_array_equal from time import time def bench(N): print "*** array length:", N a = np.arange(N) t0 = time() ntimes = (1000*2**15) // N for i in xrange(ntimes): ne.evaluate('a>1000') print "numexpr--> %.3g" % ((time()-t0)/ntimes,) t0 = time() for i in xrange(ntimes): eval('a>1000') print "numpy--> %.3g" % ((time()-t0)/ntimes,) if __name__ == "__main__": print "****** Testing with 1 thread..." ne.set_num_threads(1) for N in range(10, 20): bench(2**N) print "****** Testing with 2 threads..." ne.set_num_threads(2) for N in range(10, 20): bench(2**N) numexpr-2.2.2/bench/unaligned-simple.py0000644000175000001440000000254712132261472020547 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### """Very simple test that compares the speed of operating with aligned vs unaligned arrays. 
""" from timeit import Timer import numpy as np import numexpr as ne niter = 10 #shape = (1000*10000) # unidimensional test shape = (1000, 10000) # multidimensional test ne.print_versions() Z_fast = np.zeros(shape, dtype=[('x',np.float64),('y',np.int64)]) Z_slow = np.zeros(shape, dtype=[('y1',np.int8),('x',np.float64),('y2',np.int8,(7,))]) x_fast = Z_fast['x'] t = Timer("x_fast * x_fast", "from __main__ import x_fast") print "NumPy aligned: \t", round(min(t.repeat(3, niter)), 3), "s" x_slow = Z_slow['x'] t = Timer("x_slow * x_slow", "from __main__ import x_slow") print "NumPy unaligned:\t", round(min(t.repeat(3, niter)), 3), "s" t = Timer("ne.evaluate('x_fast * x_fast')", "from __main__ import ne, x_fast") print "Numexpr aligned:\t", round(min(t.repeat(3, niter)), 3), "s" t = Timer("ne.evaluate('x_slow * x_slow')", "from __main__ import ne, x_slow") print "Numexpr unaligned:\t", round(min(t.repeat(3, niter)), 3), "s" numexpr-2.2.2/bench/issue-47.py0000644000175000001440000000030312132261472016656 0ustar faltetusers00000000000000import numpy import numexpr numexpr.set_num_threads(8) x0,x1,x2,x3,x4,x5 = [0,1,2,3,4,5] t = numpy.linspace(0,1,44100000).reshape(-1,1) numexpr.evaluate('(x0+x1*t+x2*t**2)* cos(x3+x4*t+x5**t)') numexpr-2.2.2/bench/vml_timing.py0000644000175000001440000001317612132261472017457 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### import sys import timeit import numpy import numexpr array_size = 1000*1000 iterations = 10 numpy_ttime = [] numpy_sttime = [] numpy_nttime = [] numexpr_ttime = [] numexpr_sttime = [] numexpr_nttime = [] def compare_times(expr, nexpr): global numpy_ttime global numpy_sttime global numpy_nttime global numexpr_ttime global numexpr_sttime global numexpr_nttime print "******************* Expression:", expr setup_contiguous = setupNP_contiguous setup_strided = setupNP_strided setup_unaligned = setupNP_unaligned numpy_timer = timeit.Timer(expr, setup_contiguous) numpy_time = round(numpy_timer.timeit(number=iterations), 4) numpy_ttime.append(numpy_time) print '%30s %.4f'%('numpy:', numpy_time / iterations) numpy_timer = timeit.Timer(expr, setup_strided) numpy_stime = round(numpy_timer.timeit(number=iterations), 4) numpy_sttime.append(numpy_stime) print '%30s %.4f'%('numpy strided:', numpy_stime / iterations) numpy_timer = timeit.Timer(expr, setup_unaligned) numpy_ntime = round(numpy_timer.timeit(number=iterations), 4) numpy_nttime.append(numpy_ntime) print '%30s %.4f'%('numpy unaligned:', numpy_ntime / iterations) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_contiguous) numexpr_time = round(numexpr_timer.timeit(number=iterations), 4) numexpr_ttime.append(numexpr_time) print '%30s %.4f'%("numexpr:", numexpr_time/iterations,), print "Speed-up of numexpr over numpy:", round(numpy_time/numexpr_time, 4) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_strided) numexpr_stime = round(numexpr_timer.timeit(number=iterations), 4) numexpr_sttime.append(numexpr_stime) print '%30s %.4f'%("numexpr strided:", numexpr_stime/iterations,), print "Speed-up of numexpr over numpy:", \ round(numpy_stime/numexpr_stime, 4) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = 
timeit.Timer(evalexpr, setup_unaligned) numexpr_ntime = round(numexpr_timer.timeit(number=iterations), 4) numexpr_nttime.append(numexpr_ntime) print '%30s %.4f'%("numexpr unaligned:", numexpr_ntime/iterations,), print "Speed-up of numexpr over numpy:", \ round(numpy_ntime/numexpr_ntime, 4) print setupNP = """\ from numpy import arange, linspace, arctan2, sqrt, sin, cos, exp, log from numpy import rec as records #from numexpr import evaluate from numexpr import %s # Initialize a recarray of 16 MB in size r=records.array(None, formats='a%s,i4,f4,f8', shape=%s) c1 = r.field('f0')%s i2 = r.field('f1')%s f3 = r.field('f2')%s f4 = r.field('f3')%s c1[:] = "a" i2[:] = arange(%s)/1000 f3[:] = linspace(0,1,len(i2)) f4[:] = f3*1.23 """ eval_method = "evaluate" setupNP_contiguous = setupNP % ((eval_method, 4, array_size,) + \ (".copy()",)*4 + \ (array_size,)) setupNP_strided = setupNP % (eval_method, 4, array_size, "", "", "", "", array_size) setupNP_unaligned = setupNP % (eval_method, 1, array_size, "", "", "", "", array_size) expressions = [] expressions.append('i2 > 0') expressions.append('f3+f4') expressions.append('f3+i2') expressions.append('exp(f3)') expressions.append('log(exp(f3)+1)/f4') expressions.append('0.1*i2 > arctan2(f3, f4)') expressions.append('sqrt(f3**2 + f4**2) > 1') expressions.append('sin(f3)>cos(f4)') expressions.append('f3**f4') def compare(expression=False): if expression: compare_times(expression, 1) sys.exit(0) nexpr = 0 for expr in expressions: nexpr += 1 compare_times(expr, nexpr) print if __name__ == '__main__': import numexpr numexpr.print_versions() numpy.seterr(all='ignore') numexpr.set_vml_accuracy_mode('low') numexpr.set_vml_num_threads(2) if len(sys.argv) > 1: expression = sys.argv[1] print "expression-->", expression compare(expression) else: compare() tratios = numpy.array(numpy_ttime) / numpy.array(numexpr_ttime) stratios = numpy.array(numpy_sttime) / numpy.array(numexpr_sttime) ntratios = numpy.array(numpy_nttime) / 
numpy.array(numexpr_nttime) print "eval method: %s" % eval_method print "*************** Numexpr vs NumPy speed-ups *******************" # print "numpy total:", sum(numpy_ttime)/iterations # print "numpy strided total:", sum(numpy_sttime)/iterations # print "numpy unaligned total:", sum(numpy_nttime)/iterations # print "numexpr total:", sum(numexpr_ttime)/iterations print "Contiguous case:\t %s (mean), %s (min), %s (max)" % \ (round(tratios.mean(), 2), round(tratios.min(), 2), round(tratios.max(), 2)) # print "numexpr strided total:", sum(numexpr_sttime)/iterations print "Strided case:\t\t %s (mean), %s (min), %s (max)" % \ (round(stratios.mean(), 2), round(stratios.min(), 2), round(stratios.max(), 2)) # print "numexpr unaligned total:", sum(numexpr_nttime)/iterations print "Unaligned case:\t\t %s (mean), %s (min), %s (max)" % \ (round(ntratios.mean(), 2), round(ntratios.min(), 2), round(ntratios.max(), 2)) numexpr-2.2.2/bench/poly.py0000644000175000001440000000350612207603642016273 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### ####################################################################### # This script compares the speed of the computation of a polynomial # for different (numpy and numexpr) in-memory paradigms. 
# # Author: Francesc Alted # Date: 2010-07-06 ####################################################################### import sys from time import time import numpy as np import numexpr as ne #expr = ".25*x**3 + .75*x**2 - 1.5*x - 2" # the polynomial to compute expr = "((.25*x + .75)*x - 1.5)*x - 2" # a computer-friendly polynomial N = 10*1000*1000 # the number of points to compute expression x = np.linspace(-1, 1, N) # the x in range [-1, 1] #what = "numpy" # uses numpy for computations what = "numexpr" # uses numexpr for computations def compute(): """Compute the polynomial.""" if what == "numpy": y = eval(expr) else: y = ne.evaluate(expr) return len(y) if __name__ == '__main__': if len(sys.argv) > 1: # first arg is the package to use what = sys.argv[1] if len(sys.argv) > 2: # second arg is the number of threads to use nthreads = int(sys.argv[2]) if "ncores" in dir(ne): ne.set_num_threads(nthreads) if what not in ("numpy", "numexpr"): print "Unrecognized module:", what sys.exit(0) print "Computing: '%s' using %s with %d points" % (expr, what, N) t0 = time() result = compute() ts = round(time() - t0, 3) print "*** Time elapsed:", ts numexpr-2.2.2/bench/boolean_timing.py0000644000175000001440000001246412132261472020277 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### import sys import timeit import numpy array_size = 1000*1000 iterations = 10 numpy_ttime = [] numpy_sttime = [] numpy_nttime = [] numexpr_ttime = [] numexpr_sttime = [] numexpr_nttime = [] def compare_times(expr, nexpr): global numpy_ttime global numpy_sttime global numpy_nttime global numexpr_ttime global numexpr_sttime global numexpr_nttime print "******************* Expression:", expr setup_contiguous = setupNP_contiguous setup_strided = setupNP_strided setup_unaligned = setupNP_unaligned numpy_timer = timeit.Timer(expr, setup_contiguous) numpy_time = round(numpy_timer.timeit(number=iterations), 4) numpy_ttime.append(numpy_time) print 'numpy:', numpy_time / iterations numpy_timer = timeit.Timer(expr, setup_strided) numpy_stime = round(numpy_timer.timeit(number=iterations), 4) numpy_sttime.append(numpy_stime) print 'numpy strided:', numpy_stime / iterations numpy_timer = timeit.Timer(expr, setup_unaligned) numpy_ntime = round(numpy_timer.timeit(number=iterations), 4) numpy_nttime.append(numpy_ntime) print 'numpy unaligned:', numpy_ntime / iterations evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_contiguous) numexpr_time = round(numexpr_timer.timeit(number=iterations), 4) numexpr_ttime.append(numexpr_time) print "numexpr:", numexpr_time/iterations, print "Speed-up of numexpr over numpy:", round(numpy_time/numexpr_time, 4) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_strided) numexpr_stime = round(numexpr_timer.timeit(number=iterations), 4) numexpr_sttime.append(numexpr_stime) print "numexpr strided:", numexpr_stime/iterations, print "Speed-up of numexpr strided over numpy:", \ round(numpy_stime/numexpr_stime, 4) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_unaligned) numexpr_ntime = 
round(numexpr_timer.timeit(number=iterations), 4) numexpr_nttime.append(numexpr_ntime) print "numexpr unaligned:", numexpr_ntime/iterations, print "Speed-up of numexpr unaligned over numpy:", \ round(numpy_ntime/numexpr_ntime, 4) setupNP = """\ from numpy import arange, where, arctan2, sqrt from numpy import rec as records from numexpr import evaluate # Initialize a recarray of 16 MB in size r=records.array(None, formats='a%s,i4,f8', shape=%s) c1 = r.field('f0')%s i2 = r.field('f1')%s f3 = r.field('f2')%s c1[:] = "a" i2[:] = arange(%s)/1000 f3[:] = i2/2. """ setupNP_contiguous = setupNP % (4, array_size, ".copy()", ".copy()", ".copy()", array_size) setupNP_strided = setupNP % (4, array_size, "", "", "", array_size) setupNP_unaligned = setupNP % (1, array_size, "", "", "", array_size) expressions = [] expressions.append('i2 > 0') expressions.append('i2 < 0') expressions.append('i2 < f3') expressions.append('i2-10 < f3') expressions.append('i2*f3+f3*f3 > i2') expressions.append('0.1*i2 > arctan2(i2, f3)') expressions.append('i2%2 > 3') expressions.append('i2%10 < 4') expressions.append('i2**2 + (f3+1)**-2.5 < 3') expressions.append('(f3+1)**50 > i2') expressions.append('sqrt(i2**2 + f3**2) > 1') expressions.append('(i2>2) | ((f3**2>3) & ~(i2*f3<2))') def compare(expression=False): if expression: compare_times(expression, 1) sys.exit(0) nexpr = 0 for expr in expressions: nexpr += 1 compare_times(expr, nexpr) print if __name__ == '__main__': import numexpr numexpr.print_versions() if len(sys.argv) > 1: expression = sys.argv[1] print "expression-->", expression compare(expression) else: compare() tratios = numpy.array(numpy_ttime) / numpy.array(numexpr_ttime) stratios = numpy.array(numpy_sttime) / numpy.array(numexpr_sttime) ntratios = numpy.array(numpy_nttime) / numpy.array(numexpr_nttime) print "*************** Numexpr vs NumPy speed-ups *******************" # print "numpy total:", sum(numpy_ttime)/iterations # print "numpy strided total:", 
sum(numpy_sttime)/iterations # print "numpy unaligned total:", sum(numpy_nttime)/iterations # print "numexpr total:", sum(numexpr_ttime)/iterations print "Contiguous case:\t %s (mean), %s (min), %s (max)" % \ (round(tratios.mean(), 2), round(tratios.min(), 2), round(tratios.max(), 2)) # print "numexpr strided total:", sum(numexpr_sttime)/iterations print "Strided case:\t\t %s (mean), %s (min), %s (max)" % \ (round(stratios.mean(), 2), round(stratios.min(), 2), round(stratios.max(), 2)) # print "numexpr unaligned total:", sum(numexpr_nttime)/iterations print "Unaligned case:\t\t %s (mean), %s (min), %s (max)" % \ (round(ntratios.mean(), 2), round(ntratios.min(), 2), round(ntratios.max(), 2)) numexpr-2.2.2/bench/multidim.py0000644000175000001440000000515412132261472017133 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### # Script to check that multidimensional arrays are speed-up properly too # Based on a script provided by Andrew Collette. import numpy as np import numexpr as nx import time test_shapes = [ (100*100*100), (100*100,100), (100,100,100), ] test_dtype = 'f4' nruns = 10 # Ensemble for timing def chunkify(chunksize): """ Very stupid "chunk vectorizer" which keeps memory use down. This version requires all inputs to have the same number of elements, although it shouldn't be that hard to implement simple broadcasting. 
""" def chunkifier(func): def wrap(*args): assert len(args) > 0 assert all(len(a.flat) == len(args[0].flat) for a in args) nelements = len(args[0].flat) nchunks, remain = divmod(nelements, chunksize) out = np.ndarray(args[0].shape) for start in xrange(0, nelements, chunksize): #print start stop = start+chunksize if start+chunksize > nelements: stop = nelements-start iargs = tuple(a.flat[start:stop] for a in args) out.flat[start:stop] = func(*iargs) return out return wrap return chunkifier test_func_str = "63 + (a*b) + (c**2) + b" def test_func(a, b, c): return 63 + (a*b) + (c**2) + b test_func_chunked = chunkify(100*100)(test_func) for test_shape in test_shapes: test_size = np.product(test_shape) # The actual data we'll use a = np.arange(test_size, dtype=test_dtype).reshape(test_shape) b = np.arange(test_size, dtype=test_dtype).reshape(test_shape) c = np.arange(test_size, dtype=test_dtype).reshape(test_shape) start1 = time.time() for idx in xrange(nruns): result1 = test_func(a, b, c) stop1 = time.time() start2 = time.time() for idx in xrange(nruns): result2 = nx.evaluate(test_func_str) stop2 = time.time() start3 = time.time() for idx in xrange(nruns): result3 = test_func_chunked(a, b, c) stop3 = time.time() print "%s %s (average of %s runs)" % (test_shape, test_dtype, nruns) print "Simple: ", (stop1-start1)/nruns print "Numexpr: ", (stop2-start2)/nruns print "Chunked: ", (stop3-start3)/nruns numexpr-2.2.2/bench/timing.py0000644000175000001440000000750012132261472016573 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### import timeit, numpy array_size = 1e6 iterations = 2 # Choose the type you want to benchmark #dtype = 'int8' #dtype = 'int16' #dtype = 'int32' #dtype = 'int64' dtype = 'float32' #dtype = 'float64' def compare_times(setup, expr): print "Expression:", expr namespace = {} exec setup in namespace numpy_timer = timeit.Timer(expr, setup) numpy_time = numpy_timer.timeit(number=iterations) print 'numpy:', numpy_time / iterations try: weave_timer = timeit.Timer('blitz("result=%s")' % expr, setup) weave_time = weave_timer.timeit(number=iterations) print "Weave:", weave_time/iterations print "Speed-up of weave over numpy:", round(numpy_time/weave_time, 2) except: print "Skipping weave timing" numexpr_timer = timeit.Timer('evaluate("%s", optimization="aggressive")' % expr, setup) numexpr_time = numexpr_timer.timeit(number=iterations) print "numexpr:", numexpr_time/iterations tratio = numpy_time/numexpr_time print "Speed-up of numexpr over numpy:", round(tratio, 2) return tratio setup1 = """\ from numpy import arange try: from scipy.weave import blitz except: pass from numexpr import evaluate result = arange(%f, dtype='%s') b = arange(%f, dtype='%s') c = arange(%f, dtype='%s') d = arange(%f, dtype='%s') e = arange(%f, dtype='%s') """ % ((array_size, dtype)*5) expr1 = 'b*c+d*e' setup2 = """\ from numpy import arange try: from scipy.weave import blitz except: pass from numexpr import evaluate a = arange(%f, dtype='%s') b = arange(%f, dtype='%s') result = arange(%f, dtype='%s') """ % ((array_size, dtype)*3) expr2 = '2*a+3*b' setup3 = """\ from numpy import arange, sin, cos, sinh try: from scipy.weave import blitz except: pass from numexpr import evaluate a = arange(2*%f, dtype='%s')[::2] b = arange(%f, dtype='%s') result = arange(%f, dtype='%s') """ % ((array_size, dtype)*3) expr3 = '2*a + (cos(3)+5)*sinh(cos(b))' setup4 = """\ from numpy import arange, sin, cos, sinh, arctan2 try: from scipy.weave import blitz 
except: pass from numexpr import evaluate a = arange(2*%f, dtype='%s')[::2] b = arange(%f, dtype='%s') result = arange(%f, dtype='%s') """ % ((array_size, dtype)*3) expr4 = '2*a + arctan2(a, b)' setup5 = """\ from numpy import arange, sin, cos, sinh, arctan2, sqrt, where try: from scipy.weave import blitz except: pass from numexpr import evaluate a = arange(2*%f, dtype='%s')[::2] b = arange(%f, dtype='%s') result = arange(%f, dtype='%s') """ % ((array_size, dtype)*3) expr5 = 'where(0.1*a > arctan2(a, b), 2*a, arctan2(a,b))' expr6 = 'where(a != 0.0, 2, b)' expr7 = 'where(a-10 != 0.0, a, 2)' expr8 = 'where(a%2 != 0.0, b+5, 2)' expr9 = 'where(a%2 != 0.0, 2, b+5)' expr10 = 'a**2 + (b+1)**-2.5' expr11 = '(a+1)**50' expr12 = 'sqrt(a**2 + b**2)' def compare(check_only=False): experiments = [(setup1, expr1), (setup2, expr2), (setup3, expr3), (setup4, expr4), (setup5, expr5), (setup5, expr6), (setup5, expr7), (setup5, expr8), (setup5, expr9), (setup5, expr10), (setup5, expr11), (setup5, expr12), ] total = 0 for params in experiments: total += compare_times(*params) print average = total / len(experiments) print "Average =", round(average, 2) return average if __name__ == '__main__': import numexpr numexpr.print_versions() averages = [] for i in range(iterations): averages.append(compare()) print "Averages:", ', '.join("%.2f" % x for x in averages) numexpr-2.2.2/bench/varying-expr.py0000644000175000001440000000301612132261472017735 0ustar faltetusers00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### # Benchmark for checking if numexpr leaks memory when evaluating # expressions that changes continously. 
It also serves for computing # the latency of numexpr when working with small arrays. import sys from time import time import numpy as np import numexpr as ne N = 100 M = 10 def timed_eval(eval_func, expr_func): t1 = time() for i in xrange(N): r = eval_func(expr_func(i)) if i % 10 == 0: sys.stdout.write('.') print " done in %s seconds" % round(time() - t1, 3) print "Number of iterations %s. Length of the array: %s " % (N, M) a = np.arange(M) # lots of duplicates to collapse #expr = '+'.join('(a + 1) * %d' % i for i in range(50)) # no duplicate to collapse expr = '+'.join('(a + %d) * %d' % (i, i) for i in range(50)) def non_cacheable(i): return expr + '+ %d' % i def cacheable(i): return expr + '+ i' print "* Numexpr with non-cacheable expressions: ", timed_eval(ne.evaluate, non_cacheable) print "* Numexpr with cacheable expressions: ", timed_eval(ne.evaluate, cacheable) print "* Numpy with non-cacheable expressions: ", timed_eval(eval, non_cacheable) print "* Numpy with cacheable expressions: ", timed_eval(eval, cacheable)