pax_global_header00006660000000000000000000000064136037552510014521gustar00rootroot0000000000000052 comment=01e5c82410e1c6e1d842308034aaa9622576f5a5 numexpr-2.7.1/000077500000000000000000000000001360375525100132265ustar00rootroot00000000000000numexpr-2.7.1/.gitignore000066400000000000000000000001711360375525100152150ustar00rootroot00000000000000syntax: glob build/ build-cmake/ dist/ numexpr.egg-info/ *.pyc *.swp *~ doc/_build site.cfg .vscode *.pyd __config__.py numexpr-2.7.1/.hgeol000066400000000000000000000002451360375525100143260ustar00rootroot00000000000000[repository] native = LF [patterns] **.py = native **.c = native **.cpp = native **.inc = native **.h = native **.txt = native site.cfg.example = native numexpr-2.7.1/.mailmap000066400000000000000000000005211360375525100146450ustar00rootroot00000000000000Francesc Alted Francesc Alted Francesc Alted Francesc Alted Francesc Alted Francesc Alted Mark Wiebe mwwiebe Mark Wiebe mwiebe numexpr-2.7.1/.travis.yml000066400000000000000000000006001360375525100153330ustar00rootroot00000000000000arch: - amd64 - arm64 sudo: false language: python python: - 2.7 - 3.4 - 3.5 - 3.6 - 3.7 - 3.8 before_install: - uname -a - free -m - df -h - ulimit -a - pip install -q numpy - python -V install: - pip install . script: - mkdir empty - cd empty - python -c "import sys;import numexpr;sys.exit(0 if numexpr.test().wasSuccessful() else 1)" numexpr-2.7.1/ANNOUNCE.rst000066400000000000000000000036221360375525100151710ustar00rootroot00000000000000========================= Announcing Numexpr 2.7.1 ========================= Hi everyone, This is a version bump to add support for Python 3.8 and NumPy 1.18. We are also removing support for Python 3.4. Project documentation is available at: http://numexpr.readthedocs.io/ Changes from 2.7.0 to 2.7.1 ---------------------------- - Python 3.8 support has been added. - Python 3.4 support is discontinued. - The tests are now compatible with NumPy 1.18. - `site.cfg.example` was updated to use the `libraries` tag instead of `mkl_libs`, which is recommended for newer version of NumPy. What's Numexpr? --------------- Numexpr is a fast numerical expression evaluator for NumPy. With it, expressions that operate on arrays (like "3*a+4*b") are accelerated and use less memory than doing the same calculation in Python. It has multi-threaded capabilities, as well as support for Intel's MKL (Math Kernel Library), which allows an extremely fast evaluation of transcendental functions (sin, cos, tan, exp, log...) while squeezing the last drop of performance out of your multi-core processors. Look here for a some benchmarks of numexpr using MKL: https://github.com/pydata/numexpr/wiki/NumexprMKL Its only dependency is NumPy (MKL is optional), so it works well as an easy-to-deploy, easy-to-use, computational engine for projects that don't want to adopt other solutions requiring more heavy dependencies. Where I can find Numexpr? ------------------------- The project is hosted at GitHub in: https://github.com/pydata/numexpr You can get the packages from PyPI as well (but not for RC releases): http://pypi.python.org/pypi/numexpr Documentation is hosted at: http://numexpr.readthedocs.io/en/latest/ Share your experience --------------------- Let us know of any bugs, suggestions, gripes, kudos, etc. you may have. Enjoy data! .. Local Variables: .. mode: rst .. coding: utf-8 .. fill-column: 70 .. 
End: numexpr-2.7.1/AUTHORS.txt000066400000000000000000000015441360375525100151200ustar00rootroot00000000000000Numexpr was initially written by David Cooke, and extended to more types by Tim Hochberg. Francesc Alted contributed support for booleans and simple-precision floating point types, efficient strided and unaligned array operations and multi-threading code. Ivan Vilata contributed support for strings. Gregor Thalhammer implemented the support for Intel VML (Vector Math Library). Mark Wiebe added support for the new iterator in NumPy, which allows for better performance in more scenarios (like broadcasting, fortran-ordered or non-native byte orderings). Gaëtan de Menten contributed important bug fixes and speed enhancements. Antonio Valentino contributed the port to Python 3. Google Inc. contributed bug fixes. David Cox improved readability of the Readme. Robert A. McLeod contributed bug fixes and ported the documentation to numexpr.readthedocs.io.numexpr-2.7.1/CMakeLists.txt000066400000000000000000000064311360375525100157720ustar00rootroot00000000000000# WARNING! EXPERIMENTAL! Use setup.py if you're not familiar with cmake. # # We recommend that you create a separate directory for the build, # so that the build files aren't mixed in with the source files. # e.g. # $ mkdir build-cmake # $ cd build-cmake # $ cmake .. # $ make # # MacOSX Notes: # On MacOSX, it may default to 64-bit, even if your Python is 32-bit. # The linker will give NO WARNING, and the resulting .so file will # fail to load in Python. To fix this, run cmake as follows, # assuming you're in /build-cmake/: # cmake -DCMAKE_OSX_ARCHITECTURES=i386 .. # To debug this issue, you can compare "lipo -info `which python`" # with "lipo -info numexpr.so". They should have the same platform info. # # Further problems on OS X appear to be related to EPD Python. CMake's # default detection may be detecting the wrong python to link against. # # Windows Notes: # Python 2.7 is built with Visual C++ 9 (aka 2008). This is the one # you should pick when running cmake-gui. Be sure to switch the build # configuration from Debug to Release (or RelWithDebInfo) in Visual Studio. project(numexpr) cmake_minimum_required(VERSION 2.8) # Force the default build type to be Release, because a Debug # build doesn't work properly with the default Python build if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." 
FORCE) endif() set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}) find_package(PythonInterp REQUIRED) find_package(PythonLibsNew REQUIRED) find_package(NumPy REQUIRED) # Default install location for Python packages if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(CMAKE_INSTALL_PREFIX "${PYTHON_SITE_PACKAGES}" CACHE STRING "Choose the Python module directory (default site-packages)" FORCE) endif() # Require version >= 1.6 if(NUMPY_VERSION_DECIMAL LESS 10600) message(FATAL_ERROR, "NumExpr requires NumPy >= 1.6") endif() include_directories( ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIRS} ) set(numexpr_SRC numexpr/interpreter.cpp numexpr/module.cpp numexpr/numexpr_object.cpp numexpr/complex_functions.hpp numexpr/functions.hpp numexpr/interpreter.hpp numexpr/module.hpp numexpr/missing_posix_functions.hpp numexpr/msvc_function_stubs.hpp numexpr/numexpr_config.hpp numexpr/numexpr_object.hpp numexpr/opcodes.hpp ) if(CMAKE_HOST_WIN32) set(numexpr_SRC ${numexpr_SRC} numexpr/win32/pthread.c ) endif() python_add_module(interpreter ${numexpr_SRC}) # Generate __config__.py. This is a dummy placeholder, as I # don't know why it's here. file(WRITE "${PROJECT_BINARY_DIR}/__config__.py" "# This file is generated by a CMakeFiles.txt configuration\n" "__all__ = ['get_info','show']\n" "def get_info(name):\n" " return None\n" "def show():\n" " print('someone called show()')\n") # Install all the Python scripts install(DIRECTORY numexpr DESTINATION "${CMAKE_INSTALL_PREFIX}" FILES_MATCHING PATTERN "*.py") # Install __config__.py install(FILES "${PROJECT_BINARY_DIR}/__config__.py" DESTINATION "${CMAKE_INSTALL_PREFIX}/numexpr") # Install the module install(TARGETS interpreter DESTINATION "${CMAKE_INSTALL_PREFIX}/numexpr") numexpr-2.7.1/FindNumPy.cmake000066400000000000000000000102321360375525100160770ustar00rootroot00000000000000# - Find the NumPy libraries # This module finds if NumPy is installed, and sets the following variables # indicating where it is. # # TODO: Update to provide the libraries and paths for linking npymath lib. # # NUMPY_FOUND - was NumPy found # NUMPY_VERSION - the version of NumPy found as a string # NUMPY_VERSION_MAJOR - the major version number of NumPy # NUMPY_VERSION_MINOR - the minor version number of NumPy # NUMPY_VERSION_PATCH - the patch version number of NumPy # NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601 # NUMPY_INCLUDE_DIRS - path to the NumPy include files #============================================================================ # Copyright 2012 Continuum Analytics, Inc. # # MIT License # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files # (the "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to permit # persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be included # in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # #============================================================================ # Finding NumPy involves calling the Python interpreter if(NumPy_FIND_REQUIRED) find_package(PythonInterp REQUIRED) else() find_package(PythonInterp) endif() if(NOT PYTHONINTERP_FOUND) set(NUMPY_FOUND FALSE) return() endif() execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import numpy as n; print(n.__version__); print(n.get_include());" RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS OUTPUT_VARIABLE _NUMPY_VALUES_OUTPUT ERROR_VARIABLE _NUMPY_ERROR_VALUE OUTPUT_STRIP_TRAILING_WHITESPACE) if(NOT _NUMPY_SEARCH_SUCCESS MATCHES 0) if(NumPy_FIND_REQUIRED) message(FATAL_ERROR "NumPy import failure:\n${_NUMPY_ERROR_VALUE}") endif() set(NUMPY_FOUND FALSE) return() endif() # Convert the process output into a list string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES ${_NUMPY_VALUES_OUTPUT}) string(REGEX REPLACE "\n" ";" _NUMPY_VALUES ${_NUMPY_VALUES}) # Just in case there is unexpected output from the Python command. list(GET _NUMPY_VALUES -2 NUMPY_VERSION) list(GET _NUMPY_VALUES -1 NUMPY_INCLUDE_DIRS) string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" _VER_CHECK "${NUMPY_VERSION}") if("${_VER_CHECK}" STREQUAL "") # The output from Python was unexpected. Raise an error always # here, because we found NumPy, but it appears to be corrupted somehow. message(FATAL_ERROR "Requested version and include path from NumPy, got instead:\n${_NUMPY_VALUES_OUTPUT}\n") return() endif() # Make sure all directory separators are '/' string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS ${NUMPY_INCLUDE_DIRS}) # Get the major and minor version numbers string(REGEX REPLACE "\\." ";" _NUMPY_VERSION_LIST ${NUMPY_VERSION}) list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR) list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR) list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH) string(REGEX MATCH "[0-9]*" NUMPY_VERSION_PATCH ${NUMPY_VERSION_PATCH}) math(EXPR NUMPY_VERSION_DECIMAL "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}") find_package_message(NUMPY "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}" "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}") set(NUMPY_FOUND TRUE) numexpr-2.7.1/FindPythonLibsNew.cmake000066400000000000000000000233301360375525100175770ustar00rootroot00000000000000# - Find python libraries # This module finds the libraries corresponding to the Python interpeter # FindPythonInterp provides. # This code sets the following variables: # # PYTHONLIBS_FOUND - have the Python libs been found # PYTHON_PREFIX - path to the Python installation # PYTHON_LIBRARIES - path to the python library # PYTHON_INCLUDE_DIRS - path to where Python.h is found # PYTHON_SITE_PACKAGES - path to installation site-packages # PYTHON_IS_DEBUG - whether the Python interpreter is a debug build # # PYTHON_INCLUDE_PATH - path to where Python.h is found (deprecated) # # A function PYTHON_ADD_MODULE( src1 src2 ... srcN) is defined # to build modules for python. # # Thanks to talljimbo for the patch adding the 'LDVERSION' config # variable usage. #============================================================================= # Copyright 2001-2009 Kitware, Inc. # Copyright 2012 Continuum Analytics, Inc. # # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # * Neither the names of Kitware, Inc., the Insight Software Consortium, # nor the names of their contributors may be used to endorse or promote # products derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #============================================================================= # (To distribute this file outside of CMake, substitute the full # License text for the above reference.) # Use the Python interpreter to find the libs. if(PythonLibsNew_FIND_REQUIRED) find_package(PythonInterp REQUIRED) else() find_package(PythonInterp) endif() if(NOT PYTHONINTERP_FOUND) set(PYTHONLIBS_FOUND FALSE) return() endif() # According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter # testing whether sys has the gettotalrefcount function is a reliable, cross-platform # way to detect a CPython debug interpreter. # # The library suffix is from the config var LDVERSION sometimes, otherwise # VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows. 
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "from distutils import sysconfig as s;import sys;import struct; print('.'.join(str(v) for v in sys.version_info)); print(sys.prefix); print(s.get_python_inc(plat_specific=True)); print(s.get_python_lib(plat_specific=True)); print(s.get_config_var('SO')); print(hasattr(sys, 'gettotalrefcount')+0); print(struct.calcsize('@P')); print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION')); " RESULT_VARIABLE _PYTHON_SUCCESS OUTPUT_VARIABLE _PYTHON_VALUES ERROR_VARIABLE _PYTHON_ERROR_VALUE OUTPUT_STRIP_TRAILING_WHITESPACE) if(NOT _PYTHON_SUCCESS MATCHES 0) if(PythonLibsNew_FIND_REQUIRED) message(FATAL_ERROR "Python config failure:\n${_PYTHON_ERROR_VALUE}") endif() set(PYTHONLIBS_FOUND FALSE) return() endif() # Convert the process output into a list string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES}) string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES}) list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST) list(GET _PYTHON_VALUES 1 PYTHON_PREFIX) list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR) list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES) list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION) list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG) list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P) list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX) # Make sure the Python has the same pointer-size as the chosen compiler # Skip the check on OS X, it doesn't consistently have CMAKE_SIZEOF_VOID_P defined if((NOT APPLE) AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}")) if(PythonLibsNew_FIND_REQUIRED) math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8") math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8") message(FATAL_ERROR "Python config failure: Python is ${_PYTHON_BITS}-bit, " "chosen compiler is ${_CMAKE_BITS}-bit") endif() set(PYTHONLIBS_FOUND FALSE) return() endif() # The built-in FindPython didn't always give the version numbers string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST}) list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR) list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR) list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH) # Make sure all directory separators are '/' string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX}) string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR}) string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES}) # TODO: All the nuances of CPython debug builds have not been dealt with/tested. if(PYTHON_IS_DEBUG) set(PYTHON_MODULE_EXTENSION "_d${PYTHON_MODULE_EXTENSION}") endif() if(CMAKE_HOST_WIN32) set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib") elseif(APPLE) # Seems to require "-undefined dynamic_lookup" instead of linking # against the .dylib, otherwise it crashes. This flag is added # below set(PYTHON_LIBRARY "") #set(PYTHON_LIBRARY # "${PYTHON_PREFIX}/lib/libpython${PYTHON_LIBRARY_SUFFIX}.dylib") else() if(${PYTHON_SIZEOF_VOID_P} MATCHES 8) set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib64" "${PYTHON_PREFIX}/lib") else() set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib") endif() message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}") # Probably this needs to be more involved. It would be nice if the config # information the python interpreter itself gave us were more complete. 
find_library(PYTHON_LIBRARY NAMES "python${PYTHON_LIBRARY_SUFFIX}" PATHS ${_PYTHON_LIBS_SEARCH} NO_DEFAULT_PATH) message(STATUS "Found Python lib ${PYTHON_LIBRARY}") endif() # For backward compatibility, set PYTHON_INCLUDE_PATH, but make it internal. SET(PYTHON_INCLUDE_PATH "${PYTHON_INCLUDE_DIR}" CACHE INTERNAL "Path to where Python.h is found (deprecated)") MARK_AS_ADVANCED( PYTHON_LIBRARY PYTHON_INCLUDE_DIR ) # We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the # cache entries because they are meant to specify the location of a single # library. We now set the variables listed by the documentation for this # module. SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}") SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}") SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}") # Don't know how to get to this directory, just doing something simple :P #INCLUDE(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) #FIND_PACKAGE_HANDLE_STANDARD_ARGS(PythonLibs DEFAULT_MSG PYTHON_LIBRARIES PYTHON_INCLUDE_DIRS) find_package_message(PYTHON "Found PythonLibs: ${PYTHON_LIBRARY}" "${PYTHON_EXECUTABLE}${PYTHON_VERSION}") # PYTHON_ADD_MODULE( src1 src2 ... srcN) is used to build modules for python. FUNCTION(PYTHON_ADD_MODULE _NAME ) GET_PROPERTY(_TARGET_SUPPORTS_SHARED_LIBS GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS) OPTION(PYTHON_ENABLE_MODULE_${_NAME} "Add module ${_NAME}" TRUE) OPTION(PYTHON_MODULE_${_NAME}_BUILD_SHARED "Add module ${_NAME} shared" ${_TARGET_SUPPORTS_SHARED_LIBS}) # Mark these options as advanced MARK_AS_ADVANCED(PYTHON_ENABLE_MODULE_${_NAME} PYTHON_MODULE_${_NAME}_BUILD_SHARED) IF(PYTHON_ENABLE_MODULE_${_NAME}) IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED) SET(PY_MODULE_TYPE MODULE) ELSE(PYTHON_MODULE_${_NAME}_BUILD_SHARED) SET(PY_MODULE_TYPE STATIC) SET_PROPERTY(GLOBAL APPEND PROPERTY PY_STATIC_MODULES_LIST ${_NAME}) ENDIF(PYTHON_MODULE_${_NAME}_BUILD_SHARED) SET_PROPERTY(GLOBAL APPEND PROPERTY PY_MODULES_LIST ${_NAME}) ADD_LIBRARY(${_NAME} ${PY_MODULE_TYPE} ${ARGN}) IF(APPLE) # On OS X, linking against the Python libraries causes # segfaults, so do this dynamic lookup instead. SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") ELSE() TARGET_LINK_LIBRARIES(${_NAME} ${PYTHON_LIBRARIES}) ENDIF() IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED) SET_TARGET_PROPERTIES(${_NAME} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}") SET_TARGET_PROPERTIES(${_NAME} PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}") ELSE() ENDIF() ENDIF(PYTHON_ENABLE_MODULE_${_NAME}) ENDFUNCTION(PYTHON_ADD_MODULE) numexpr-2.7.1/INSTALL.rst000066400000000000000000000031151360375525100150660ustar00rootroot00000000000000================== Installing Numexpr ================== These are instructions for installing Numexpr on Unix systems. For Windows, it is best to install it from binaries. However, you should note that, for the time being, we cannot provide Windows binaries with MKL support. Building ======== This version of `Numexpr` requires Python 2.6 or greater, and NumPy 1.6 or greater. It's built in the standard Python way:: $ python setup.py build $ python setup.py install You can test `numexpr` with: $ python -c "import numexpr; numexpr.test()" Enabling Intel's MKL support ============================ numexpr includes support for Intel's MKL library. This allows for better performance on Intel architectures, mainly when evaluating transcendental functions (trigonometrical, exponential...). It also enables numexpr using several CPU cores. 
If you have Intel's MKL, just copy the `site.cfg.example` that comes in the distribution to `site.cfg` and edit the latter giving proper directions on how to find your MKL libraries in your system. After doing this, you can proceed with the usual building instructions listed above. Pay attention to the messages during the building process in order to know whether MKL has been detected or not. Finally, you can check the speed-ups on your machine by running the `bench/vml_timing.py` script (you can play with different parameters to the `set_vml_accuracy_mode()` and `set_vml_num_threads()` functions in the script so as to see how it would affect performance). .. Local Variables: .. mode: text .. coding: utf-8 .. fill-column: 70 .. End: numexpr-2.7.1/LICENSE.txt000066400000000000000000000022511360375525100150510ustar00rootroot00000000000000Copyright (c) 2007,2008 David M. Cooke Copyright (c) 2009,2010 Francesc Alted Copyright (c) 2011- See AUTHORS.txt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. numexpr-2.7.1/LICENSES/000077500000000000000000000000001360375525100144335ustar00rootroot00000000000000numexpr-2.7.1/LICENSES/cpuinfo.txt000066400000000000000000000030201360375525100166320ustar00rootroot00000000000000Copyright statement for `cpuinfo` module. Copyright 2002 Pearu Peterson all rights reserved, Pearu Peterson Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Pearu Peterson nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. HIS SOFTWARE IS PROVIDED BY PEARU PETERSON ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL PEARU PETERSON BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. numexpr-2.7.1/MANIFEST.in000066400000000000000000000003471360375525100147700ustar00rootroot00000000000000include MANIFEST.in include *.rst *.txt *.cfg site.cfg.example recursive-include numexpr *.cpp *.hpp *.py recursive-include numexpr/win32 *.c *.h exclude numexpr/__config__.py RELEASING.txt site.cfg recursive-include bench *.py numexpr-2.7.1/README.rst000066400000000000000000000106141360375525100147170ustar00rootroot00000000000000====================================================== NumExpr: Fast numerical expression evaluator for NumPy ====================================================== :Author: David M. Cooke, Francesc Alted and others :Contact: faltet@gmail.com :URL: https://github.com/pydata/numexpr :Documentation: http://numexpr.readthedocs.io/en/latest/ :Travis CI: |travis| :Appveyor: |appveyor| :PyPi: |version| :DOI: |doi| :readthedocs: |docs| .. |travis| image:: https://travis-ci.org/pydata/numexpr.png?branch=master :target: https://travis-ci.org/pydata/numexpr .. |appveyor| image:: https://ci.appveyor.com/api/projects/status/we2ff01vqlmlb9ip :target: https://ci.appveyor.com/project/robbmcleod/numexpr .. |docs| image:: https://readthedocs.org/projects/numexpr/badge/?version=latest :target: http://numexpr.readthedocs.io/en/latest .. |doi| image:: https://zenodo.org/badge/doi/10.5281/zenodo.2483274.svg :target: https://doi.org/10.5281/zenodo.2483274 .. |version| image:: https://img.shields.io/pypi/v/numexpr.png :target: https://pypi.python.org/pypi/numexpr What is NumExpr? ---------------- NumExpr is a fast numerical expression evaluator for NumPy. With it, expressions that operate on arrays (like :code:`'3*a+4*b'`) are accelerated and use less memory than doing the same calculation in Python. In addition, its multi-threaded capabilities can make use of all your cores -- which generally results in substantial performance scaling compared to NumPy. Last but not least, numexpr can make use of Intel's VML (Vector Math Library, normally integrated in its Math Kernel Library, or MKL). This allows further acceleration of transcendent expressions. How NumExpr achieves high performance ------------------------------------- The main reason why NumExpr achieves better performance than NumPy is that it avoids allocating memory for intermediate results. This results in better cache utilization and reduces memory access in general. Due to this, NumExpr works best with large arrays. NumExpr parses expressions into its own op-codes that are then used by an integrated computing virtual machine. The array operands are split into small chunks that easily fit in the cache of the CPU and passed to the virtual machine. The virtual machine then applies the operations on each chunk. It's worth noting that all temporaries and constants in the expression are also chunked. Chunks are distributed among the available cores of the CPU, resulting in highly parallelized code execution. The result is that NumExpr can get the most of your machine computing capabilities for array-wise computations. 
Common speed-ups with regard to NumPy are usually between 0.95x (for very simple expressions like :code:`'a + 1'`) and 4x (for relatively complex ones like :code:`'a*b-4.1*a > 2.5*b'`), although much higher speed-ups can be achieved for some functions and complex math operations (up to 15x in some cases). NumExpr performs best on matrices that are too large to fit in L1 CPU cache. In order to get a better idea on the different speed-ups that can be achieved on your platform, run the provided benchmarks. Usage ----- :: >>> import numpy as np >>> import numexpr as ne >>> a = np.arange(1e6) # Choose large arrays for better speedups >>> b = np.arange(1e6) >>> ne.evaluate("a + 1") # a simple expression array([ 1.00000000e+00, 2.00000000e+00, 3.00000000e+00, ..., 9.99998000e+05, 9.99999000e+05, 1.00000000e+06]) >>> ne.evaluate('a*b-4.1*a > 2.5*b') # a more complex one array([False, False, False, ..., True, True, True], dtype=bool) >>> ne.evaluate("sin(a) + arcsinh(a/b)") # you can also use functions array([ NaN, 1.72284457, 1.79067101, ..., 1.09567006, 0.17523598, -0.09597844]) >>> s = np.array(['abba', 'abbb', 'abbcdef']) >>> ne.evaluate("'abba' == s") # string arrays are supported too array([ True, False, False], dtype=bool) Documentation ------------- Please see the official documentation at `numexpr.readthedocs.io `_. Included is a user guide, benchmark results, and the reference API. Authors ------- Please see `AUTHORS.txt `_. License ------- NumExpr is distributed under the `MIT `_ license. .. Local Variables: .. mode: text .. coding: utf-8 .. fill-column: 70 .. End: numexpr-2.7.1/RELEASE_NOTES.rst000066400000000000000000000544111360375525100160150ustar00rootroot00000000000000==================================== Release notes for Numexpr 2.7 series ==================================== Changes from 2.7.0 to 2.7.1 ---------------------------- - Python 3.8 support has been added. - Python 3.4 support is discontinued. - The tests are now compatible with NumPy 1.18. - `site.cfg.example` was updated to use the `libraries` tag instead of `mkl_libs`, which is recommended for newer version of NumPy. Changes from 2.6.9 to 2.7.0 ---------------------------- - The default number of 'safe' threads has been restored to the historical limit of 8, if the environment variable "NUMEXPR_MAX_THREADS" has not been set. - Thanks to @eltoder who fixed a small memory leak. - Support for Python 2.6 has been dropped, as it is no longer available via TravisCI. - A typo in the test suite that had a less than rather than greater than symbol in the NumPy version check has been corrected thanks to dhomeier. - The file `site.cfg` was being accidently included in the sdists on PyPi. It has now been excluded. Changes from 2.6.8 to 2.6.9 --------------------------- - Thanks to Mike Toews for more robust handling of the thread-setting environment variables. - With Appveyor updating to Python 3.7.1, wheels for Python 3.7 are now available in addition to those for other OSes. Changes from 2.6.7 to 2.6.8 --------------------------- - Add check to make sure that `f_locals` is not actually `f_globals` when we do the `f_locals` clear to avoid the #310 memory leak issue. - Compare NumPy versions using `distutils.version.LooseVersion` to avoid issue #312 when working with NumPy development versions. - As part of `multibuild`, wheels for Python 3.7 for Linux and MacOSX are now available on PyPI. 
Changes from 2.6.6 to 2.6.7 --------------------------- - Thanks to Lehman Garrison for finding and fixing a bug that exhibited memory leak-like behavior. The use in `numexpr.evaluate` of `sys._getframe` combined with `.f_locals` from that frame object results an extra refcount on objects in the frame that calls `numexpr.evaluate`, and not `evaluate`'s frame. So if the calling frame remains in scope for a long time (such as a procedural script where `numexpr` is called from the base frame) garbage collection would never occur. - Imports for the `numexpr.test` submodule were made lazy in the `numexpr` module. Changes from 2.6.5 to 2.6.6 --------------------------- - Thanks to Mark Dickinson for a fix to the thread barrier that occassionally suffered from spurious wakeups on MacOSX. Changes from 2.6.4 to 2.6.5 --------------------------- - The maximum thread count can now be set at import-time by setting the environment variable 'NUMEXPR_MAX_THREADS'. The default number of max threads was lowered from 4096 (which was deemed excessive) to 64. - A number of imports were removed (pkg_resources) or made lazy (cpuinfo) in order to speed load-times for downstream packages (such as `pandas`, `sympy`, and `tables`). Import time has dropped from about 330 ms to 90 ms. Thanks to Jason Sachs for pointing out the source of the slow-down. - Thanks to Alvaro Lopez Ortega for updates to benchmarks to be compatible with Python 3. - Travis and AppVeyor now fail if the test module fails or errors. - Thanks to Mahdi Ben Jelloul for a patch that removed a bug where constants in `where` calls would raise a ValueError. - Fixed a bug whereby all-constant power operations would lead to infinite recursion. Changes from 2.6.3 to 2.6.4 --------------------------- - Christoph Gohlke noticed a lack of coverage for the 2.6.3 `floor` and `ceil` functions for MKL that caused seg-faults in test, so thanks to him for that. Changes from 2.6.2 to 2.6.3 --------------------------- - Documentation now available at readthedocs.io_. - Support for floor() and ceil() functions added by Caleb P. Burns. - NumPy requirement increased from 1.6 to 1.7 due to changes in iterator flags (#245). - Sphinx autodocs support added for documentation on readthedocs.org. - Fixed a bug where complex constants would return an error, fixing problems with `sympy` when using NumExpr as a backend. - Fix for #277 whereby arrays of shape (1,...) would be reduced as if they were full reduction. Behavoir now matches that of NumPy. - String literals are automatically encoded into 'ascii' bytes for convience (see #281). .. _readthedocs.io: http://numexpr.readthedocs.io Changes from 2.6.1 to 2.6.2 --------------------------- - Updates to keep with API changes in newer NumPy versions (#228). Thanks to Oleksandr Pavlyk. - Removed several warnings (#226 and #227). Thanks to Oleksander Pavlyk. - Fix bugs in function `stringcontains()` (#230). Thanks to Alexander Shadchin. - Detection of the POWER processor (#232). Thanks to Breno Leitao. - Fix pow result casting (#235). Thanks to Fernando Seiti Furusato. - Fix integers to negative integer powers (#240). Thanks to Antonio Valentino. - Detect numpy exceptions in expression evaluation (#240). Thanks to Antonio Valentino. - Better handling of RC versions (#243). Thanks to Antonio Valentino. Changes from 2.6.0 to 2.6.1 --------------------------- - Fixed a performance regression in some situations as consequence of increasing too much the BLOCK_SIZE1 constant. 
After more careful benchmarks (both in VML and non-VML modes), the value has been set again to 1024 (down from 8192). The benchmarks have been made with a relatively new processor (Intel Xeon E3-1245 v5 @ 3.50GHz), so they should work well for a good range of processors again. - Added NetBSD support to CPU detection. Thanks to Thomas Klausner. Changes from 2.5.2 to 2.6.0 --------------------------- - Introduced a new re_evaluate() function for re-evaluating the previous executed array expression without any check. This is meant for accelerating loops that are re-evaluating the same expression repeatedly without changing anything else than the operands. If unsure, use evaluate() which is safer. - The BLOCK_SIZE1 and BLOCK_SIZE2 constants have been re-checked in order to find a value maximizing most of the benchmarks in bench/ directory. The new values (8192 and 16 respectively) give somewhat better results (~5%) overall. The CPU used for fine tuning is a relatively new Haswell processor (E3-1240 v3). - The '--name' flag for `setup.py` returning the name of the package is honored now (issue #215). Changes from 2.5.1 to 2.5.2 --------------------------- - conj() and abs() actually added as VML-powered functions, preventing the same problems than log10() before (PR #212). Thanks to Tom Kooij for the fix! Changes from 2.5 to 2.5.1 ------------------------- - Fix for log10() and conj() functions. These produced wrong results when numexpr was compiled with Intel's MKL (which is a popular build since Anaconda ships it by default) and non-contiguous data (issue #210). Thanks to Arne de Laat and Tom Kooij for reporting and providing a nice test unit. - Fix that allows numexpr-powered apps to be profiled with pympler. Thanks to @nbecker. Changes from 2.4.6 to 2.5 ------------------------- - Added locking for allowing the use of numexpr in multi-threaded callers (this does not prevent numexpr to use multiple cores simultaneously). (PR #199, Antoine Pitrou, PR #200, Jenn Olsen). - Added new min() and max() functions (PR #195, CJ Carey). Changes from 2.4.5 to 2.4.6 --------------------------- - Fixed some UserWarnings in Solaris (PR #189, Graham Jones). - Better handling of MSVC defines. (#168, Francesc Alted). Changes from 2.4.4 to 2.4.5 --------------------------- - Undone a 'fix' for a harmless data race. (#185 Benedikt Reinartz, Francesc Alted). - Ignore NumPy warnings (overflow/underflow, divide by zero and others) that only show up in Python3. Masking these warnings in tests is fine because all the results are checked to be valid. (#183, Francesc Alted). Changes from 2.4.3 to 2.4.4 --------------------------- - Fix bad #ifdef for including stdint on Windows (PR #186, Mike Sarahan). Changes from 2.4.3 to 2.4.4 --------------------------- * Honor OMP_NUM_THREADS as a fallback in case NUMEXPR_NUM_THREADS is not set. Fixes #161. (PR #175, Stefan Erb). * Added support for AppVeyor (PR #178 Andrea Bedini) * Fix to allow numexpr to be imported after eventlet.monkey_patch(), as suggested in #118 (PR #180 Ben Moran). * Fix harmless data race that triggers false positives in ThreadSanitizer. (PR #179, Clement Courbet). * Fixed some string tests on Python 3 (PR #182, Antonio Valentino). Changes from 2.4.2 to 2.4.3 --------------------------- * Comparisons with empty strings work correctly now. Fixes #121 and PyTables #184. Changes from 2.4.1 to 2.4.2 --------------------------- * Improved setup.py so that pip can query the name and version without actually doing the installation. Thanks to Joris Borgdorff. 
Changes from 2.4 to 2.4.1
-------------------------

* Added more configuration examples for compiling with MKL/VML support.
  Thanks to Davide Del Vento.

* Symbol MKL_VML changed into MKL_DOMAIN_VML because the former is
  deprecated in newer MKL. Thanks to Nick Papior Andersen.

* Better determination of methods in `cpuinfo` module. Thanks to Marc Jofre.

* Improved NumPy version determination (handy for 1.10.0). Thanks to
  Åsmund Hjulstad.

* Benchmarks run now with both Python 2 and Python 3. Thanks to Zoran
  Plesivčak.

Changes from 2.3.1 to 2.4
-------------------------

* A new `contains()` function has been added for detecting substrings in
  strings. Only plain strings (bytes) are supported for now. See PR #135
  and ticket #142. Thanks to Marcin Krol.

* New version of setup.py that allows better management of the NumPy
  dependency. See PR #133. Thanks to Aleks Bunin.

Changes from 2.3 to 2.3.1
-------------------------

* Added support for shift-left (<<) and shift-right (>>) binary operators.
  See PR #131. Thanks to fish2000!

* Removed the rpath flag for the GCC linker, because it is probably not
  necessary and it chokes clang.

Changes from 2.2.2 to 2.3
-------------------------

* Site has been migrated to https://github.com/pydata/numexpr. All new
  tickets and PRs should be directed there.

* [ENH] A `conj()` function for computing the conjugate of complex arrays
  has been added. Thanks to David Menéndez. See PR #125.

* [FIX] Fixed a DeprecationWarning derived from using oa_ndim == 0 and
  op_axes == NULL when using NpyIter_AdvancedNew() and NumPy 1.8. Thanks
  to Mark Wiebe for advice on how to fix this properly.

Changes from 2.2.1 to 2.2.2
---------------------------

* The `copy_args` argument of the `NumExpr` function has been brought back.
  This has been mainly necessary for compatibility with `PyTables < 3.0`,
  which I decided to continue to support. Fixed #115.

* The `__nonzero__` method in the `ExpressionNode` class has been commented
  out. This is also for compatibility with `PyTables < 3.0`. See #24 for
  details.

* Fixed the type of some parameters in the C extension so that the s390
  architecture compiles. Fixes #116. Thanks to Antonio Valentino for
  reporting and the patch.

Changes from 2.2 to 2.2.1
-------------------------

* Fixes a secondary effect of "from numpy.testing import `*`", where
  division was also being imported; now only the necessary functions from
  there are imported. Thanks to Christoph Gohlke for the patch.

Changes from 2.1 to 2.2
-----------------------

* [LICENSE] Fixed a problem with the license of the
  numexpr/win32/pthread.{c,h} files emulating pthreads on Windows
  platforms. After permission from the original authors was granted, these
  files adopt the MIT license and can be redistributed without problems.
  See issue #109 for details
  (https://code.google.com/p/numexpr/issues/detail?id=110).

* [ENH] Improved the algorithm to decide the initial number of threads to
  be used. This was necessary because, by default, numexpr was using a
  number of threads equal to the detected number of cores, which can be
  just too much for modern systems, where this number can be too high (and
  counterproductive for performance in many cases). Now, the
  'NUMEXPR_NUM_THREADS' environment variable is honored, and when it is not
  present, a maximum of *8* threads is set up initially. The new algorithm
  is fully described in the Users Guide, in the note of the 'General
  routines' section:
  https://code.google.com/p/numexpr/wiki/UsersGuide#General_routines.
  Closes #110.
* [ENH] numexpr.test() returns `TestResult` instead of None now. Closes #111. * [FIX] Modulus with zero with integers no longer crashes the interpreter. It nows puts a zero in the result. Fixes #107. * [API CLEAN] Removed `copy_args` argument of `evaluate`. This should only be used by old versions of PyTables (< 3.0). * [DOC] Documented the `optimization` and `truediv` flags of `evaluate` in Users Guide (https://code.google.com/p/numexpr/wiki/UsersGuide). Changes from 2.0.1 to 2.1 --------------------------- * Dropped compatibility with Python < 2.6. * Improve compatibiity with Python 3: - switch from PyString to PyBytes API (requires Python >- 2.6). - fixed incompatibilities regarding the int/long API - use the Py_TYPE macro - use the PyVarObject_HEAD_INIT macro instead of PyObject_HEAD_INIT * Fixed several issues with different platforms not supporting multithreading or subprocess properly (see tickets #75 and #77). * Now, when trying to use pure Python boolean operators, 'and', 'or' and 'not', an error is issued suggesting that '&', '|' and '~' should be used instead (fixes #24). Changes from 2.0 to 2.0.1 ------------------------- * Added compatibility with Python 2.5 (2.4 is definitely not supported anymore). * `numexpr.evaluate` is fully documented now, in particular the new `out`, `order` and `casting` parameters. * Reduction operations are fully documented now. * Negative axis in reductions are not supported (they have never been actually), and a `ValueError` will be raised if they are used. Changes from 1.x series to 2.0 ------------------------------ - Added support for the new iterator object in NumPy 1.6 and later. This allows for better performance with operations that implies broadcast operations, fortran-ordered or non-native byte orderings. Performance for other scenarios is preserved (except for very small arrays). - Division in numexpr is consistent now with Python/NumPy. Fixes #22 and #58. - Constants like "2." or "2.0" must be evaluated as float, not integer. Fixes #59. - `evaluate()` function has received a new parameter `out` for storing the result in already allocated arrays. This is very useful when dealing with large arrays, and a allocating new space for keeping the result is not acceptable. Closes #56. - Maximum number of threads raised from 256 to 4096. Machines with a higher number of cores will still be able to import numexpr, but limited to 4096 (which is an absurdly high number already). Changes from 1.4.1 to 1.4.2 --------------------------- - Multithreaded operation is disabled for small arrays (< 32 KB). This allows to remove the overhead of multithreading for such a small arrays. Closes #36. - Dividing int arrays by zero gives a 0 as result now (and not a floating point exception anymore. This behaviour mimics NumPy. Thanks to Gaëtan de Menten for the fix. Closes #37. - When compiled with VML support, the number of threads is set to 1 for VML core, and to the number of cores for the native pthreads implementation. This leads to much better performance. Closes #39. - Fixed different issues with reduction operations (`sum`, `prod`). The problem is that the threaded code does not work well for broadcasting or reduction operations. Now, the serial code is used in those cases. Closes #41. - Optimization of "compilation phase" through a better hash. This can lead up to a 25% of improvement when operating with variable expressions over small arrays. Thanks to Gaëtan de Menten for the patch. Closes #43. 
- The ``set_num_threads`` now returns the number of previous thread setting, as stated in the docstrings. Changes from 1.4 to 1.4.1 ------------------------- - Mingw32 can also work with pthreads compatibility code for win32. Fixes #31. - Fixed a problem that used to happen when running Numexpr with threads in subprocesses. It seems that threads needs to be initialized whenever a subprocess is created. Fixes #33. - The GIL (Global Interpreter Lock) is released during computations. This should allow for better resource usage for multithreaded apps. Fixes #35. Changes from 1.3.1 to 1.4 ------------------------- - Added support for multi-threading in pure C. This is to avoid the GIL and allows to squeeze the best performance in both multi-core machines. - David Cooke contributed a thorough refactorization of the opcode machinery for the virtual machine. With this, it is really easy to add more opcodes. See: http://code.google.com/p/numexpr/issues/detail?id-28 as an example. - Added a couple of opcodes to VM: where_bbbb and cast_ib. The first allow to get boolean arrays out of the `where` function. The second allows to cast a boolean array into an integer one. Thanks to gdementen for his contribution. - Fix negation of `int64` numbers. Closes #25. - Using a `npy_intp` datatype (instead of plain `int`) so as to be able to manage arrays larger than 2 GB. Changes from 1.3 to 1.3.1 ------------------------- - Due to an oversight, ``uint32`` types were not properly supported. That has been solved. Fixes #19. - Function `abs` for computing the absolute value added. However, it does not strictly follow NumPy conventions. See ``README.txt`` or website docs for more info on this. Thanks to Pauli Virtanen for the patch. Fixes #20. Changes from 1.2 to 1.3 ----------------------- - A new type called internally `float` has been implemented so as to be able to work natively with single-precision floating points. This prevents the silent upcast to `double` types that was taking place in previous versions, so allowing both an improved performance and an optimal usage of memory for the single-precision computations. However, the casting rules for floating point types slightly differs from those of NumPy. See: http://code.google.com/p/numexpr/wiki/Overview or the README.txt file for more info on this issue. - Support for Python 2.6 added. - When linking with the MKL, added a '-rpath' option to the link step so that the paths to MKL libraries are automatically included into the runtime library search path of the final package (i.e. the user won't need to update its LD_LIBRARY_PATH or LD_RUN_PATH environment variables anymore). Fixes #16. Changes from 1.1.1 to 1.2 ------------------------- - Support for Intel's VML (Vector Math Library) added, normally included in Intel's MKL (Math Kernel Library). In addition, when the VML support is on, several processors can be used in parallel (see the new `set_vml_num_threads()` function). With that, the computations of transcendental functions can be accelerated quite a few. For example, typical speed-ups when using one single core for contiguous arrays are 3x with peaks of 7.5x (for the pow() function). When using 2 cores the speed-ups are around 4x and 14x respectively. Closes #9. - Some new VML-related functions have been added: * set_vml_accuracy_mode(mode): Set the accuracy for VML operations. * set_vml_num_threads(nthreads): Suggests a maximum number of threads to be used in VML operations. * get_vml_version(): Get the VML/MKL library version. 
See the README.txt for more info about them. - In order to easily allow the detection of the MKL, the setup.py has been updated to use the numpy.distutils. So, if you are already used to link NumPy/SciPy with MKL, then you will find that giving VML support to numexpr works almost the same. - A new `print_versions()` function has been made available. This allows to quickly print the versions on which numexpr is based on. Very handy for issue reporting purposes. - The `numexpr.numexpr` compiler function has been renamed to `numexpr.NumExpr` in order to avoid name collisions with the name of the package (!). This function is mainly for internal use, so you should not need to upgrade your existing numexpr scripts. Changes from 1.1 to 1.1.1 ------------------------- - The case for multidimensional array operands is properly accelerated now. Added a new benchmark (based on a script provided by Andrew Collette, thanks!) for easily testing this case in the future. Closes #12. - Added a fix to avoid the caches in numexpr to grow too much. The dictionary caches are kept now always with less than 256 entries. Closes #11. - The VERSION file is correctly copied now (it was not present for the 1.1 tar file, I don't know exactly why). Closes #8. Changes from 1.0 to 1.1 ----------------------- - Numexpr can work now in threaded environments. Fixes #2. - The test suite can be run programmatically by using ``numexpr.test()``. - Support a more complete set of functions for expressions (including those that are not supported by MSVC 7.1 compiler, like the inverse hyperbolic or log1p and expm1 functions. The complete list now is: * where(bool, number1, number2): number Number1 if the bool condition is true, number2 otherwise. * {sin,cos,tan}(float|complex): float|complex Trigonometric sinus, cosinus or tangent. * {arcsin,arccos,arctan}(float|complex): float|complex Trigonometric inverse sinus, cosinus or tangent. * arctan2(float1, float2): float Trigonometric inverse tangent of float1/float2. * {sinh,cosh,tanh}(float|complex): float|complex Hyperbolic sinus, cosinus or tangent. * {arcsinh,arccosh,arctanh}(float|complex): float|complex Hyperbolic inverse sinus, cosinus or tangent. * {log,log10,log1p}(float|complex): float|complex Natural, base-10 and log(1+x) logarithms. * {exp,expm1}(float|complex): float|complex Exponential and exponential minus one. * sqrt(float|complex): float|complex Square root. * {real,imag}(complex): float Real or imaginary part of complex. * complex(float, float): complex Complex from real and imaginary parts. .. Local Variables: .. mode: rst .. coding: utf-8 .. fill-column: 70 .. End: numexpr-2.7.1/RELEASING.txt000066400000000000000000000052141360375525100153020ustar00rootroot00000000000000================== Releasing Numexpr ================== :Author: Francesc Alted :Contact: faltet@gmail.com :Date: 2009-06-02 Following are notes useful for releasing Numexpr. Preliminaries ------------- - Make sure that ``RELEASE_NOTES.txt`` and ``ANNOUNCE.txt`` are up to date with the latest news in the release. - Remove the `.devN` suffix in ``numexpr/version.py``. - Do a commit and a push: $ git commit -a -m"Getting ready for release X.Y.Z" Testing ------- - Run the test suite in different platforms (at least Linux and Windows) and make sure that all tests passes. - Re-compile with MKL support and see if all tests passes as well. - Run all the benchmarks in ``bench/`` directory and see if the speed-ups are the expected ones. 
Packaging --------- - Make the tarball with the command: $ python setup.py sdist Do a quick check that the tarball is sane. Releasing --------- - Create a tag ``vX.Y.Z`` from ``master``. Use the next message: $ git tag -a vX.Y.Z -m "Tagging version X.Y.Z" - Push the tag to the github repo: $ git push $ git push --tags - If you happen to have to delete the tag, for example if the `manywheels` builds demonstrates a fault, first delete it locally, git tag --delete vX.Y.Z and then remotely on Github, git push --delete origin vX.Y.Z Build wheels ------------ Matthew Brett has a repository for building for hosting wheels at http://github.com/MacPython/numexpr-wheels). For the procedure to trigger and upload the built wheels, see the README at that repo. Build and upload the wheels before uploading the source distribution, to make sure that people who do not have compilers do not get breakage while the release is being uploaded. Any problems, feel free to ask @matthew-brett for help - or indeed, pass the whole task to him. Uploading the source distribution --------------------------------- - Upload it in the PyPi repository: $ python setup.py sdist upload Announcing ---------- - Send an announcement to the NumPy list, PyData and python-announce list. Use the ``ANNOUNCE.rst`` file as skeleton (or possibly as the definitive version). Post-release actions -------------------- - Edit ``numexpr/version.py`` to bump the version revision (i.e. X.Y.Z --> X.Y.(Z+1).dev0). - Create new headers for adding new features in ``RELEASE_NOTES.txt`` and add this place-holder: #XXX version-specific blurb XXX# Don't forget to update header to the next version in those files. - Commit your changes: $ git commit -a -m"Post X.Y.Z release actions done" $ git push That's all folks! .. Local Variables: .. mode: rst .. coding: utf-8 .. fill-column: 70 .. End: numexpr-2.7.1/appveyor.yml000066400000000000000000000043531360375525100156230ustar00rootroot00000000000000environment: global: # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the # /E:ON and /V:ON options are not enabled in the batch script intepreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd" matrix: - PYTHON: "C:\\Python27" PYTHON_VERSION: "2.7.13" PYTHON_ARCH: "32" - PYTHON: "C:\\Python27-x64" PYTHON_VERSION: "2.7.13" PYTHON_ARCH: "64" - PYTHON: "C:\\Python34" PYTHON_VERSION: "3.4.6" PYTHON_ARCH: "32" - PYTHON: "C:\\Python34-x64" PYTHON_VERSION: "3.4.6" PYTHON_ARCH: "64" - PYTHON: "C:\\Python38" PYTHON_VERSION: "3.8.0" PYTHON_ARCH: "32" - PYTHON: "C:\\Python38-x64" PYTHON_VERSION: "3.8.0" PYTHON_ARCH: "64" install: - ECHO "Filesystem root:" - ps: "ls \"C:/\"" - ECHO "Installed SDKs:" - ps: "ls \"C:/Program Files/Microsoft SDKs/Windows\"" # Install Python (from the official .msi of http://python.org) and pip when # not already installed. - "powershell ./appveyor/install.ps1" # Prepend newly installed Python to the PATH of this build (this cannot be # done from inside the powershell script as it would require to restart # the parent CMD process). - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" # Check that we have the expected version and architecture for Python - "python --version" - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" # Install the build and runtime dependencies of the project. 
- "%CMD_IN_ENV% pip install --timeout=60 -r appveyor/requirements.txt" - "%CMD_IN_ENV% python setup.py bdist_wheel bdist_wininst" # Install the genreated wheel package to test it - "pip install --pre --no-index --find-links dist/ numexpr" build: false test_script: # Change to a non-source folder to make sure we run the tests on the # installed library. - "mkdir empty_folder" - "cd empty_folder" - "python -c \"import sys;import numexpr;sys.exit(0 if numexpr.test().wasSuccessful() else 1)\"" - "cd .." artifacts: # Archive the generated wheel package in the ci.appveyor.com build report. - path: dist\* #on_success: # - TODO: upload the content of dist/*.whl to a public wheelhouse # numexpr-2.7.1/appveyor/000077500000000000000000000000001360375525100150735ustar00rootroot00000000000000numexpr-2.7.1/appveyor/install.ps1000066400000000000000000000135431360375525100171740ustar00rootroot00000000000000# Sample script to install Python and pip under Windows # Authors: Olivier Grisel, Jonathan Helmus and Kyle Kastner # License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ $MINICONDA_URL = "http://repo.continuum.io/miniconda/" $BASE_URL = "https://www.python.org/ftp/python/" $GET_PIP_URL = "https://bootstrap.pypa.io/get-pip.py" $GET_PIP_PATH = "C:\get-pip.py" function DownloadPython ($python_version, $platform_suffix) { $webclient = New-Object System.Net.WebClient $filename = "python-" + $python_version + $platform_suffix + ".msi" $url = $BASE_URL + $python_version + "/" + $filename $basedir = $pwd.Path + "\" $filepath = $basedir + $filename if (Test-Path $filename) { Write-Host "Reusing" $filepath return $filepath } # Download and retry up to 3 times in case of network transient errors. Write-Host "Downloading" $filename "from" $url $retry_attempts = 2 for($i=0; $i -lt $retry_attempts; $i++){ try { $webclient.DownloadFile($url, $filepath) break } Catch [Exception]{ Start-Sleep 1 } } if (Test-Path $filepath) { Write-Host "File saved at" $filepath } else { # Retry once to get the error message if any at the last try $webclient.DownloadFile($url, $filepath) } return $filepath } function InstallPython ($python_version, $architecture, $python_home) { Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home if (Test-Path $python_home) { Write-Host $python_home "already exists, skipping." return $false } if ($architecture -eq "32") { $platform_suffix = "" } else { $platform_suffix = ".amd64" } $msipath = DownloadPython $python_version $platform_suffix Write-Host "Installing" $msipath "to" $python_home $install_log = $python_home + ".log" $install_args = "/qn /log $install_log /i $msipath TARGETDIR=$python_home" $uninstall_args = "/qn /x $msipath" RunCommand "msiexec.exe" $install_args if (-not(Test-Path $python_home)) { Write-Host "Python seems to be installed else-where, reinstalling." RunCommand "msiexec.exe" $uninstall_args RunCommand "msiexec.exe" $install_args } if (Test-Path $python_home) { Write-Host "Python $python_version ($architecture) installation complete" } else { Write-Host "Failed to install Python in $python_home" Get-Content -Path $install_log Exit 1 } } function RunCommand ($command, $command_args) { Write-Host $command $command_args Start-Process -FilePath $command -ArgumentList $command_args -Wait -Passthru } function InstallPip ($python_home) { $pip_path = $python_home + "\Scripts\pip.exe" $python_path = $python_home + "\python.exe" if (-not(Test-Path $pip_path)) { Write-Host "Installing pip..." 
$webclient = New-Object System.Net.WebClient $webclient.DownloadFile($GET_PIP_URL, $GET_PIP_PATH) Write-Host "Executing:" $python_path $GET_PIP_PATH Start-Process -FilePath "$python_path" -ArgumentList "$GET_PIP_PATH" -Wait -Passthru } else { Write-Host "pip already installed." } } function DownloadMiniconda ($python_version, $platform_suffix) { $webclient = New-Object System.Net.WebClient if ($python_version -eq "3.4") { $filename = "Miniconda3-3.5.5-Windows-" + $platform_suffix + ".exe" } else { $filename = "Miniconda-3.5.5-Windows-" + $platform_suffix + ".exe" } $url = $MINICONDA_URL + $filename $basedir = $pwd.Path + "\" $filepath = $basedir + $filename if (Test-Path $filename) { Write-Host "Reusing" $filepath return $filepath } # Download and retry up to 3 times in case of network transient errors. Write-Host "Downloading" $filename "from" $url $retry_attempts = 2 for($i=0; $i -lt $retry_attempts; $i++){ try { $webclient.DownloadFile($url, $filepath) break } Catch [Exception]{ Start-Sleep 1 } } if (Test-Path $filepath) { Write-Host "File saved at" $filepath } else { # Retry once to get the error message if any at the last try $webclient.DownloadFile($url, $filepath) } return $filepath } function InstallMiniconda ($python_version, $architecture, $python_home) { Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home if (Test-Path $python_home) { Write-Host $python_home "already exists, skipping." return $false } if ($architecture -eq "32") { $platform_suffix = "x86" } else { $platform_suffix = "x86_64" } $filepath = DownloadMiniconda $python_version $platform_suffix Write-Host "Installing" $filepath "to" $python_home $install_log = $python_home + ".log" $args = "/S /D=$python_home" Write-Host $filepath $args Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru if (Test-Path $python_home) { Write-Host "Python $python_version ($architecture) installation complete" } else { Write-Host "Failed to install Python in $python_home" Get-Content -Path $install_log Exit 1 } } function InstallMinicondaPip ($python_home) { $pip_path = $python_home + "\Scripts\pip.exe" $conda_path = $python_home + "\Scripts\conda.exe" if (-not(Test-Path $pip_path)) { Write-Host "Installing pip..." $args = "install --yes pip" Write-Host $conda_path $args Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru } else { Write-Host "pip already installed." } } function main () { InstallPython $env:PYTHON_VERSION $env:PYTHON_ARCH $env:PYTHON InstallPip $env:PYTHON } main numexpr-2.7.1/appveyor/requirements.txt000066400000000000000000000001021360375525100203500ustar00rootroot00000000000000# RAM: updated to use official NumPy wheels from PyPI numpy wheel numexpr-2.7.1/appveyor/run_with_env.cmd000066400000000000000000000034621360375525100202740ustar00rootroot00000000000000:: To build extensions for 64 bit Python 3, we need to configure environment :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) :: :: To build extensions for 64 bit Python 2, we need to configure environment :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) :: :: 32 bit builds do not require specific environment configurations. 
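::
:: Example invocation (appveyor.yml wires this up through its CMD_IN_ENV
:: variable):
::   run_with_env.cmd python setup.py bdist_wheel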
:: :: Note: this script needs to be run with the /E:ON and /V:ON flags for the :: cmd interpreter, at least for (SDK v7.0) :: :: More details at: :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows :: http://stackoverflow.com/a/13751649/163740 :: :: Author: Olivier Grisel :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ @ECHO OFF SET COMMAND_TO_RUN=%* SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows SET MAJOR_PYTHON_VERSION="%PYTHON_VERSION:~0,1%" IF %MAJOR_PYTHON_VERSION% == "2" ( SET WINDOWS_SDK_VERSION="v7.0" ) ELSE IF %MAJOR_PYTHON_VERSION% == "3" ( SET WINDOWS_SDK_VERSION="v7.1" ) ELSE ( ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" EXIT 1 ) IF "%PYTHON_ARCH%"=="64" ( ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture SET DISTUTILS_USE_SDK=1 SET MSSdk=1 "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release ECHO Executing: %COMMAND_TO_RUN% call %COMMAND_TO_RUN% || EXIT 1 ) ELSE ( ECHO Using default MSVC build environment for 32 bit architecture ECHO Executing: %COMMAND_TO_RUN% call %COMMAND_TO_RUN% || EXIT 1 ) numexpr-2.7.1/bench/000077500000000000000000000000001360375525100143055ustar00rootroot00000000000000numexpr-2.7.1/bench/boolean_timing.py000066400000000000000000000126031360375525100176470ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### from __future__ import print_function import sys import timeit import numpy array_size = 1000*1000 iterations = 10 numpy_ttime = [] numpy_sttime = [] numpy_nttime = [] numexpr_ttime = [] numexpr_sttime = [] numexpr_nttime = [] def compare_times(expr, nexpr): global numpy_ttime global numpy_sttime global numpy_nttime global numexpr_ttime global numexpr_sttime global numexpr_nttime print("******************* Expression:", expr) setup_contiguous = setupNP_contiguous setup_strided = setupNP_strided setup_unaligned = setupNP_unaligned numpy_timer = timeit.Timer(expr, setup_contiguous) numpy_time = round(numpy_timer.timeit(number=iterations), 4) numpy_ttime.append(numpy_time) print('numpy:', numpy_time / iterations) numpy_timer = timeit.Timer(expr, setup_strided) numpy_stime = round(numpy_timer.timeit(number=iterations), 4) numpy_sttime.append(numpy_stime) print('numpy strided:', numpy_stime / iterations) numpy_timer = timeit.Timer(expr, setup_unaligned) numpy_ntime = round(numpy_timer.timeit(number=iterations), 4) numpy_nttime.append(numpy_ntime) print('numpy unaligned:', numpy_ntime / iterations) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_contiguous) numexpr_time = round(numexpr_timer.timeit(number=iterations), 4) numexpr_ttime.append(numexpr_time) print("numexpr:", numexpr_time/iterations, end=" ") print("Speed-up of numexpr over numpy:", round(numpy_time/numexpr_time, 4)) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_strided) numexpr_stime = round(numexpr_timer.timeit(number=iterations), 4) numexpr_sttime.append(numexpr_stime) print("numexpr strided:", numexpr_stime/iterations, 
end=" ") print("Speed-up of numexpr strided over numpy:", \ round(numpy_stime/numexpr_stime, 4)) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_unaligned) numexpr_ntime = round(numexpr_timer.timeit(number=iterations), 4) numexpr_nttime.append(numexpr_ntime) print("numexpr unaligned:", numexpr_ntime/iterations, end=" ") print("Speed-up of numexpr unaligned over numpy:", \ round(numpy_ntime/numexpr_ntime, 4)) setupNP = """\ from numpy import arange, where, arctan2, sqrt from numpy import rec as records from numexpr import evaluate # Initialize a recarray of 16 MB in size r=records.array(None, formats='a%s,i4,f8', shape=%s) c1 = r.field('f0')%s i2 = r.field('f1')%s f3 = r.field('f2')%s c1[:] = "a" i2[:] = arange(%s)/1000 f3[:] = i2/2. """ setupNP_contiguous = setupNP % (4, array_size, ".copy()", ".copy()", ".copy()", array_size) setupNP_strided = setupNP % (4, array_size, "", "", "", array_size) setupNP_unaligned = setupNP % (1, array_size, "", "", "", array_size) expressions = [] expressions.append('i2 > 0') expressions.append('i2 < 0') expressions.append('i2 < f3') expressions.append('i2-10 < f3') expressions.append('i2*f3+f3*f3 > i2') expressions.append('0.1*i2 > arctan2(i2, f3)') expressions.append('i2%2 > 3') expressions.append('i2%10 < 4') expressions.append('i2**2 + (f3+1)**-2.5 < 3') expressions.append('(f3+1)**50 > i2') expressions.append('sqrt(i2**2 + f3**2) > 1') expressions.append('(i2>2) | ((f3**2>3) & ~(i2*f3<2))') def compare(expression=False): if expression: compare_times(expression, 1) sys.exit(0) nexpr = 0 for expr in expressions: nexpr += 1 compare_times(expr, nexpr) print() if __name__ == '__main__': import numexpr numexpr.print_versions() if len(sys.argv) > 1: expression = sys.argv[1] print("expression-->", expression) compare(expression) else: compare() tratios = numpy.array(numpy_ttime) / numpy.array(numexpr_ttime) stratios = numpy.array(numpy_sttime) / numpy.array(numexpr_sttime) ntratios = numpy.array(numpy_nttime) / numpy.array(numexpr_nttime) print("*************** Numexpr vs NumPy speed-ups *******************") # print "numpy total:", sum(numpy_ttime)/iterations # print "numpy strided total:", sum(numpy_sttime)/iterations # print "numpy unaligned total:", sum(numpy_nttime)/iterations # print "numexpr total:", sum(numexpr_ttime)/iterations print("Contiguous case:\t %s (mean), %s (min), %s (max)" % \ (round(tratios.mean(), 2), round(tratios.min(), 2), round(tratios.max(), 2))) # print "numexpr strided total:", sum(numexpr_sttime)/iterations print("Strided case:\t\t %s (mean), %s (min), %s (max)" % \ (round(stratios.mean(), 2), round(stratios.min(), 2), round(stratios.max(), 2))) # print "numexpr unaligned total:", sum(numexpr_nttime)/iterations print("Unaligned case:\t\t %s (mean), %s (min), %s (max)" % \ (round(ntratios.mean(), 2), round(ntratios.min(), 2), round(ntratios.max(), 2))) numexpr-2.7.1/bench/issue-36.py000066400000000000000000000015771360375525100162470ustar00rootroot00000000000000# Small benchmark to get the even point where the threading code # performs better than the serial code. See issue #36 for details. 
from __future__ import print_function import numpy as np import numexpr as ne from numpy.testing import assert_array_equal from time import time def bench(N): print("*** array length:", N) a = np.arange(N) t0 = time() ntimes = (1000*2**15) // N for i in range(ntimes): ne.evaluate('a>1000') print("numexpr--> %.3g" % ((time()-t0)/ntimes,)) t0 = time() for i in range(ntimes): eval('a>1000') print("numpy--> %.3g" % ((time()-t0)/ntimes,)) if __name__ == "__main__": print("****** Testing with 1 thread...") ne.set_num_threads(1) for N in range(10, 20): bench(2**N) print("****** Testing with 2 threads...") ne.set_num_threads(2) for N in range(10, 20): bench(2**N) numexpr-2.7.1/bench/issue-47.py000066400000000000000000000003031360375525100162330ustar00rootroot00000000000000import numpy import numexpr numexpr.set_num_threads(8) x0,x1,x2,x3,x4,x5 = [0,1,2,3,4,5] t = numpy.linspace(0,1,44100000).reshape(-1,1) numexpr.evaluate('(x0+x1*t+x2*t**2)* cos(x3+x4*t+x5**t)') numexpr-2.7.1/bench/multidim.py000066400000000000000000000052231360375525100165050ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### # Script to check that multidimensional arrays are speed-up properly too # Based on a script provided by Andrew Collette. from __future__ import print_function import numpy as np import numexpr as nx import time test_shapes = [ (100*100*100), (100*100,100), (100,100,100), ] test_dtype = 'f4' nruns = 10 # Ensemble for timing def chunkify(chunksize): """ Very stupid "chunk vectorizer" which keeps memory use down. This version requires all inputs to have the same number of elements, although it shouldn't be that hard to implement simple broadcasting. 
""" def chunkifier(func): def wrap(*args): assert len(args) > 0 assert all(len(a.flat) == len(args[0].flat) for a in args) nelements = len(args[0].flat) nchunks, remain = divmod(nelements, chunksize) out = np.ndarray(args[0].shape) for start in range(0, nelements, chunksize): #print(start) stop = start+chunksize if start+chunksize > nelements: stop = nelements-start iargs = tuple(a.flat[start:stop] for a in args) out.flat[start:stop] = func(*iargs) return out return wrap return chunkifier test_func_str = "63 + (a*b) + (c**2) + b" def test_func(a, b, c): return 63 + (a*b) + (c**2) + b test_func_chunked = chunkify(100*100)(test_func) for test_shape in test_shapes: test_size = np.product(test_shape) # The actual data we'll use a = np.arange(test_size, dtype=test_dtype).reshape(test_shape) b = np.arange(test_size, dtype=test_dtype).reshape(test_shape) c = np.arange(test_size, dtype=test_dtype).reshape(test_shape) start1 = time.time() for idx in range(nruns): result1 = test_func(a, b, c) stop1 = time.time() start2 = time.time() for idx in range(nruns): result2 = nx.evaluate(test_func_str) stop2 = time.time() start3 = time.time() for idx in range(nruns): result3 = test_func_chunked(a, b, c) stop3 = time.time() print("%s %s (average of %s runs)" % (test_shape, test_dtype, nruns)) print("Simple: ", (stop1-start1)/nruns) print("Numexpr: ", (stop2-start2)/nruns) print("Chunked: ", (stop3-start3)/nruns) numexpr-2.7.1/bench/poly.c000066400000000000000000000047621360375525100154450ustar00rootroot00000000000000/* ####################################################################### */ /* This script compares the speed of the computation of a polynomial */ /* in C in a couple of different ways. */ /* */ /* Author: Francesc Alted */ /* Date: 2010-02-05 */ /* ####################################################################### */ #include #include #if defined(_WIN32) && !defined(__MINGW32__) #include #include #else #include #include #endif #define N 10*1000*1000 double x[N]; double y[N]; #if defined(_WIN32) && !defined(__MINGW32__) #if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 #else #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL #endif struct timezone { int tz_minuteswest; /* minutes W of Greenwich */ int tz_dsttime; /* type of dst correction */ }; int gettimeofday(struct timeval *tv, struct timezone *tz) { FILETIME ft; unsigned __int64 tmpres = 0; static int tzflag; if (NULL != tv) { GetSystemTimeAsFileTime(&ft); tmpres |= ft.dwHighDateTime; tmpres <<= 32; tmpres |= ft.dwLowDateTime; /*converting file time to unix epoch*/ tmpres -= DELTA_EPOCH_IN_MICROSECS; tmpres /= 10; /*convert into microseconds*/ tv->tv_sec = (long)(tmpres / 1000000UL); tv->tv_usec = (long)(tmpres % 1000000UL); } if (NULL != tz) { if (!tzflag) { _tzset(); tzflag++; } tz->tz_minuteswest = _timezone / 60; tz->tz_dsttime = _daylight; } return 0; } #endif /* _WIN32 */ /* Given two timeval stamps, return the difference in seconds */ float getseconds(struct timeval last, struct timeval current) { int sec, usec; sec = current.tv_sec - last.tv_sec; usec = current.tv_usec - last.tv_usec; return (float)(((double)sec + usec*1e-6)); } int main(void) { int i; double inf = -1; struct timeval last, current; float tspend; for(i=0; i 1: # first arg is the package to use what = sys.argv[1] if len(sys.argv) > 2: # second arg is the number of threads to use nthreads = int(sys.argv[2]) if "ncores" in dir(ne): ne.set_num_threads(nthreads) if what not in ("numpy", "numexpr"): 
print("Unrecognized module:", what) sys.exit(0) print("Computing: '%s' using %s with %d points" % (expr, what, N)) t0 = time() result = compute() ts = round(time() - t0, 3) print("*** Time elapsed:", ts) numexpr-2.7.1/bench/timing.py000066400000000000000000000076051360375525100161560ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### from __future__ import print_function import timeit, numpy array_size = 1e6 iterations = 2 # Choose the type you want to benchmark #dtype = 'int8' #dtype = 'int16' #dtype = 'int32' #dtype = 'int64' dtype = 'float32' #dtype = 'float64' def compare_times(setup, expr): print("Expression:", expr) namespace = {} exec(setup, namespace) numpy_timer = timeit.Timer(expr, setup) numpy_time = numpy_timer.timeit(number=iterations) print('numpy:', numpy_time / iterations) try: weave_timer = timeit.Timer('blitz("result=%s")' % expr, setup) weave_time = weave_timer.timeit(number=iterations) print("Weave:", weave_time/iterations) print("Speed-up of weave over numpy:", round(numpy_time/weave_time, 2)) except: print("Skipping weave timing") numexpr_timer = timeit.Timer('evaluate("%s", optimization="aggressive")' % expr, setup) numexpr_time = numexpr_timer.timeit(number=iterations) print("numexpr:", numexpr_time/iterations) tratio = numpy_time/numexpr_time print("Speed-up of numexpr over numpy:", round(tratio, 2)) return tratio setup1 = """\ from numpy import arange try: from scipy.weave import blitz except: pass from numexpr import evaluate result = arange(%f, dtype='%s') b = arange(%f, dtype='%s') c = arange(%f, dtype='%s') d = arange(%f, dtype='%s') e = arange(%f, dtype='%s') """ % ((array_size, dtype)*5) expr1 = 'b*c+d*e' setup2 = """\ from numpy import arange try: from scipy.weave import blitz except: pass from numexpr import evaluate a = arange(%f, dtype='%s') b = arange(%f, dtype='%s') result = arange(%f, dtype='%s') """ % ((array_size, dtype)*3) expr2 = '2*a+3*b' setup3 = """\ from numpy import arange, sin, cos, sinh try: from scipy.weave import blitz except: pass from numexpr import evaluate a = arange(2*%f, dtype='%s')[::2] b = arange(%f, dtype='%s') result = arange(%f, dtype='%s') """ % ((array_size, dtype)*3) expr3 = '2*a + (cos(3)+5)*sinh(cos(b))' setup4 = """\ from numpy import arange, sin, cos, sinh, arctan2 try: from scipy.weave import blitz except: pass from numexpr import evaluate a = arange(2*%f, dtype='%s')[::2] b = arange(%f, dtype='%s') result = arange(%f, dtype='%s') """ % ((array_size, dtype)*3) expr4 = '2*a + arctan2(a, b)' setup5 = """\ from numpy import arange, sin, cos, sinh, arctan2, sqrt, where try: from scipy.weave import blitz except: pass from numexpr import evaluate a = arange(2*%f, dtype='%s')[::2] b = arange(%f, dtype='%s') result = arange(%f, dtype='%s') """ % ((array_size, dtype)*3) expr5 = 'where(0.1*a > arctan2(a, b), 2*a, arctan2(a,b))' expr6 = 'where(a != 0.0, 2, b)' expr7 = 'where(a-10 != 0.0, a, 2)' expr8 = 'where(a%2 != 0.0, b+5, 2)' expr9 = 'where(a%2 != 0.0, 2, b+5)' expr10 = 'a**2 + (b+1)**-2.5' expr11 = '(a+1)**50' expr12 = 'sqrt(a**2 + b**2)' def compare(check_only=False): experiments = [(setup1, expr1), (setup2, expr2), (setup3, expr3), (setup4, expr4), (setup5, expr5), (setup5, expr6), (setup5, expr7), (setup5, expr8), 
(setup5, expr9), (setup5, expr10), (setup5, expr11), (setup5, expr12), ] total = 0 for params in experiments: total += compare_times(*params) print average = total / len(experiments) print("Average =", round(average, 2)) return average if __name__ == '__main__': import numexpr print("Numexpr version: ", numexpr.__version__) averages = [] for i in range(iterations): averages.append(compare()) print("Averages:", ', '.join("%.2f" % x for x in averages)) numexpr-2.7.1/bench/unaligned-simple.py000066400000000000000000000026501360375525100201170ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### """Very simple test that compares the speed of operating with aligned vs unaligned arrays. """ from __future__ import print_function from timeit import Timer import numpy as np import numexpr as ne niter = 10 #shape = (1000*10000) # unidimensional test shape = (1000, 10000) # multidimensional test print("Numexpr version: ", ne.__version__) Z_fast = np.zeros(shape, dtype=[('x',np.float64),('y',np.int64)]) Z_slow = np.zeros(shape, dtype=[('y1',np.int8),('x',np.float64),('y2',np.int8,(7,))]) x_fast = Z_fast['x'] t = Timer("x_fast * x_fast", "from __main__ import x_fast") print("NumPy aligned: \t", round(min(t.repeat(3, niter)), 3), "s") x_slow = Z_slow['x'] t = Timer("x_slow * x_slow", "from __main__ import x_slow") print("NumPy unaligned:\t", round(min(t.repeat(3, niter)), 3), "s") t = Timer("ne.evaluate('x_fast * x_fast')", "from __main__ import ne, x_fast") print("Numexpr aligned:\t", round(min(t.repeat(3, niter)), 3), "s") t = Timer("ne.evaluate('x_slow * x_slow')", "from __main__ import ne, x_slow") print("Numexpr unaligned:\t", round(min(t.repeat(3, niter)), 3), "s") numexpr-2.7.1/bench/varying-expr.py000066400000000000000000000031311360375525100173100ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### # Benchmark for checking if numexpr leaks memory when evaluating # expressions that changes continously. It also serves for computing # the latency of numexpr when working with small arrays. from __future__ import print_function import sys from time import time import numpy as np import numexpr as ne N = 100 M = 10 def timed_eval(eval_func, expr_func): t1 = time() for i in range(N): r = eval_func(expr_func(i)) if i % 10 == 0: sys.stdout.write('.') print(" done in %s seconds" % round(time() - t1, 3)) print("Number of iterations %s. 
Length of the array: %s " % (N, M)) a = np.arange(M) # lots of duplicates to collapse #expr = '+'.join('(a + 1) * %d' % i for i in range(50)) # no duplicate to collapse expr = '+'.join('(a + %d) * %d' % (i, i) for i in range(50)) def non_cacheable(i): return expr + '+ %d' % i def cacheable(i): return expr + '+ i' print("* Numexpr with non-cacheable expressions: ", end=" ") timed_eval(ne.evaluate, non_cacheable) print("* Numexpr with cacheable expressions: ", end=" ") timed_eval(ne.evaluate, cacheable) print("* Numpy with non-cacheable expressions: ", end=" ") timed_eval(eval, non_cacheable) print("* Numpy with cacheable expressions: ", end=" ") timed_eval(eval, cacheable) numexpr-2.7.1/bench/vml_timing.py000066400000000000000000000133551360375525100170330ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### from __future__ import print_function import sys import timeit import numpy import numexpr array_size = 1000*1000 iterations = 10 numpy_ttime = [] numpy_sttime = [] numpy_nttime = [] numexpr_ttime = [] numexpr_sttime = [] numexpr_nttime = [] def compare_times(expr, nexpr): global numpy_ttime global numpy_sttime global numpy_nttime global numexpr_ttime global numexpr_sttime global numexpr_nttime print("******************* Expression:", expr) setup_contiguous = setupNP_contiguous setup_strided = setupNP_strided setup_unaligned = setupNP_unaligned numpy_timer = timeit.Timer(expr, setup_contiguous) numpy_time = round(numpy_timer.timeit(number=iterations), 4) numpy_ttime.append(numpy_time) print('%30s %.4f'%('numpy:', numpy_time / iterations)) numpy_timer = timeit.Timer(expr, setup_strided) numpy_stime = round(numpy_timer.timeit(number=iterations), 4) numpy_sttime.append(numpy_stime) print('%30s %.4f'%('numpy strided:', numpy_stime / iterations)) numpy_timer = timeit.Timer(expr, setup_unaligned) numpy_ntime = round(numpy_timer.timeit(number=iterations), 4) numpy_nttime.append(numpy_ntime) print('%30s %.4f'%('numpy unaligned:', numpy_ntime / iterations)) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_contiguous) numexpr_time = round(numexpr_timer.timeit(number=iterations), 4) numexpr_ttime.append(numexpr_time) print('%30s %.4f'%("numexpr:", numexpr_time/iterations,), end=" ") print("Speed-up of numexpr over numpy:", round(numpy_time/numexpr_time, 4)) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_strided) numexpr_stime = round(numexpr_timer.timeit(number=iterations), 4) numexpr_sttime.append(numexpr_stime) print('%30s %.4f'%("numexpr strided:", numexpr_stime/iterations,), end=" ") print("Speed-up of numexpr over numpy:", \ round(numpy_stime/numexpr_stime, 4)) evalexpr = 'evaluate("%s", optimization="aggressive")' % expr numexpr_timer = timeit.Timer(evalexpr, setup_unaligned) numexpr_ntime = round(numexpr_timer.timeit(number=iterations), 4) numexpr_nttime.append(numexpr_ntime) print('%30s %.4f'%("numexpr unaligned:", numexpr_ntime/iterations,), end=" ") print("Speed-up of numexpr over numpy:", \ round(numpy_ntime/numexpr_ntime, 4)) print() setupNP = """\ from numpy import arange, linspace, arctan2, sqrt, sin, cos, exp, log from numpy import rec as records #from numexpr 
import evaluate from numexpr import %s # Initialize a recarray of 16 MB in size r=records.array(None, formats='a%s,i4,f4,f8', shape=%s) c1 = r.field('f0')%s i2 = r.field('f1')%s f3 = r.field('f2')%s f4 = r.field('f3')%s c1[:] = "a" i2[:] = arange(%s)/1000 f3[:] = linspace(0,1,len(i2)) f4[:] = f3*1.23 """ eval_method = "evaluate" setupNP_contiguous = setupNP % ((eval_method, 4, array_size,) + \ (".copy()",)*4 + \ (array_size,)) setupNP_strided = setupNP % (eval_method, 4, array_size, "", "", "", "", array_size) setupNP_unaligned = setupNP % (eval_method, 1, array_size, "", "", "", "", array_size) expressions = [] expressions.append('i2 > 0') expressions.append('f3+f4') expressions.append('f3+i2') expressions.append('exp(f3)') expressions.append('log(exp(f3)+1)/f4') expressions.append('0.1*i2 > arctan2(f3, f4)') expressions.append('sqrt(f3**2 + f4**2) > 1') expressions.append('sin(f3)>cos(f4)') expressions.append('f3**f4') def compare(expression=False): if expression: compare_times(expression, 1) sys.exit(0) nexpr = 0 for expr in expressions: nexpr += 1 compare_times(expr, nexpr) print() if __name__ == '__main__': import numexpr print("Numexpr version: ", numexpr.__version__) numpy.seterr(all='ignore') numexpr.set_vml_accuracy_mode('low') numexpr.set_vml_num_threads(2) if len(sys.argv) > 1: expression = sys.argv[1] print("expression-->", expression) compare(expression) else: compare() tratios = numpy.array(numpy_ttime) / numpy.array(numexpr_ttime) stratios = numpy.array(numpy_sttime) / numpy.array(numexpr_sttime) ntratios = numpy.array(numpy_nttime) / numpy.array(numexpr_nttime) print("eval method: %s" % eval_method) print("*************** Numexpr vs NumPy speed-ups *******************") # print("numpy total:", sum(numpy_ttime)/iterations) # print("numpy strided total:", sum(numpy_sttime)/iterations) # print("numpy unaligned total:", sum(numpy_nttime)/iterations) # print("numexpr total:", sum(numexpr_ttime)/iterations) print("Contiguous case:\t %s (mean), %s (min), %s (max)" % \ (round(tratios.mean(), 2), round(tratios.min(), 2), round(tratios.max(), 2))) # print("numexpr strided total:", sum(numexpr_sttime)/iterations) print("Strided case:\t\t %s (mean), %s (min), %s (max)" % \ (round(stratios.mean(), 2), round(stratios.min(), 2), round(stratios.max(), 2))) # print("numexpr unaligned total:", sum(numexpr_nttime)/iterations) print("Unaligned case:\t\t %s (mean), %s (min), %s (max)" % \ (round(ntratios.mean(), 2), round(ntratios.min(), 2), round(ntratios.max(), 2))) numexpr-2.7.1/bench/vml_timing2.py000066400000000000000000000023061360375525100171070ustar00rootroot00000000000000# References: # # http://software.intel.com/en-us/intel-mkl # https://github.com/pydata/numexpr/wiki/NumexprMKL from __future__ import print_function import datetime import sys import numpy as np import numexpr as ne from time import time N = int(5e7) x = np.linspace(0, 1, N) y = np.linspace(0, 1, N) z = np.empty(N, dtype=np.float64) # Our working set is 3 vectors of N doubles each working_set_GB = 3 * N * 8 / 2**30 print("NumPy version: %s" % (np.__version__,)) t0 = time() z = 2*y + 4*x t1 = time() gbs = working_set_GB / (t1-t0) print("Time for an algebraic expression: %.3f s / %.3f GB/s" % (t1-t0, gbs)) t0 = time() z = np.sin(x)**2 + np.cos(y)**2 t1 = time() gbs = working_set_GB / (t1-t0) print("Time for a transcendental expression: %.3f s / %.3f GB/s" % (t1-t0, gbs)) print("Numexpr version: %s. 
Using MKL: %s" % (ne.__version__, ne.use_vml)) t0 = time() ne.evaluate('2*y + 4*x', out = z) t1 = time() gbs = working_set_GB / (t1-t0) print("Time for an algebraic expression: %.3f s / %.3f GB/s" % (t1-t0, gbs)) t0 = time() ne.evaluate('sin(x)**2 + cos(y)**2', out = z) t1 = time() gbs = working_set_GB / (t1-t0) print("Time for a transcendental expression: %.3f s / %.3f GB/s" % (t1-t0, gbs)) numexpr-2.7.1/bench/vml_timing3.py000066400000000000000000000004741360375525100171140ustar00rootroot00000000000000# -*- coding: utf-8 -*- import numpy as np import numexpr as ne from timeit import default_timer as timer x = np.ones(100000) scaler = -1J start = timer() for k in range(10000): cexp = ne.evaluate('exp(scaler * x)') exec_time=(timer() - start) print print("Execution took", str(round(exec_time, 3)), "seconds") numexpr-2.7.1/doc/000077500000000000000000000000001360375525100137735ustar00rootroot00000000000000numexpr-2.7.1/doc/Makefile000066400000000000000000000167071360375525100154460ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = -n -v -v SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " applehelp to make an Apple Help Book" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " epub3 to make an epub3" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" @echo " coverage to run coverage check of the documentation (if enabled)" @echo " dummy to check syntax errors of document sources" .PHONY: clean clean: rm -rf $(BUILDDIR)/* .PHONY: html html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." .PHONY: dirhtml dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." .PHONY: singlehtml singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
.PHONY: pickle pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." .PHONY: json json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." .PHONY: htmlhelp htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." .PHONY: qthelp qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/numexpr3.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/numexpr3.qhc" .PHONY: applehelp applehelp: $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @echo "N.B. You won't be able to view it unless you put it in" \ "~/Library/Documentation/Help or install it in your application" \ "bundle." .PHONY: devhelp devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/numexpr3" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/numexpr3" @echo "# devhelp" .PHONY: epub epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." .PHONY: epub3 epub3: $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 @echo @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." .PHONY: latex latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." .PHONY: latexpdf latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: latexpdfja latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: text text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." .PHONY: man man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." .PHONY: texinfo texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." .PHONY: info info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." .PHONY: gettext gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
.PHONY: changes changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." .PHONY: linkcheck linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." .PHONY: doctest doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." .PHONY: coverage coverage: $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." .PHONY: xml xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." .PHONY: pseudoxml pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." .PHONY: dummy dummy: $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy @echo @echo "Build finished. Dummy builder generates no files." numexpr-2.7.1/doc/api.rst000066400000000000000000000013031360375525100152730ustar00rootroot00000000000000NumExpr API =========== .. automodule:: numexpr :members: evaluate, re_evaluate, disassemble, NumExpr, get_vml_version, set_vml_accuracy_mode, set_vml_num_threads, set_num_threads, detect_number_of_cores, detect_number_of_threads .. py:attribute:: ncores The number of (virtual) cores detected. .. py:attribute:: nthreads The number of threads currently in-use. .. py:attribute:: MAX_THREADS The maximum number of threads, as set by the environment variable ``NUMEXPR_MAX_THREADS`` .. py:attribute:: version The version of NumExpr. Tests submodule --------------- .. automodule:: numexpr.tests :members: test, print_versionsnumexpr-2.7.1/doc/conf.py000066400000000000000000000241751360375525100153030ustar00rootroot00000000000000#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # numexpr documentation build configuration file, created by # sphinx-quickstart on Sat Feb 4 17:19:36 2017. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os, sys import numexpr module_location = os.path.dirname( numexpr.__file__ ) sys.path.insert(0, os.path.abspath(module_location) ) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx', 'sphinx.ext.coverage', 'sphinx.ext.todo', 'sphinx.ext.imgmath', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode', 'sphinx.ext.githubpages', 'numpydoc', ] # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = 'numexpr' copyright = '2006 (various)-2017, David M. Cooke, Francesc Alted, and others' author = 'David M. Cooke, Francesc Alted, and others' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '2.6' # The full version, including alpha/beta/rc tags. release = '2.6.3.dev0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # # today = '' # # Else, today_fmt is used as the format for a strftime call. # # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The reST default role (used for this markup: `text`) to use for all # documents. # # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. # keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. # " v documentation" by default. # # html_title = 'numexpr va0' # A shorter title for the navigation bar. Default is the same as html_title. # # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # # html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # # html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # The empty string is equivalent to '%b %d, %Y'. # # html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # # html_additional_pages = {} # If false, no module index is generated. # # html_domain_indices = True # If false, no index is generated. # # html_use_index = True # If true, the index is split into individual pages for each letter. # # html_split_index = False # If true, links to the reST sources are added to the pages. # # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' # # html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. # # html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. # # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. htmlhelp_basename = 'numexprdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). #latex_documents = [ # (master_doc, 'numexpr.tex', 'numexpr Documentation', # 'David M. Cooke, Francesc Alted, and others', 'manual'), #] # The name of an image file (relative to this directory) to place at the top of # the title page. # # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # # latex_use_parts = False # If true, show page references after internal links. # # latex_show_pagerefs = False # If true, show URL addresses after external links. 
# # latex_show_urls = False # Documents to append as an appendix to all manuals. # # latex_appendices = [] # It false, will not define \strong, \code, itleref, \crossref ... but only # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added # packages. # # latex_keep_old_macro_names = True # If false, no module index is generated. # # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). #man_pages = [ # (master_doc, 'numexpr', 'numexpr Documentation', # [author], 1) #] # If true, show URL addresses after external links. # # man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) #texinfo_documents = [ # (master_doc, 'numexpr', 'numexpr Documentation', # author, 'numexpr', 'One line description of project.', # 'Miscellaneous'), #] # Documents to append as an appendix to all manuals. # # texinfo_appendices = [] # If false, no module index is generated. # # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # # texinfo_no_detailmenu = False # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'https://docs.python.org/': None} numexpr-2.7.1/doc/index.rst000066400000000000000000000007411360375525100156360ustar00rootroot00000000000000.. numexpr documentation master file, created by sphinx-quickstart on Sat Feb 4 17:19:36 2017. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. NumExpr Documentation Reference ================================= Contents: .. toctree:: :maxdepth: 2 intro user_guide vm2 mkl api release_notes Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` numexpr-2.7.1/doc/intro.rst000066400000000000000000000102411360375525100156560ustar00rootroot00000000000000How it works ============ The string passed to :code:`evaluate` is compiled into an object representing the expression and types of the arrays used by the function :code:`numexpr`. The expression is first compiled using Python's :code:`compile` function (this means that the expressions have to be valid Python expressions). From this, the variable names can be taken. The expression is then evaluated using instances of a special object that keep track of what is being done to them, and which builds up the parse tree of the expression. This parse tree is then compiled to a bytecode program, which describes how to perform the operation element-wise. The virtual machine uses "vector registers": each register is many elements wide (by default 4096 elements). The key to NumExpr's speed is handling chunks of elements at a time. There are two extremes to evaluating an expression elementwise. You can do each operation as arrays, returning temporary arrays. This is what you do when you use NumPy: :code:`2*a+3*b` uses three temporary arrays as large as :code:`a` or :code:`b`. 
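Spelled out (the temporary names here are only illustrative), that is roughly
equivalent to::

    t1 = 2*a          # first full-size temporary
    t2 = 3*b          # second full-size temporary
    result = t1 + t2  # a third full-size array holds the sum
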
This strategy wastes memory (a problem if your arrays are large), and also is not a good use of cache memory: for large arrays, the results of :code:`2*a` and :code:`3*b` won't be in cache when you do the add. The other extreme is to loop over each element, as in:: for i in xrange(len(a)): c[i] = 2*a[i] + 3*b[i] This doesn't consume extra memory, and is good for the cache, but, if the expression is not compiled to machine code, you will have a big case statement (or a bunch of if's) inside the loop, which adds a large overhead for each element, and will hurt the branch-prediction used on the CPU. :code:`numexpr` uses a in-between approach. Arrays are handled as chunks (of 4096 elements) at a time, using a register machine. As Python code, it looks something like this:: for i in xrange(0, len(a), 256): r0 = a[i:i+128] r1 = b[i:i+128] multiply(r0, 2, r2) multiply(r1, 3, r3) add(r2, r3, r2) c[i:i+128] = r2 (remember that the 3-arg form stores the result in the third argument, instead of allocating a new array). This achieves a good balance between cache and branch-prediction. And the virtual machine is written entirely in C, which makes it faster than the Python above. Furthermore the virtual machine is also multi-threaded, which allows for efficient parallelization of NumPy operations. There is some more information and history at: http://www.bitsofbits.com/2014/09/21/numpy-micro-optimization-and-numexpr/ Expected performance ==================== The range of speed-ups for NumExpr respect to NumPy can vary from 0.95x and 20x, being 2x, 3x or 4x typical values, depending on the complexity of the expression and the internal optimization of the operators used. The strided and unaligned case has been optimized too, so if the expression contains such arrays, the speed-up can increase significantly. Of course, you will need to operate with large arrays (typically larger than the cache size of your CPU) to see these improvements in performance. Here there are some real timings. For the contiguous case:: In [1]: import numpy as np In [2]: import numexpr as ne In [3]: a = np.random.rand(1e6) In [4]: b = np.random.rand(1e6) In [5]: timeit 2*a + 3*b 10 loops, best of 3: 18.9 ms per loop In [6]: timeit ne.evaluate("2*a + 3*b") 100 loops, best of 3: 5.83 ms per loop # 3.2x: medium speed-up (simple expr) In [7]: timeit 2*a + b**10 10 loops, best of 3: 158 ms per loop In [8]: timeit ne.evaluate("2*a + b**10") 100 loops, best of 3: 7.59 ms per loop # 20x: large speed-up due to optimised pow() For unaligned arrays, the speed-ups can be even larger:: In [9]: a = np.empty(1e6, dtype="b1,f8")['f1'] In [10]: b = np.empty(1e6, dtype="b1,f8")['f1'] In [11]: a.flags.aligned, b.flags.aligned Out[11]: (False, False) In [12]: a[:] = np.random.rand(len(a)) In [13]: b[:] = np.random.rand(len(b)) In [14]: timeit 2*a + 3*b 10 loops, best of 3: 29.5 ms per loop In [15]: timeit ne.evaluate("2*a + 3*b") 100 loops, best of 3: 7.46 ms per loop # ~ 4x speed-up numexpr-2.7.1/doc/make.bat000066400000000000000000000164371360375525100154130ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. 
html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. epub3 to make an epub3 echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. xml to make Docutils-native XML files echo. pseudoxml to make pseudoxml-XML files for display purposes echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled echo. coverage to run coverage check of the documentation if enabled echo. dummy to check syntax errors of document sources goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) REM Check if sphinx-build is available and fallback to Python version if any %SPHINXBUILD% 1>NUL 2>NUL if errorlevel 9009 goto sphinx_python goto sphinx_ok :sphinx_python set SPHINXBUILD=python -m sphinx.__init__ %SPHINXBUILD% 2> nul if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) :sphinx_ok if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\numexpr3.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\numexpr3.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. 
goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "epub3" ( %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdf" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf cd %~dp0 echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "latexpdfja" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex cd %BUILDDIR%/latex make all-pdf-ja cd %~dp0 echo. echo.Build finished; the PDF files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) if "%1" == "coverage" ( %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage if errorlevel 1 exit /b 1 echo. echo.Testing of coverage in the sources finished, look at the ^ results in %BUILDDIR%/coverage/python.txt. goto end ) if "%1" == "xml" ( %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml if errorlevel 1 exit /b 1 echo. echo.Build finished. The XML files are in %BUILDDIR%/xml. goto end ) if "%1" == "pseudoxml" ( %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml if errorlevel 1 exit /b 1 echo. echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. goto end ) if "%1" == "dummy" ( %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy if errorlevel 1 exit /b 1 echo. echo.Build finished. Dummy builder generates no files. goto end ) :end numexpr-2.7.1/doc/mkl.rst000066400000000000000000000130771360375525100153200ustar00rootroot00000000000000NumExpr with Intel MKL ====================== Numexpr has support for Intel's VML (included in Intel's MKL) in order to accelerate the evaluation of transcendental functions on Intel CPUs. Here it is a small example on the kind of improvement you may get by using it. 
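Before running the benchmark, it is worth checking whether your numexpr build actually detected MKL/VML. A quick interactive check (output omitted here, since it depends on how your copy was built) is::

    >>> import numexpr as ne
    >>> ne.use_vml            # True only when numexpr was compiled against MKL's VML
    >>> ne.get_vml_version()  # reports the VML/MKL version on MKL-enabled builds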
A first benchmark ----------------- Firstly, we are going to exercise how MKL performs when computing a couple of simple expressions. One is a pure algebraic one: :code:`2*y + 4*x` and the other contains transcendental functions: :code:`sin(x)**2 + cos(y)**2`. For this, we are going to use this worksheet_. I (Francesc Alted) ran this benchmark on a Intel Xeon E3-1245 v5 @ 3.50GHz. Here are the results when not using MKL:: NumPy version: 1.11.1 Time for an algebraic expression: 0.168 s / 6.641 GB/s Time for a transcendental expression: 1.945 s / 0.575 GB/s Numexpr version: 2.6.1. Using MKL: False Time for an algebraic expression: 0.058 s / 19.116 GB/s Time for a transcendental expression: 0.283 s / 3.950 GB/s And now, using MKL:: NumPy version: 1.11.1 Time for an algebraic expression: 0.169 s / 6.606 GB/s Time for a transcendental expression: 1.943 s / 0.575 GB/s Numexpr version: 2.6.1. Using MKL: True Time for an algebraic expression: 0.058 s / 19.153 GB/s Time for a transcendental expression: 0.075 s / 14.975 GB/s As you can see, numexpr using MKL can be up to 3.8x faster for the case of the transcendental expression. Also, you can notice that the pure algebraic expression is not accelerated at all. This is completely expected, as the MKL is offering accelerations for CPU bounded functions (sin, cos, tan, exp, log, sinh...) and not pure multiplications or adds. Finally, note how numexpr+MKL can be up to 26x faster than using a pure NumPy solution. And this was using a processor with just four physical cores; you should expect more speedup as you throw more cores at that. .. _worksheet: https://github.com/pydata/numexpr/blob/master/bench/vml_timing2.py More benchmarks (older) ----------------------- Numexpr & VML can both use several threads for doing computations. Let's see how performance improves by using 1 or 2 threads on a 2-core Intel CPU (Core2 E8400 @ 3.00GHz). Using 1 thread ^^^^^^^^^^^^^^ Here we have some benchmarks on the improvement of speed that Intel's VML can achieve. First, look at times by some easy expression containing sine and cosine operations *without* using VML:: In [17]: ne.use_vml Out[17]: False In [18]: x = np.linspace(-1, 1, 1e6) In [19]: timeit np.sin(x)**2+np.cos(x)**2 10 loops, best of 3: 43.1 ms per loop In [20]: ne.set_num_threads(1) Out[20]: 2 In [21]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 10 loops, best of 3: 29.5 ms per loop and now using VML:: In [37]: ne.use_vml Out[37]: True In [38]: x = np.linspace(-1, 1, 1e6) In [39]: timeit np.sin(x)**2+np.cos(x)**2 10 loops, best of 3: 42.8 ms per loop In [40]: ne.set_num_threads(1) Out[40]: 2 In [41]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 19.8 ms per loop Hey, VML can accelerate computations by a 50% using a single CPU. That's great! Using 2 threads ^^^^^^^^^^^^^^^ First, look at the time of the non-VML numexpr when using 2 threads:: In [22]: ne.set_num_threads(2) Out[22]: 1 In [23]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 15.3 ms per loop OK. We've got an almost perfect 2x improvement in speed with regard to the 1 thread case. Let's see about the VML-powered numexpr version:: In [43]: ne.set_num_threads(2) Out[43]: 1 In [44]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 12.2 ms per loop Ok, that's about 1.6x improvement over the 1 thread VML computation, and still a 25% of improvement over the non-VML version. Good, native numexpr multithreading code really looks very efficient! 
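If you want to repeat this 1-thread vs. 2-thread comparison on your own hardware, a minimal stand-alone script along the following lines should do; the array size and thread counts are arbitrary choices, and the timings will of course vary with your CPU and MKL build::

    import timeit
    import numpy as np
    import numexpr as ne

    x = np.linspace(-1, 1, 10**6)

    for nthreads in (1, 2):
        ne.set_num_threads(nthreads)
        # best time per evaluate() call, in seconds
        best = min(timeit.repeat("ne.evaluate('sin(x)**2 + cos(x)**2')",
                                 globals=globals(), repeat=3, number=10)) / 10
        print("%d thread(s): %.1f ms per loop" % (nthreads, best * 1e3))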
Numexpr native threading code vs VML's one ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You may already know that both numexpr and Intel's VML do have support for multithreaded computations, but you might be curious about which one is more efficient, so here it goes a hint. First, using the VML multithreaded implementation:: In [49]: ne.set_vml_num_threads(2) In [50]: ne.set_num_threads(1) Out[50]: 1 In [51]: ne.set_vml_num_threads(2) In [52]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 16.8 ms per loop and now, using the native numexpr threading code:: In [53]: ne.set_num_threads(2) Out[53]: 1 In [54]: ne.set_vml_num_threads(1) In [55]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 12 ms per loop This means that numexpr's native multithreaded code is about 40% faster than VML's for this case. So, in general, you should use the former with numexpr (and this is the default actually). Mixing numexpr's and VML multithreading capabilities ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Finally, you might be tempted to use both multithreading codes at the same time, but you will be deceived about the improvement in performance:: In [57]: ne.set_vml_num_threads(2) In [58]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 17.7 ms per loop Your code actually performs much worse. That's normal too because you are trying to run 4 threads on a 2-core CPU. For CPUs with many cores, you may want to try with different threading configurations, but as a rule of thumb, numexpr's one will generally win.numexpr-2.7.1/doc/release_notes.rst000066400000000000000000000000761360375525100173600ustar00rootroot00000000000000Release Notes ============= .. include:: ../RELEASE_NOTES.rstnumexpr-2.7.1/doc/requirements.txt000066400000000000000000000000251360375525100172540ustar00rootroot00000000000000numpy>=1.7 numpydoc numexpr-2.7.1/doc/user_guide.rst000066400000000000000000000321711360375525100166640ustar00rootroot00000000000000NumExpr 2.0 User Guide ====================== The :code:`numexpr` package supplies routines for the fast evaluation of array expressions elementwise by using a vector-based virtual machine. Using it is simple:: >>> import numpy as np >>> import numexpr as ne >>> a = np.arange(10) >>> b = np.arange(0, 20, 2) >>> c = ne.evaluate("2*a+3*b") >>> c array([ 0, 8, 16, 24, 32, 40, 48, 56, 64, 72]) Building -------- *NumExpr* requires Python_ 2.6 or greater, and NumPy_ 1.7 or greater. It is built in the standard Python way: .. code-block:: bash $ python setup.py build $ python setup.py install You must have a C-compiler (i.e. MSVC on Windows and GCC on Linux) installed. Then change to a directory that is not the repository directory (e.g. `/tmp`) and test :code:`numexpr` with: .. code-block:: bash $ python -c "import numexpr; numexpr.test()" .. _Python: http://python.org .. _NumPy: http://numpy.scipy.org Enabling Intel VML support -------------------------- Starting from release 1.2 on, numexpr includes support for Intel's VML library. This allows for better performance on Intel architectures, mainly when evaluating transcendental functions (trigonometrical, exponential, ...). It also enables numexpr using several CPU cores. If you have Intel's MKL (the library that embeds VML), just copy the :code:`site.cfg.example` that comes in the distribution to :code:`site.cfg` and edit the latter giving proper directions on how to find your MKL libraries in your system. After doing this, you can proceed with the usual building instructions listed above. 
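For reference, a minimal :code:`site.cfg` might look something like the snippet below; treat the paths and the library name as placeholders to be adapted to your own MKL installation (the bundled :code:`site.cfg.example` is the authoritative template)::

    [mkl]
    include_dirs = /opt/intel/mkl/include
    library_dirs = /opt/intel/mkl/lib/intel64
    libraries = mkl_rt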
Pay attention to the messages during the building process in order to know whether MKL has been detected or not. Finally, you can check the speed-ups on your machine by running the :code:`bench/vml_timing.py` script (you can play with different parameters to the :code:`set_vml_accuracy_mode()` and :code:`set_vml_num_threads()` functions in the script so as to see how it would affect performance). Threadpool Configuration ------------------------ Threads are spawned at import-time, with the number being set by the environment variable ``NUMEXPR_MAX_THREADS``. The default maximum thread count is **64**. There is no advantage to spawning more threads than the number of virtual cores available on the computing node. Practically NumExpr scales at large thread count (`> 8`) only on very large matrices (`> 2**22`). Spawning large numbers of threads is not free, and can increase import times for NumExpr or packages that import it such as Pandas or PyTables. If desired, the number of threads in the pool used can be adjusted via an environment variable, ``NUMEXPR_NUM_THREADS`` (preferred) or ``OMP_NUM_THREADS``. Typically only setting ``NUMEXPR_MAX_THREADS`` is sufficient; the number of threads used can be adjusted dynamically via ``numexpr.set_num_threads(int)``. The number of threads can never exceed that set by ``NUMEXPR_MAX_THREADS``. If the user has not configured the environment prior to importing NumExpr, info logs will be generated, and the initial number of threads _that are used_ will be set to the number of cores detected in the system or 8, whichever is *less*. Usage:: import os os.environ['NUMEXPR_MAX_THREADS'] = '16' os.environ['NUMEXPR_NUM_THREADS'] = '8' import numexpr as ne Usage Notes ----------- `NumExpr`'s principal routine is:: evaluate(ex, local_dict=None, global_dict=None, optimization='aggressive', truediv='auto') where :code:`ex` is a string forming an expression, like :code:`"2*a+3*b"`. The values for :code:`a` and :code:`b` will by default be taken from the calling function's frame (through the use of :code:`sys._getframe()`). Alternatively, they can be specified using the :code:`local_dict` or :code:`global_dict` arguments, or passed as keyword arguments. The :code:`optimization` parameter can take the values :code:`'moderate'` or :code:`'aggressive'`. :code:`'moderate'` means that no optimization is made that can affect precision at all. :code:`'aggressive'` (the default) means that the expression can be rewritten in a way that precision *could* be affected, but normally very little. For example, in :code:`'aggressive'` mode, the transformation :code:`x~**3` -> :code:`x*x*x` is made, but not in :code:`'moderate'` mode. The `truediv` parameter specifies whether the division is a 'floor division' (False) or a 'true division' (True). The default is the value of `__future__.division` in the interpreter. See PEP 238 for details. Expressions are cached, so reuse is fast. Arrays or scalars are allowed for the variables, which must be of type 8-bit boolean (bool), 32-bit signed integer (int), 64-bit signed integer (long), double-precision floating point number (float), 2x64-bit, double-precision complex number (complex) or raw string of bytes (str). If they are not in the previous set of types, they will be properly upcasted for internal use (the result will be affected as well). The arrays must all be the same size. 
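As a short illustration of the :code:`local_dict` argument described above, the operands can be supplied explicitly instead of being looked up in the calling frame (the variable names used here are arbitrary)::

    >>> x = np.arange(5)
    >>> y = np.arange(5, 10)
    >>> ne.evaluate("2*x + 3*y", local_dict={'x': x, 'y': y})
    array([15, 20, 25, 30, 35])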
Datatypes supported internally ------------------------------ *NumExpr* operates internally only with the following types: * 8-bit boolean (bool) * 32-bit signed integer (int or int32) * 64-bit signed integer (long or int64) * 32-bit single-precision floating point number (float or float32) * 64-bit, double-precision floating point number (double or float64) * 2x64-bit, double-precision complex number (complex or complex128) * Raw string of bytes (str) If the arrays in the expression does not match any of these types, they will be upcasted to one of the above types (following the usual type inference rules, see below). Have this in mind when doing estimations about the memory consumption during the computation of your expressions. Also, the types in NumExpr conditions are somewhat stricter than those of Python. For instance, the only valid constants for booleans are :code:`True` and :code:`False`, and they are never automatically cast to integers. Casting rules ------------- Casting rules in NumExpr follow closely those of *NumPy*. However, for implementation reasons, there are some known exceptions to this rule, namely: * When an array with type :code:`int8`, :code:`uint8`, :code:`int16` or :code:`uint16` is used inside NumExpr, it is internally upcasted to an :code:`int` (or :code:`int32` in NumPy notation). * When an array with type :code:`uint32` is used inside NumExpr, it is internally upcasted to a :code:`long` (or :code:`int64` in NumPy notation). * A floating point function (e.g. :code:`sin`) acting on :code:`int8` or :code:`int16` types returns a :code:`float64` type, instead of the :code:`float32` that is returned by NumPy functions. This is mainly due to the absence of native :code:`int8` or :code:`int16` types in NumExpr. * In operations implying a scalar and an array, the normal rules of casting are used in NumExpr, in contrast with NumPy, where array types takes priority. For example, if :code:`a` is an array of type :code:`float32` and :code:`b` is an scalar of type :code:`float64` (or Python :code:`float` type, which is equivalent), then :code:`a*b` returns a :code:`float64` in NumExpr, but a :code:`float32` in NumPy (i.e. array operands take priority in determining the result type). If you need to keep the result a :code:`float32`, be sure you use a :code:`float32` scalar too. Supported operators ------------------- *NumExpr* supports the set of operators listed below: * Logical operators: :code:`&, |, ~` * Comparison operators: :code:`<, <=, ==, !=, >=, >` * Unary arithmetic operators: :code:`-` * Binary arithmetic operators: :code:`+, -, *, /, **, %, <<, >>` Supported functions ------------------- The next are the current supported set: * :code:`where(bool, number1, number2): number` -- number1 if the bool condition is true, number2 otherwise. * :code:`{sin,cos,tan}(float|complex): float|complex` -- trigonometric sine, cosine or tangent. * :code:`{arcsin,arccos,arctan}(float|complex): float|complex` -- trigonometric inverse sine, cosine or tangent. * :code:`arctan2(float1, float2): float` -- trigonometric inverse tangent of float1/float2. * :code:`{sinh,cosh,tanh}(float|complex): float|complex` -- hyperbolic sine, cosine or tangent. * :code:`{arcsinh,arccosh,arctanh}(float|complex): float|complex` -- hyperbolic inverse sine, cosine or tangent. * :code:`{log,log10,log1p}(float|complex): float|complex` -- natural, base-10 and log(1+x) logarithms. * :code:`{exp,expm1}(float|complex): float|complex` -- exponential and exponential minus one. 
* :code:`sqrt(float|complex): float|complex` -- square root. * :code:`abs(float|complex): float|complex` -- absolute value. * :code:`conj(complex): complex` -- conjugate value. * :code:`{real,imag}(complex): float` -- real or imaginary part of complex. * :code:`complex(float, float): complex` -- complex from real and imaginary parts. * :code:`contains(str, str): bool` -- returns True for every string in :code:`op1` that contains :code:`op2`. Notes ----- * :code:`abs()` for complex inputs returns a :code:`complex` output too. This is a departure from NumPy where a :code:`float` is returned instead. However, NumExpr is not flexible enough yet so as to allow this to happen. Meanwhile, if you want to mimic NumPy behaviour, you may want to select the real part via the :code:`real` function (e.g. :code:`real(abs(cplx))`) or via the :code:`real` selector (e.g. :code:`abs(cplx).real`). More functions can be added if you need them. Note however that NumExpr 2.6 is in maintenance mode and a new major revision is under development. Supported reduction operations ------------------------------ The next are the current supported set: * :code:`sum(number, axis=None)`: Sum of array elements over a given axis. Negative axis are not supported. * :code:`prod(number, axis=None)`: Product of array elements over a given axis. Negative axis are not supported. *Note:* because of internal limitations, reduction operations must appear the last in the stack. If not, it will be issued an error like:: >>> ne.evaluate('sum(1)*(-1)') RuntimeError: invalid program: reduction operations must occur last General routines ---------------- * :code:`evaluate(expression, local_dict=None, global_dict=None, optimization='aggressive', truediv='auto')`: Evaluate a simple array expression element-wise. See examples above. * :code:`re_evaluate(local_dict=None)`: Re-evaluate the last array expression without any check. This is meant for accelerating loops that are re-evaluating the same expression repeatedly without changing anything else than the operands. If unsure, use evaluate() which is safer. * :code:`test()`: Run all the tests in the test suite. * :code:`print_versions()`: Print the versions of software that numexpr relies on. * :code:`set_num_threads(nthreads)`: Sets a number of threads to be used in operations. Returns the previous setting for the number of threads. See note below to see how the number of threads is set via environment variables. If you are using VML, you may want to use *set_vml_num_threads(nthreads)* to perform the parallel job with VML instead. However, you should get very similar performance with VML-optimized functions, and VML's parallelizer cannot deal with common expressions like `(x+1)*(x-2)`, while NumExpr's one can. * :code:`detect_number_of_cores()`: Detects the number of cores on a system. Intel's VML specific support routines ------------------------------------- When compiled with Intel's VML (Vector Math Library), you will be able to use some additional functions for controlling its use. These are: * :code:`set_vml_accuracy_mode(mode)`: Set the accuracy for VML operations. The :code:`mode` parameter can take the values: - :code:`'low'`: Equivalent to VML_LA - low accuracy VML functions are called - :code:`'high'`: Equivalent to VML_HA - high accuracy VML functions are called - :code:`'fast'`: Equivalent to VML_EP - enhanced performance VML functions are called It returns the previous mode. This call is equivalent to the :code:`vmlSetMode()` in the VML library. 
See: http://www.intel.com/software/products/mkl/docs/webhelp/vml/vml_DataTypesAccuracyModes.html for more info on the accuracy modes. * :code:`set_vml_num_threads(nthreads)`: Suggests a maximum number of threads to be used in VML operations. This function is equivalent to the call :code:`mkl_domain_set_num_threads(nthreads, MKL_VML)` in the MKL library. See: http://www.intel.com/software/products/mkl/docs/webhelp/support/functn_mkl_domain_set_num_threads.html for more info about it. * :code:`get_vml_version()`: Get the VML/MKL library version. Authors ------- .. include:: ../AUTHORS.txt License ------- NumExpr is distributed under the MIT_ license. .. _MIT: http://www.opensource.org/licenses/mit-license.phpnumexpr-2.7.1/doc/vm2.rst000066400000000000000000000101551360375525100152330ustar00rootroot00000000000000Performance of the Virtual Machine in NumExpr2.0 ================================================ Numexpr 2.0 leverages a new virtual machine completely based on the new ndarray iterator introduced in NumPy 1.6. This represents a nice combination of the advantages of using the new iterator, while retaining the ability to avoid copies in memory as well as the multi-threading capabilities of the previous virtual machine (1.x series). The increased performance of the new virtual machine can be seen in several scenarios, like: * *Broadcasting*. Expressions containing arrays that needs to be broadcasted, will not need additional memory (i.e. they will be broadcasted on-the-fly). * *Non-native dtypes*. These will be translated to native dtypes on-the-fly, so there is not need to convert the whole arrays first. * *Fortran-ordered arrays*. The new iterator will find the best path to optimize operations on such arrays, without the need to transpose them first. There is a drawback though: performance with small arrays suffers a bit because of higher set-up times for the new virtual machine. See below for detailed benchmarks. Some benchmarks for best-case scenarios --------------------------------------- Here you have some benchmarks of some scenarios where the new virtual machine actually represents an advantage in terms of speed (also memory, but this is not shown here). As you will see, the improvement is notable in many areas, ranging from 3x to 6x faster operations. 
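Note that the timings below were collected in the NumPy 1.6 era and pass float sizes such as :code:`1e3` directly to :code:`reshape`; recent NumPy releases require integer sizes there, so if you want to re-run the snippets you may need to spell the setup with integers, e.g.::

    >>> import numpy as np
    >>> import numexpr as ne
    >>> a = np.arange(10**3)
    >>> b = np.arange(10**6).reshape(10**3, 10**3)
    >>> ne.evaluate("a*(b+1)").shape
    (1000, 1000)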
Broadcasting ^^^^^^^^^^^^ >>> a = np.arange(1e3) >>> b = np.arange(1e6).reshape(1e3, 1e3) >>> timeit ne.evaluate("a*(b+1)") # 1.4.2 100 loops, best of 3: 16.4 ms per loop >>> timeit ne.evaluate("a*(b+1)") # 2.0 100 loops, best of 3: 5.2 ms per loop Non-native types ^^^^^^^^^^^^^^^^ >>> a = np.arange(1e6, dtype=">f8") >>> b = np.arange(1e6, dtype=">f8") >>> timeit ne.evaluate("a*(b+1)") # 1.4.2 100 loops, best of 3: 17.2 ms per loop >>> timeit ne.evaluate("a*(b+1)") # 2.0 100 loops, best of 3: 6.32 ms per loop Fortran-ordered arrays ^^^^^^^^^^^^^^^^^^^^^^ >>> a = np.arange(1e6).reshape(1e3, 1e3).copy('F') >>> b = np.arange(1e6).reshape(1e3, 1e3).copy('F') >>> timeit ne.evaluate("a*(b+1)") # 1.4.2 10 loops, best of 3: 32.8 ms per loop >>> timeit ne.evaluate("a*(b+1)") # 2.0 100 loops, best of 3: 5.62 ms per loop Mix of 'non-native' arrays, Fortran-ordered, and using broadcasting ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ >>> a = np.arange(1e3, dtype='>f8').copy('F') >>> b = np.arange(1e6, dtype='>f8').reshape(1e3, 1e3).copy('F') >>> timeit ne.evaluate("a*(b+1)") # 1.4.2 10 loops, best of 3: 21.2 ms per loop >>> timeit ne.evaluate("a*(b+1)") # 2.0 100 loops, best of 3: 5.22 ms per loop Longer setup-time ^^^^^^^^^^^^^^^^^ The only drawback of the new virtual machine is during the computation of small arrays:: >>> a = np.arange(10) >>> b = np.arange(10) >>> timeit ne.evaluate("a*(b+1)") # 1.4.2 10000 loops, best of 3: 22.1 µs per loop >>> timeit ne.evaluate("a*(b+1)") # 2.0 10000 loops, best of 3: 30.6 µs per loop i.e. the new virtual machine takes a bit more time to set-up (around 8 µs in this machine). However, this should be not too important because for such a small arrays NumPy is always a better option:: >>> timeit c = a*(b+1) 100000 loops, best of 3: 4.16 µs per loop And for arrays large enough the difference is negligible:: >>> a = np.arange(1e6) >>> b = np.arange(1e6) >>> timeit ne.evaluate("a*(b+1)") # 1.4.2 100 loops, best of 3: 5.77 ms per loop >>> timeit ne.evaluate("a*(b+1)") # 2.0 100 loops, best of 3: 5.77 ms per loop Conclusion ---------- The new virtual machine introduced in numexpr 2.0 brings more performance in many different scenarios (broadcast, non-native dtypes, Fortran-orderd arrays), while it shows slightly worse performance for small arrays. However, as numexpr is more geared to compute large arrays, the new virtual machine should be good news for numexpr users in general.numexpr-2.7.1/numexpr/000077500000000000000000000000001360375525100147245ustar00rootroot00000000000000numexpr-2.7.1/numexpr/__init__.py000066400000000000000000000044241360375525100170410ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### """ Numexpr is a fast numerical expression evaluator for NumPy. With it, expressions that operate on arrays (like "3*a+4*b") are accelerated and use less memory than doing the same calculation in Python. See: https://github.com/pydata/numexpr for more info about it. 
""" from __config__ import show as show_config, get_info if get_info('mkl'): use_vml = True else: use_vml = False is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE # cpuinfo imports were moved into the test submodule function that calls them # to improve import times. import os, os.path import platform from numexpr.expressions import E from numexpr.necompiler import NumExpr, disassemble, evaluate, re_evaluate from numexpr.interpreter import MAX_THREADS from numexpr.utils import (_init_num_threads, get_vml_version, set_vml_accuracy_mode, set_vml_num_threads, set_num_threads, detect_number_of_cores, detect_number_of_threads) # Detect the number of cores ncores = detect_number_of_cores() # Initialize the number of threads to be used nthreads = _init_num_threads() # The default for VML is 1 thread (see #39) set_vml_num_threads(1) import version __version__ = version.version def print_versions(): """Print the versions of software that numexpr relies on.""" try: import numexpr.tests return numexpr.tests.print_versions() except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') def test(verbosity=1): """Run all the tests in the test suite.""" try: import numexpr.tests return numexpr.tests.test(verbosity=verbosity) except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.')numexpr-2.7.1/numexpr/complex_functions.hpp000066400000000000000000000212201360375525100211710ustar00rootroot00000000000000#ifndef NUMEXPR_COMPLEX_FUNCTIONS_HPP #define NUMEXPR_COMPLEX_FUNCTIONS_HPP /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ // TODO: Could just use std::complex and std::complex /* constants */ static npy_cdouble nc_1 = {1., 0.}; static npy_cdouble nc_half = {0.5, 0.}; static npy_cdouble nc_i = {0., 1.}; static npy_cdouble nc_i2 = {0., 0.5}; /* static npy_cdouble nc_mi = {0., -1.}; static npy_cdouble nc_pi2 = {M_PI/2., 0.}; */ /* *************************** WARNING ***************************** Due to the way Numexpr places the results of operations, the *x and *r pointers do point to the same address (apparently this doesn't happen in NumPy). So, measures should be taken so as to not to reuse *x after the first *r has been overwritten. 
********************************************************************* */ static void nc_assign(npy_cdouble *x, npy_cdouble *r) { r->real = x->real; r->imag = x->imag; return; } static void nc_sum(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { r->real = a->real + b->real; r->imag = a->imag + b->imag; return; } static void nc_diff(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { r->real = a->real - b->real; r->imag = a->imag - b->imag; return; } static void nc_neg(npy_cdouble *a, npy_cdouble *r) { r->real = -a->real; r->imag = -a->imag; return; } static void nc_conj(npy_cdouble *a, npy_cdouble *r) { r->real = a->real; r->imag = -a->imag; return; } // Needed for allowing the internal casting in numexpr machinery for // conjugate operations inline float fconjf(float x) { return x; } // Needed for allowing the internal casting in numexpr machinery for // conjugate operations inline double fconj(double x) { return x; } static void nc_prod(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { double ar=a->real, br=b->real, ai=a->imag, bi=b->imag; r->real = ar*br - ai*bi; r->imag = ar*bi + ai*br; return; } static void nc_quot(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { double ar=a->real, br=b->real, ai=a->imag, bi=b->imag; double d = br*br + bi*bi; r->real = (ar*br + ai*bi)/d; r->imag = (ai*br - ar*bi)/d; return; } static void nc_sqrt(npy_cdouble *x, npy_cdouble *r) { double s,d; if (x->real == 0. && x->imag == 0.) *r = *x; else { s = sqrt((fabs(x->real) + hypot(x->real,x->imag))/2); d = x->imag/(2*s); if (x->real > 0.) { r->real = s; r->imag = d; } else if (x->imag >= 0.) { r->real = d; r->imag = s; } else { r->real = -d; r->imag = -s; } } return; } static void nc_log(npy_cdouble *x, npy_cdouble *r) { double l = hypot(x->real,x->imag); r->imag = atan2(x->imag, x->real); r->real = log(l); return; } static void nc_log1p(npy_cdouble *x, npy_cdouble *r) { double l = hypot(x->real + 1.0,x->imag); r->imag = atan2(x->imag, x->real + 1.0); r->real = log(l); return; } static void nc_exp(npy_cdouble *x, npy_cdouble *r) { double a = exp(x->real); r->real = a*cos(x->imag); r->imag = a*sin(x->imag); return; } static void nc_expm1(npy_cdouble *x, npy_cdouble *r) { double a = exp(x->real); r->real = a*cos(x->imag) - 1.0; r->imag = a*sin(x->imag); return; } static void nc_pow(npy_cdouble *a, npy_cdouble *b, npy_cdouble *r) { npy_intp n; double ar=a->real, br=b->real, ai=a->imag, bi=b->imag; if (br == 0. && bi == 0.) { r->real = 1.; r->imag = 0.; return; } if (ar == 0. && ai == 0.) 
{ r->real = 0.; r->imag = 0.; return; } if (bi == 0 && (n=(npy_intp)br) == br) { if (n > -100 && n < 100) { npy_cdouble p, aa; npy_intp mask = 1; if (n < 0) n = -n; aa = nc_1; p.real = ar; p.imag = ai; while (1) { if (n & mask) nc_prod(&aa,&p,&aa); mask <<= 1; if (n < mask || mask <= 0) break; nc_prod(&p,&p,&p); } r->real = aa.real; r->imag = aa.imag; if (br < 0) nc_quot(&nc_1, r, r); return; } } /* complexobject.c uses an inline version of this formula investigate whether this had better performance or accuracy */ nc_log(a, r); nc_prod(r, b, r); nc_exp(r, r); return; } static void nc_prodi(npy_cdouble *x, npy_cdouble *r) { double xr = x->real; r->real = -x->imag; r->imag = xr; return; } static void nc_acos(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, *pa=&a; nc_assign(x, pa); nc_prod(x,x,r); nc_diff(&nc_1, r, r); nc_sqrt(r, r); nc_prodi(r, r); nc_sum(pa, r, r); nc_log(r, r); nc_prodi(r, r); nc_neg(r, r); return; /* return nc_neg(nc_prodi(nc_log(nc_sum(x,nc_prod(nc_i, nc_sqrt(nc_diff(nc_1,nc_prod(x,x)))))))); */ } static void nc_acosh(npy_cdouble *x, npy_cdouble *r) { npy_cdouble t, a, *pa=&a; nc_assign(x, pa); nc_sum(x, &nc_1, &t); nc_sqrt(&t, &t); nc_diff(x, &nc_1, r); nc_sqrt(r, r); nc_prod(&t, r, r); nc_sum(pa, r, r); nc_log(r, r); return; /* return nc_log(nc_sum(x, nc_prod(nc_sqrt(nc_sum(x,nc_1)), nc_sqrt(nc_diff(x,nc_1))))); */ } static void nc_asin(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, *pa=&a; nc_prodi(x, pa); nc_prod(x, x, r); nc_diff(&nc_1, r, r); nc_sqrt(r, r); nc_sum(pa, r, r); nc_log(r, r); nc_prodi(r, r); nc_neg(r, r); return; /* return nc_neg(nc_prodi(nc_log(nc_sum(nc_prod(nc_i,x), nc_sqrt(nc_diff(nc_1,nc_prod(x,x))))))); */ } static void nc_asinh(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, *pa=&a; nc_assign(x, pa); nc_prod(x, x, r); nc_sum(&nc_1, r, r); nc_sqrt(r, r); nc_sum(r, pa, r); nc_log(r, r); return; /* return nc_log(nc_sum(nc_sqrt(nc_sum(nc_1,nc_prod(x,x))),x)); */ } static void nc_atan(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, *pa=&a; nc_diff(&nc_i, x, pa); nc_sum(&nc_i, x, r); nc_quot(r, pa, r); nc_log(r,r); nc_prod(&nc_i2, r, r); return; /* return nc_prod(nc_i2,nc_log(nc_quot(nc_sum(nc_i,x),nc_diff(nc_i,x)))); */ } static void nc_atanh(npy_cdouble *x, npy_cdouble *r) { npy_cdouble a, b, *pa=&a, *pb=&b; nc_assign(x, pa); nc_diff(&nc_1, pa, r); nc_sum(&nc_1, pa, pb); nc_quot(pb, r, r); nc_log(r, r); nc_prod(&nc_half, r, r); return; /* return nc_prod(nc_half,nc_log(nc_quot(nc_sum(nc_1,x),nc_diff(nc_1,x)))); */ } static void nc_cos(npy_cdouble *x, npy_cdouble *r) { double xr=x->real, xi=x->imag; r->real = cos(xr)*cosh(xi); r->imag = -sin(xr)*sinh(xi); return; } static void nc_cosh(npy_cdouble *x, npy_cdouble *r) { double xr=x->real, xi=x->imag; r->real = cos(xi)*cosh(xr); r->imag = sin(xi)*sinh(xr); return; } #define M_LOG10_E 0.434294481903251827651128918916605082294397 static void nc_log10(npy_cdouble *x, npy_cdouble *r) { nc_log(x, r); r->real *= M_LOG10_E; r->imag *= M_LOG10_E; return; } static void nc_sin(npy_cdouble *x, npy_cdouble *r) { double xr=x->real, xi=x->imag; r->real = sin(xr)*cosh(xi); r->imag = cos(xr)*sinh(xi); return; } static void nc_sinh(npy_cdouble *x, npy_cdouble *r) { double xr=x->real, xi=x->imag; r->real = cos(xi)*sinh(xr); r->imag = sin(xi)*cosh(xr); return; } static void nc_tan(npy_cdouble *x, npy_cdouble *r) { double sr,cr,shi,chi; double rs,is,rc,ic; double d; double xr=x->real, xi=x->imag; sr = sin(xr); cr = cos(xr); shi = sinh(xi); chi = cosh(xi); rs = sr*chi; is = cr*shi; rc = cr*chi; ic = -sr*shi; d = rc*rc 
+ ic*ic; r->real = (rs*rc+is*ic)/d; r->imag = (is*rc-rs*ic)/d; return; } static void nc_tanh(npy_cdouble *x, npy_cdouble *r) { double si,ci,shr,chr; double rs,is,rc,ic; double d; double xr=x->real, xi=x->imag; si = sin(xi); ci = cos(xi); shr = sinh(xr); chr = cosh(xr); rs = ci*shr; is = si*chr; rc = ci*chr; ic = si*shr; d = rc*rc + ic*ic; r->real = (rs*rc+is*ic)/d; r->imag = (is*rc-rs*ic)/d; return; } static void nc_abs(npy_cdouble *x, npy_cdouble *r) { r->real = sqrt(x->real*x->real + x->imag*x->imag); r->imag = 0; } #endif // NUMEXPR_COMPLEX_FUNCTIONS_HPP numexpr-2.7.1/numexpr/cpuinfo.py000077500000000000000000000611411360375525100167470ustar00rootroot00000000000000#!/usr/bin/env python ################################################################### # cpuinfo - Get information about CPU # # License: BSD # Author: Pearu Peterson # # See LICENSES/cpuinfo.txt for details about copyright and # rights to use. #################################################################### """ cpuinfo Copyright 2002 Pearu Peterson all rights reserved, Pearu Peterson Permission to use, modify, and distribute this software is given under the terms of the NumPy (BSD style) license. See LICENSE.txt that came with this distribution for specifics. NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. Pearu Peterson """ __all__ = ['cpu'] import sys, re, types import os import subprocess import warnings import platform import inspect is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE def getoutput(cmd, successful_status=(0,), stacklevel=1): try: p = subprocess.Popen(cmd, stdout=subprocess.PIPE) output, _ = p.communicate() status = p.returncode except EnvironmentError as e: warnings.warn(str(e), UserWarning, stacklevel=stacklevel) return False, '' if os.WIFEXITED(status) and os.WEXITSTATUS(status) in successful_status: return True, output return False, output def command_info(successful_status=(0,), stacklevel=1, **kw): info = {} for key in kw: ok, output = getoutput(kw[key], successful_status=successful_status, stacklevel=stacklevel + 1) if ok: info[key] = output.strip() return info def command_by_line(cmd, successful_status=(0,), stacklevel=1): ok, output = getoutput(cmd, successful_status=successful_status, stacklevel=stacklevel + 1) if not ok: return # XXX: check output = output.decode('ascii') for line in output.splitlines(): yield line.strip() def key_value_from_command(cmd, sep, successful_status=(0,), stacklevel=1): d = {} for line in command_by_line(cmd, successful_status=successful_status, stacklevel=stacklevel + 1): l = [s.strip() for s in line.split(sep, 1)] if len(l) == 2: d[l[0]] = l[1] return d class CPUInfoBase(object): """Holds CPU information and provides methods for requiring the availability of various CPU features. 
""" def _try_call(self, func): try: return func() except: pass def __getattr__(self, name): if not name.startswith('_'): if hasattr(self, '_' + name): attr = getattr(self, '_' + name) if inspect.ismethod(attr): return lambda func=self._try_call, attr=attr: func(attr) else: return lambda: None raise AttributeError(name) def _getNCPUs(self): return 1 def __get_nbits(self): abits = platform.architecture()[0] nbits = re.compile('(\d+)bit').search(abits).group(1) return nbits def _is_32bit(self): return self.__get_nbits() == '32' def _is_64bit(self): return self.__get_nbits() == '64' class LinuxCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = [{}] ok, output = getoutput(['uname', '-m']) if ok: info[0]['uname_m'] = output.strip() try: fo = open('/proc/cpuinfo') except EnvironmentError as e: warnings.warn(str(e), UserWarning) else: for line in fo: name_value = [s.strip() for s in line.split(':', 1)] if len(name_value) != 2: continue name, value = name_value if not info or name in info[-1]: # next processor info.append({}) info[-1][name] = value fo.close() self.__class__.info = info def _not_impl(self): pass # Athlon def _is_AMD(self): return self.info[0]['vendor_id'] == 'AuthenticAMD' def _is_AthlonK6_2(self): return self._is_AMD() and self.info[0]['model'] == '2' def _is_AthlonK6_3(self): return self._is_AMD() and self.info[0]['model'] == '3' def _is_AthlonK6(self): return re.match(r'.*?AMD-K6', self.info[0]['model name']) is not None def _is_AthlonK7(self): return re.match(r'.*?AMD-K7', self.info[0]['model name']) is not None def _is_AthlonMP(self): return re.match(r'.*?Athlon\(tm\) MP\b', self.info[0]['model name']) is not None def _is_AMD64(self): return self.is_AMD() and self.info[0]['family'] == '15' def _is_Athlon64(self): return re.match(r'.*?Athlon\(tm\) 64\b', self.info[0]['model name']) is not None def _is_AthlonHX(self): return re.match(r'.*?Athlon HX\b', self.info[0]['model name']) is not None def _is_Opteron(self): return re.match(r'.*?Opteron\b', self.info[0]['model name']) is not None def _is_Hammer(self): return re.match(r'.*?Hammer\b', self.info[0]['model name']) is not None # Alpha def _is_Alpha(self): return self.info[0]['cpu'] == 'Alpha' def _is_EV4(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV4' def _is_EV5(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV5' def _is_EV56(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV56' def _is_PCA56(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'PCA56' # Intel #XXX _is_i386 = _not_impl def _is_Intel(self): return self.info[0]['vendor_id'] == 'GenuineIntel' def _is_i486(self): return self.info[0]['cpu'] == 'i486' def _is_i586(self): return self.is_Intel() and self.info[0]['cpu family'] == '5' def _is_i686(self): return self.is_Intel() and self.info[0]['cpu family'] == '6' def _is_Celeron(self): return re.match(r'.*?Celeron', self.info[0]['model name']) is not None def _is_Pentium(self): return re.match(r'.*?Pentium', self.info[0]['model name']) is not None def _is_PentiumII(self): return re.match(r'.*?Pentium.*?II\b', self.info[0]['model name']) is not None def _is_PentiumPro(self): return re.match(r'.*?PentiumPro\b', self.info[0]['model name']) is not None def _is_PentiumMMX(self): return re.match(r'.*?Pentium.*?MMX\b', self.info[0]['model name']) is not None def _is_PentiumIII(self): return re.match(r'.*?Pentium.*?III\b', self.info[0]['model name']) is not None def _is_PentiumIV(self): return 
re.match(r'.*?Pentium.*?(IV|4)\b', self.info[0]['model name']) is not None def _is_PentiumM(self): return re.match(r'.*?Pentium.*?M\b', self.info[0]['model name']) is not None def _is_Prescott(self): return self.is_PentiumIV() and self.has_sse3() def _is_Nocona(self): return (self.is_Intel() and self.info[0]['cpu family'] in ('6', '15') and # two s sse3; three s ssse3 not the same thing, this is fine (self.has_sse3() and not self.has_ssse3()) and re.match(r'.*?\blm\b', self.info[0]['flags']) is not None) def _is_Core2(self): return (self.is_64bit() and self.is_Intel() and re.match(r'.*?Core\(TM\)2\b', self.info[0]['model name']) is not None) def _is_Itanium(self): return re.match(r'.*?Itanium\b', self.info[0]['family']) is not None def _is_XEON(self): return re.match(r'.*?XEON\b', self.info[0]['model name'], re.IGNORECASE) is not None _is_Xeon = _is_XEON # Power def _is_Power(self): return re.match(r'.*POWER.*', self.info[0]['cpu']) is not None def _is_Power7(self): return re.match(r'.*POWER7.*', self.info[0]['cpu']) is not None def _is_Power8(self): return re.match(r'.*POWER8.*', self.info[0]['cpu']) is not None def _is_Power9(self): return re.match(r'.*POWER9.*', self.info[0]['cpu']) is not None def _has_Altivec(self): return re.match(r'.*altivec\ supported.*', self.info[0]['cpu']) is not None # Varia def _is_singleCPU(self): return len(self.info) == 1 def _getNCPUs(self): return len(self.info) def _has_fdiv_bug(self): return self.info[0]['fdiv_bug'] == 'yes' def _has_f00f_bug(self): return self.info[0]['f00f_bug'] == 'yes' def _has_mmx(self): return re.match(r'.*?\bmmx\b', self.info[0]['flags']) is not None def _has_sse(self): return re.match(r'.*?\bsse\b', self.info[0]['flags']) is not None def _has_sse2(self): return re.match(r'.*?\bsse2\b', self.info[0]['flags']) is not None def _has_sse3(self): return re.match(r'.*?\bpni\b', self.info[0]['flags']) is not None def _has_ssse3(self): return re.match(r'.*?\bssse3\b', self.info[0]['flags']) is not None def _has_3dnow(self): return re.match(r'.*?\b3dnow\b', self.info[0]['flags']) is not None def _has_3dnowext(self): return re.match(r'.*?\b3dnowext\b', self.info[0]['flags']) is not None class IRIXCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = key_value_from_command('sysconf', sep=' ', successful_status=(0, 1)) self.__class__.info = info def _not_impl(self): pass def _is_singleCPU(self): return self.info.get('NUM_PROCESSORS') == '1' def _getNCPUs(self): return int(self.info.get('NUM_PROCESSORS', 1)) def __cputype(self, n): return self.info.get('PROCESSORS').split()[0].lower() == 'r%s' % (n) def _is_r2000(self): return self.__cputype(2000) def _is_r3000(self): return self.__cputype(3000) def _is_r3900(self): return self.__cputype(3900) def _is_r4000(self): return self.__cputype(4000) def _is_r4100(self): return self.__cputype(4100) def _is_r4300(self): return self.__cputype(4300) def _is_r4400(self): return self.__cputype(4400) def _is_r4600(self): return self.__cputype(4600) def _is_r4650(self): return self.__cputype(4650) def _is_r5000(self): return self.__cputype(5000) def _is_r6000(self): return self.__cputype(6000) def _is_r8000(self): return self.__cputype(8000) def _is_r10000(self): return self.__cputype(10000) def _is_r12000(self): return self.__cputype(12000) def _is_rorion(self): return self.__cputype('orion') def get_ip(self): try: return self.info.get('MACHINE') except: pass def __machine(self, n): return self.info.get('MACHINE').lower() == 'ip%s' % (n) def _is_IP19(self): return 
self.__machine(19) def _is_IP20(self): return self.__machine(20) def _is_IP21(self): return self.__machine(21) def _is_IP22(self): return self.__machine(22) def _is_IP22_4k(self): return self.__machine(22) and self._is_r4000() def _is_IP22_5k(self): return self.__machine(22) and self._is_r5000() def _is_IP24(self): return self.__machine(24) def _is_IP25(self): return self.__machine(25) def _is_IP26(self): return self.__machine(26) def _is_IP27(self): return self.__machine(27) def _is_IP28(self): return self.__machine(28) def _is_IP30(self): return self.__machine(30) def _is_IP32(self): return self.__machine(32) def _is_IP32_5k(self): return self.__machine(32) and self._is_r5000() def _is_IP32_10k(self): return self.__machine(32) and self._is_r10000() class DarwinCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = command_info(arch='arch', machine='machine') info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') self.__class__.info = info def _not_impl(self): pass def _getNCPUs(self): return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) def _is_Power_Macintosh(self): return self.info['sysctl_hw']['hw.machine'] == 'Power Macintosh' def _is_i386(self): return self.info['arch'] == 'i386' def _is_ppc(self): return self.info['arch'] == 'ppc' def __machine(self, n): return self.info['machine'] == 'ppc%s' % n def _is_ppc601(self): return self.__machine(601) def _is_ppc602(self): return self.__machine(602) def _is_ppc603(self): return self.__machine(603) def _is_ppc603e(self): return self.__machine('603e') def _is_ppc604(self): return self.__machine(604) def _is_ppc604e(self): return self.__machine('604e') def _is_ppc620(self): return self.__machine(620) def _is_ppc630(self): return self.__machine(630) def _is_ppc740(self): return self.__machine(740) def _is_ppc7400(self): return self.__machine(7400) def _is_ppc7450(self): return self.__machine(7450) def _is_ppc750(self): return self.__machine(750) def _is_ppc403(self): return self.__machine(403) def _is_ppc505(self): return self.__machine(505) def _is_ppc801(self): return self.__machine(801) def _is_ppc821(self): return self.__machine(821) def _is_ppc823(self): return self.__machine(823) def _is_ppc860(self): return self.__machine(860) class NetBSDCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = {} info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') info['arch'] = info['sysctl_hw'].get('hw.machine_arch', 1) info['machine'] = info['sysctl_hw'].get('hw.machine', 1) self.__class__.info = info def _not_impl(self): pass def _getNCPUs(self): return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) def _is_Intel(self): if self.info['sysctl_hw'].get('hw.model', "")[0:5] == 'Intel': return True return False def _is_AMD(self): if self.info['sysctl_hw'].get('hw.model', "")[0:3] == 'AMD': return True return False class SunOSCPUInfo(CPUInfoBase): info = None def __init__(self): if self.info is not None: return info = command_info(arch='arch', mach='mach', uname_i=['uname', '-i'], isainfo_b=['isainfo', '-b'], isainfo_n=['isainfo', '-n'], ) info['uname_X'] = key_value_from_command(['uname', '-X'], sep='=') for line in command_by_line(['psrinfo', '-v', '0']): m = re.match(r'\s*The (?P
<p>
[\w\d]+) processor operates at', line) if m: info['processor'] = m.group('p') break self.__class__.info = info def _not_impl(self): pass def _is_i386(self): return self.info['isainfo_n'] == 'i386' def _is_sparc(self): return self.info['isainfo_n'] == 'sparc' def _is_sparcv9(self): return self.info['isainfo_n'] == 'sparcv9' def _getNCPUs(self): return int(self.info['uname_X'].get('NumCPU', 1)) def _is_sun4(self): return self.info['arch'] == 'sun4' def _is_SUNW(self): return re.match(r'SUNW', self.info['uname_i']) is not None def _is_sparcstation5(self): return re.match(r'.*SPARCstation-5', self.info['uname_i']) is not None def _is_ultra1(self): return re.match(r'.*Ultra-1', self.info['uname_i']) is not None def _is_ultra250(self): return re.match(r'.*Ultra-250', self.info['uname_i']) is not None def _is_ultra2(self): return re.match(r'.*Ultra-2', self.info['uname_i']) is not None def _is_ultra30(self): return re.match(r'.*Ultra-30', self.info['uname_i']) is not None def _is_ultra4(self): return re.match(r'.*Ultra-4', self.info['uname_i']) is not None def _is_ultra5_10(self): return re.match(r'.*Ultra-5_10', self.info['uname_i']) is not None def _is_ultra5(self): return re.match(r'.*Ultra-5', self.info['uname_i']) is not None def _is_ultra60(self): return re.match(r'.*Ultra-60', self.info['uname_i']) is not None def _is_ultra80(self): return re.match(r'.*Ultra-80', self.info['uname_i']) is not None def _is_ultraenterprice(self): return re.match(r'.*Ultra-Enterprise', self.info['uname_i']) is not None def _is_ultraenterprice10k(self): return re.match(r'.*Ultra-Enterprise-10000', self.info['uname_i']) is not None def _is_sunfire(self): return re.match(r'.*Sun-Fire', self.info['uname_i']) is not None def _is_ultra(self): return re.match(r'.*Ultra', self.info['uname_i']) is not None def _is_cpusparcv7(self): return self.info['processor'] == 'sparcv7' def _is_cpusparcv8(self): return self.info['processor'] == 'sparcv8' def _is_cpusparcv9(self): return self.info['processor'] == 'sparcv9' class Win32CPUInfo(CPUInfoBase): info = None pkey = r"HARDWARE\DESCRIPTION\System\CentralProcessor" # XXX: what does the value of # HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0 # mean? def __init__(self): try: import _winreg except ImportError: # Python 3 import winreg as _winreg if self.info is not None: return info = [] try: #XXX: Bad style to use so long `try:...except:...`. Fix it! 
prgx = re.compile(r"family\s+(?P\d+)\s+model\s+(?P\d+)" "\s+stepping\s+(?P\d+)", re.IGNORECASE) chnd = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) pnum = 0 while 1: try: proc = _winreg.EnumKey(chnd, pnum) except _winreg.error: break else: pnum += 1 info.append({"Processor": proc}) phnd = _winreg.OpenKey(chnd, proc) pidx = 0 while True: try: name, value, vtpe = _winreg.EnumValue(phnd, pidx) except _winreg.error: break else: pidx = pidx + 1 info[-1][name] = value if name == "Identifier": srch = prgx.search(value) if srch: info[-1]["Family"] = int(srch.group("FML")) info[-1]["Model"] = int(srch.group("MDL")) info[-1]["Stepping"] = int(srch.group("STP")) except: print(sys.exc_value, '(ignoring)') self.__class__.info = info def _not_impl(self): pass # Athlon def _is_AMD(self): return self.info[0]['VendorIdentifier'] == 'AuthenticAMD' def _is_Am486(self): return self.is_AMD() and self.info[0]['Family'] == 4 def _is_Am5x86(self): return self.is_AMD() and self.info[0]['Family'] == 4 def _is_AMDK5(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] in [0, 1, 2, 3]) def _is_AMDK6(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] in [6, 7]) def _is_AMDK6_2(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 8) def _is_AMDK6_3(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 9) def _is_AMDK7(self): return self.is_AMD() and self.info[0]['Family'] == 6 # To reliably distinguish between the different types of AMD64 chips # (Athlon64, Operton, Athlon64 X2, Semperon, Turion 64, etc.) would # require looking at the 'brand' from cpuid def _is_AMD64(self): return self.is_AMD() and self.info[0]['Family'] == 15 # Intel def _is_Intel(self): return self.info[0]['VendorIdentifier'] == 'GenuineIntel' def _is_i386(self): return self.info[0]['Family'] == 3 def _is_i486(self): return self.info[0]['Family'] == 4 def _is_i586(self): return self.is_Intel() and self.info[0]['Family'] == 5 def _is_i686(self): return self.is_Intel() and self.info[0]['Family'] == 6 def _is_Pentium(self): return self.is_Intel() and self.info[0]['Family'] == 5 def _is_PentiumMMX(self): return (self.is_Intel() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 4) def _is_PentiumPro(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] == 1) def _is_PentiumII(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [3, 5, 6]) def _is_PentiumIII(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [7, 8, 9, 10, 11]) def _is_PentiumIV(self): return self.is_Intel() and self.info[0]['Family'] == 15 def _is_PentiumM(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [9, 13, 14]) def _is_Core2(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [15, 16, 17]) # Varia def _is_singleCPU(self): return len(self.info) == 1 def _getNCPUs(self): return len(self.info) def _has_mmx(self): if self.is_Intel(): return ((self.info[0]['Family'] == 5 and self.info[0]['Model'] == 4) or (self.info[0]['Family'] in [6, 15])) elif self.is_AMD(): return self.info[0]['Family'] in [5, 6, 15] else: return False def _has_sse(self): if self.is_Intel(): return ((self.info[0]['Family'] == 6 and self.info[0]['Model'] in [7, 8, 9, 10, 11]) or self.info[0]['Family'] == 15) elif self.is_AMD(): return ((self.info[0]['Family'] == 
6 and self.info[0]['Model'] in [6, 7, 8, 10]) or self.info[0]['Family'] == 15) else: return False def _has_sse2(self): if self.is_Intel(): return self.is_Pentium4() or self.is_PentiumM() or self.is_Core2() elif self.is_AMD(): return self.is_AMD64() else: return False def _has_3dnow(self): return self.is_AMD() and self.info[0]['Family'] in [5, 6, 15] def _has_3dnowext(self): return self.is_AMD() and self.info[0]['Family'] in [6, 15] if sys.platform.startswith('linux'): # variations: linux2,linux-i386 (any others?) cpuinfo = LinuxCPUInfo elif sys.platform.startswith('irix'): cpuinfo = IRIXCPUInfo elif sys.platform == 'darwin': cpuinfo = DarwinCPUInfo elif sys.platform[0:6] == 'netbsd': cpuinfo = NetBSDCPUInfo elif sys.platform.startswith('sunos'): cpuinfo = SunOSCPUInfo elif sys.platform.startswith('win32'): cpuinfo = Win32CPUInfo elif sys.platform.startswith('cygwin'): cpuinfo = LinuxCPUInfo #XXX: other OS's. Eg. use _winreg on Win32. Or os.uname on unices. else: cpuinfo = CPUInfoBase cpu = cpuinfo() if __name__ == "__main__": cpu.is_blaa() cpu.is_Intel() cpu.is_Alpha() info = [] for name in dir(cpuinfo): if name[0] == '_' and name[1] != '_': r = getattr(cpu, name[1:])() if r: if r != 1: info.append('%s=%s' % (name[1:], r)) else: info.append(name[1:]) print('CPU information: ' + ' '.join(info)) numexpr-2.7.1/numexpr/expressions.py000066400000000000000000000406211360375525100176630ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### __all__ = ['E'] import operator import sys import threading import numpy # numpy's behavoir sometimes changes with versioning, especially in regard as # to when ints are cast to floats. 
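# The flag computed below mirrors NumPy >= 1.12 behaviour, where raising
# integer arrays to negative integer powers is an error; pow_op() further
# down uses it to reject such expressions with a clear ValueError.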
from distutils.version import LooseVersion _np_version_forbids_neg_powint = LooseVersion(numpy.__version__) >= LooseVersion('1.12.0b1') # Declare a double type that does not exist in Python space double = numpy.double # The default kind for undeclared variables default_kind = 'double' if sys.version_info[0] < 3: int_ = int long_ = long else: int_ = numpy.int32 long_ = numpy.int64 type_to_kind = {bool: 'bool', int_: 'int', long_: 'long', float: 'float', double: 'double', complex: 'complex', bytes: 'bytes'} kind_to_type = {'bool': bool, 'int': int_, 'long': long_, 'float': float, 'double': double, 'complex': complex, 'bytes': bytes} kind_rank = ('bool', 'int', 'long', 'float', 'double', 'complex', 'none') scalar_constant_types = [bool, int_, long, float, double, complex, bytes] # Final corrections for Python 3 (mainly for PyTables needs) if sys.version_info[0] > 2: type_to_kind[str] = 'str' kind_to_type['str'] = str scalar_constant_types.append(str) scalar_constant_types = tuple(scalar_constant_types) from numexpr import interpreter class Expression(object): def __init__(self): object.__init__(self) def __getattr__(self, name): if name.startswith('_'): try: return self.__dict__[name] except KeyError: raise AttributeError else: return VariableNode(name, default_kind) E = Expression() class Context(threading.local): def get(self, value, default): return self.__dict__.get(value, default) def get_current_context(self): return self.__dict__ def set_new_context(self, dict_): self.__dict__.update(dict_) # This will be called each time the local object is used in a separate thread _context = Context() def get_optimization(): return _context.get('optimization', 'none') # helper functions for creating __magic__ methods def ophelper(f): def func(*args): args = list(args) for i, x in enumerate(args): if isConstant(x): args[i] = x = ConstantNode(x) if not isinstance(x, ExpressionNode): raise TypeError("unsupported object type: %s" % type(x)) return f(*args) func.__name__ = f.__name__ func.__doc__ = f.__doc__ func.__dict__.update(f.__dict__) return func def allConstantNodes(args): "returns True if args are all ConstantNodes." for x in args: if not isinstance(x, ConstantNode): return False return True def isConstant(ex): "Returns True if ex is a constant scalar of an allowed type." return isinstance(ex, scalar_constant_types) def commonKind(nodes): node_kinds = [node.astKind for node in nodes] str_count = node_kinds.count('bytes') + node_kinds.count('str') if 0 < str_count < len(node_kinds): # some args are strings, but not all raise TypeError("strings can only be operated with strings") if str_count > 0: # if there are some, all of them must be return 'bytes' n = -1 for x in nodes: n = max(n, kind_rank.index(x.astKind)) return kind_rank[n] max_int32 = 2147483647 min_int32 = -max_int32 - 1 def bestConstantType(x): # ``numpy.string_`` is a subclass of ``bytes`` if isinstance(x, (bytes, str)): return bytes # Numeric conversion to boolean values is not tried because # ``bool(1) == True`` (same for 0 and False), so 0 and 1 would be # interpreted as booleans when ``False`` and ``True`` are already # supported. if isinstance(x, (bool, numpy.bool_)): return bool # ``long`` objects are kept as is to allow the user to force # promotion of results by using long constants, e.g. by operating # a 32-bit array with a long (64-bit) constant. if isinstance(x, (long_, numpy.int64)): return long_ # ``double`` objects are kept as is to allow the user to force # promotion of results by using double constants, e.g. 
by operating # a float (32-bit) array with a double (64-bit) constant. if isinstance(x, double): return double if isinstance(x, (int, numpy.integer)): # Constants needing more than 32 bits are always # considered ``long``, *regardless of the platform*, so we # can clearly tell 32- and 64-bit constants apart. if not (min_int32 <= x <= max_int32): return long_ return int_ # The duality of float and double in Python avoids that we have to list # ``double`` too. for converter in float, complex: try: y = converter(x) except StandardError as err: continue if y == x: return converter def getKind(x): converter = bestConstantType(x) return type_to_kind[converter] def binop(opname, reversed=False, kind=None): # Getting the named method from self (after reversal) does not # always work (e.g. int constants do not have a __lt__ method). opfunc = getattr(operator, "__%s__" % opname) @ophelper def operation(self, other): if reversed: self, other = other, self if allConstantNodes([self, other]): return ConstantNode(opfunc(self.value, other.value)) else: return OpNode(opname, (self, other), kind=kind) return operation def func(func, minkind=None, maxkind=None): @ophelper def function(*args): if allConstantNodes(args): return ConstantNode(func(*[x.value for x in args])) kind = commonKind(args) if kind in ('int', 'long'): # Exception for following NumPy casting rules #FIXME: this is not always desirable. The following # functions which return ints (for int inputs) on numpy # but not on numexpr: copy, abs, fmod, ones_like kind = 'double' else: # Apply regular casting rules if minkind and kind_rank.index(minkind) > kind_rank.index(kind): kind = minkind if maxkind and kind_rank.index(maxkind) < kind_rank.index(kind): kind = maxkind return FuncNode(func.__name__, args, kind) return function @ophelper def where_func(a, b, c): if isinstance(a, ConstantNode): return b if a.value else c if allConstantNodes([a, b, c]): return ConstantNode(numpy.where(a, b, c)) return FuncNode('where', [a, b, c]) def encode_axis(axis): if isinstance(axis, ConstantNode): axis = axis.value if axis is None: axis = interpreter.allaxes else: if axis < 0: raise ValueError("negative axis are not supported") if axis > 254: raise ValueError("cannot encode axis") return RawNode(axis) def gen_reduce_axis_func(name): def _func(a, axis=None): axis = encode_axis(axis) if isinstance(a, ConstantNode): return a if isinstance(a, (bool, int_, long_, float, double, complex)): a = ConstantNode(a) return FuncNode(name, [a, axis], kind=a.astKind) return _func @ophelper def contains_func(a, b): return FuncNode('contains', [a, b], kind='bool') @ophelper def div_op(a, b): if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and a.astKind in ('float', 'double', 'complex')): return OpNode('mul', [a, ConstantNode(1. / b.value)]) return OpNode('div', [a, b]) @ophelper def truediv_op(a, b): if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and a.astKind in ('float', 'double', 'complex')): return OpNode('mul', [a, ConstantNode(1. 
/ b.value)]) kind = commonKind([a, b]) if kind in ('bool', 'int', 'long'): kind = 'double' return OpNode('div', [a, b], kind=kind) @ophelper def rtruediv_op(a, b): return truediv_op(b, a) @ophelper def pow_op(a, b): if (_np_version_forbids_neg_powint and b.astKind in ('int', 'long') and a.astKind in ('int', 'long') and numpy.any(b.value < 0)): raise ValueError( 'Integers to negative integer powers are not allowed.') if allConstantNodes([a, b]): return ConstantNode(a.value ** b.value) if isinstance(b, ConstantNode): x = b.value if get_optimization() == 'aggressive': RANGE = 50 # Approximate break even point with pow(x,y) # Optimize all integral and half integral powers in [-RANGE, RANGE] # Note: for complex numbers RANGE could be larger. if (int(2 * x) == 2 * x) and (-RANGE <= abs(x) <= RANGE): n = int_(abs(x)) ishalfpower = int_(abs(2 * x)) % 2 def multiply(x, y): if x is None: return y return OpNode('mul', [x, y]) r = None p = a mask = 1 while True: if (n & mask): r = multiply(r, p) mask <<= 1 if mask > n: break p = OpNode('mul', [p, p]) if ishalfpower: kind = commonKind([a]) if kind in ('int', 'long'): kind = 'double' r = multiply(r, OpNode('sqrt', [a], kind)) if r is None: r = OpNode('ones_like', [a]) if x < 0: r = OpNode('div', [ConstantNode(1), r]) return r if get_optimization() in ('moderate', 'aggressive'): if x == -1: return OpNode('div', [ConstantNode(1), a]) if x == 0: return OpNode('ones_like', [a]) if x == 0.5: kind = a.astKind if kind in ('int', 'long'): kind = 'double' return FuncNode('sqrt', [a], kind=kind) if x == 1: return a if x == 2: return OpNode('mul', [a, a]) return OpNode('pow', [a, b]) # The functions and the minimum and maximum types accepted functions = { 'copy': func(numpy.copy), 'ones_like': func(numpy.ones_like), 'sqrt': func(numpy.sqrt, 'float'), 'sin': func(numpy.sin, 'float'), 'cos': func(numpy.cos, 'float'), 'tan': func(numpy.tan, 'float'), 'arcsin': func(numpy.arcsin, 'float'), 'arccos': func(numpy.arccos, 'float'), 'arctan': func(numpy.arctan, 'float'), 'sinh': func(numpy.sinh, 'float'), 'cosh': func(numpy.cosh, 'float'), 'tanh': func(numpy.tanh, 'float'), 'arcsinh': func(numpy.arcsinh, 'float'), 'arccosh': func(numpy.arccosh, 'float'), 'arctanh': func(numpy.arctanh, 'float'), 'fmod': func(numpy.fmod, 'float'), 'arctan2': func(numpy.arctan2, 'float'), 'log': func(numpy.log, 'float'), 'log1p': func(numpy.log1p, 'float'), 'log10': func(numpy.log10, 'float'), 'exp': func(numpy.exp, 'float'), 'expm1': func(numpy.expm1, 'float'), 'abs': func(numpy.absolute, 'float'), 'ceil': func(numpy.ceil, 'float', 'double'), 'floor': func(numpy.floor, 'float', 'double'), 'where': where_func, 'real': func(numpy.real, 'double', 'double'), 'imag': func(numpy.imag, 'double', 'double'), 'complex': func(complex, 'complex'), 'conj': func(numpy.conj, 'complex'), 'sum': gen_reduce_axis_func('sum'), 'prod': gen_reduce_axis_func('prod'), 'min': gen_reduce_axis_func('min'), 'max': gen_reduce_axis_func('max'), 'contains': contains_func, } class ExpressionNode(object): """An object that represents a generic number object. This implements the number special methods so that we can keep track of how this object has been used. 
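    A minimal usage sketch, assuming the package-level re-exports of ``E``
    and ``NumExpr``: building with ``E`` only records the tree, and the
    computation happens when the compiled object is called.

        import numpy as np
        from numexpr import E, NumExpr

        ex = 2 * E.a + 3 * E.b        # an OpNode tree; nothing is computed yet
        func = NumExpr(ex)            # compile the tree into a VM program
        func(np.ones(5), np.ones(5))  # -> array([5., 5., 5., 5., 5.])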
""" astType = 'generic' def __init__(self, value=None, kind=None, children=None): object.__init__(self) self.value = value if kind is None: kind = 'none' self.astKind = kind if children is None: self.children = () else: self.children = tuple(children) def get_real(self): if self.astType == 'constant': return ConstantNode(complex(self.value).real) return OpNode('real', (self,), 'double') real = property(get_real) def get_imag(self): if self.astType == 'constant': return ConstantNode(complex(self.value).imag) return OpNode('imag', (self,), 'double') imag = property(get_imag) def __str__(self): return '%s(%s, %s, %s)' % (self.__class__.__name__, self.value, self.astKind, self.children) def __repr__(self): return self.__str__() def __neg__(self): return OpNode('neg', (self,)) def __invert__(self): return OpNode('invert', (self,)) def __pos__(self): return self # The next check is commented out. See #24 for more info. def __nonzero__(self): raise TypeError("You can't use Python's standard boolean operators in " "NumExpr expressions. You should use their bitwise " "counterparts instead: '&' instead of 'and', " "'|' instead of 'or', and '~' instead of 'not'.") __add__ = __radd__ = binop('add') __sub__ = binop('sub') __rsub__ = binop('sub', reversed=True) __mul__ = __rmul__ = binop('mul') if sys.version_info[0] < 3: __div__ = div_op __rdiv__ = binop('div', reversed=True) __truediv__ = truediv_op __rtruediv__ = rtruediv_op __pow__ = pow_op __rpow__ = binop('pow', reversed=True) __mod__ = binop('mod') __rmod__ = binop('mod', reversed=True) __lshift__ = binop('lshift') __rlshift__ = binop('lshift', reversed=True) __rshift__ = binop('rshift') __rrshift__ = binop('rshift', reversed=True) # boolean operations __and__ = binop('and', kind='bool') __or__ = binop('or', kind='bool') __gt__ = binop('gt', kind='bool') __ge__ = binop('ge', kind='bool') __eq__ = binop('eq', kind='bool') __ne__ = binop('ne', kind='bool') __lt__ = binop('gt', reversed=True, kind='bool') __le__ = binop('ge', reversed=True, kind='bool') class LeafNode(ExpressionNode): leafNode = True class VariableNode(LeafNode): astType = 'variable' def __init__(self, value=None, kind=None, children=None): LeafNode.__init__(self, value=value, kind=kind) class RawNode(object): """Used to pass raw integers to interpreter. For instance, for selecting what function to use in func1. Purposely don't inherit from ExpressionNode, since we don't wan't this to be used for anything but being walked. 
""" astType = 'raw' astKind = 'none' def __init__(self, value): self.value = value self.children = () def __str__(self): return 'RawNode(%s)' % (self.value,) __repr__ = __str__ class ConstantNode(LeafNode): astType = 'constant' def __init__(self, value=None, children=None): kind = getKind(value) # Python float constants are double precision by default if kind == 'float': kind = 'double' LeafNode.__init__(self, value=value, kind=kind) def __neg__(self): return ConstantNode(-self.value) def __invert__(self): return ConstantNode(~self.value) class OpNode(ExpressionNode): astType = 'op' def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) ExpressionNode.__init__(self, value=opcode, kind=kind, children=args) class FuncNode(OpNode): def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) OpNode.__init__(self, opcode, args, kind) numexpr-2.7.1/numexpr/functions.hpp000066400000000000000000000131061360375525100174460ustar00rootroot00000000000000// -*- c-mode -*- /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ /* These #if blocks make it easier to query this file, without having to define every row function before #including it. */ #ifndef FUNC_FF #define ELIDE_FUNC_FF #define FUNC_FF(...) #endif FUNC_FF(FUNC_SQRT_FF, "sqrt_ff", sqrtf, sqrtf2, vsSqrt) FUNC_FF(FUNC_SIN_FF, "sin_ff", sinf, sinf2, vsSin) FUNC_FF(FUNC_COS_FF, "cos_ff", cosf, cosf2, vsCos) FUNC_FF(FUNC_TAN_FF, "tan_ff", tanf, tanf2, vsTan) FUNC_FF(FUNC_ARCSIN_FF, "arcsin_ff", asinf, asinf2, vsAsin) FUNC_FF(FUNC_ARCCOS_FF, "arccos_ff", acosf, acosf2, vsAcos) FUNC_FF(FUNC_ARCTAN_FF, "arctan_ff", atanf, atanf2, vsAtan) FUNC_FF(FUNC_SINH_FF, "sinh_ff", sinhf, sinhf2, vsSinh) FUNC_FF(FUNC_COSH_FF, "cosh_ff", coshf, coshf2, vsCosh) FUNC_FF(FUNC_TANH_FF, "tanh_ff", tanhf, tanhf2, vsTanh) FUNC_FF(FUNC_ARCSINH_FF, "arcsinh_ff", asinhf, asinhf2, vsAsinh) FUNC_FF(FUNC_ARCCOSH_FF, "arccosh_ff", acoshf, acoshf2, vsAcosh) FUNC_FF(FUNC_ARCTANH_FF, "arctanh_ff", atanhf, atanhf2, vsAtanh) FUNC_FF(FUNC_LOG_FF, "log_ff", logf, logf2, vsLn) FUNC_FF(FUNC_LOG1P_FF, "log1p_ff", log1pf, log1pf2, vsLog1p) FUNC_FF(FUNC_LOG10_FF, "log10_ff", log10f, log10f2, vsLog10) FUNC_FF(FUNC_EXP_FF, "exp_ff", expf, expf2, vsExp) FUNC_FF(FUNC_EXPM1_FF, "expm1_ff", expm1f, expm1f2, vsExpm1) FUNC_FF(FUNC_ABS_FF, "absolute_ff", fabsf, fabsf2, vsAbs) FUNC_FF(FUNC_CONJ_FF, "conjugate_ff",fconjf, fconjf2, vsConj) FUNC_FF(FUNC_CEIL_FF, "ceil_ff", ceilf, ceilf2, vsCeil) FUNC_FF(FUNC_FLOOR_FF, "floor_ff", floorf, floorf2, vsFloor) FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FF #undef ELIDE_FUNC_FF #undef FUNC_FF #endif #ifndef FUNC_FFF #define ELIDE_FUNC_FFF #define FUNC_FFF(...) #endif FUNC_FFF(FUNC_FMOD_FFF, "fmod_fff", fmodf, fmodf2, vsfmod) FUNC_FFF(FUNC_ARCTAN2_FFF, "arctan2_fff", atan2f, atan2f2, vsAtan2) FUNC_FFF(FUNC_FFF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FFF #undef ELIDE_FUNC_FFF #undef FUNC_FFF #endif #ifndef FUNC_DD #define ELIDE_FUNC_DD #define FUNC_DD(...) 
#endif FUNC_DD(FUNC_SQRT_DD, "sqrt_dd", sqrt, vdSqrt) FUNC_DD(FUNC_SIN_DD, "sin_dd", sin, vdSin) FUNC_DD(FUNC_COS_DD, "cos_dd", cos, vdCos) FUNC_DD(FUNC_TAN_DD, "tan_dd", tan, vdTan) FUNC_DD(FUNC_ARCSIN_DD, "arcsin_dd", asin, vdAsin) FUNC_DD(FUNC_ARCCOS_DD, "arccos_dd", acos, vdAcos) FUNC_DD(FUNC_ARCTAN_DD, "arctan_dd", atan, vdAtan) FUNC_DD(FUNC_SINH_DD, "sinh_dd", sinh, vdSinh) FUNC_DD(FUNC_COSH_DD, "cosh_dd", cosh, vdCosh) FUNC_DD(FUNC_TANH_DD, "tanh_dd", tanh, vdTanh) FUNC_DD(FUNC_ARCSINH_DD, "arcsinh_dd", asinh, vdAsinh) FUNC_DD(FUNC_ARCCOSH_DD, "arccosh_dd", acosh, vdAcosh) FUNC_DD(FUNC_ARCTANH_DD, "arctanh_dd", atanh, vdAtanh) FUNC_DD(FUNC_LOG_DD, "log_dd", log, vdLn) FUNC_DD(FUNC_LOG1P_DD, "log1p_dd", log1p, vdLog1p) FUNC_DD(FUNC_LOG10_DD, "log10_dd", log10, vdLog10) FUNC_DD(FUNC_EXP_DD, "exp_dd", exp, vdExp) FUNC_DD(FUNC_EXPM1_DD, "expm1_dd", expm1, vdExpm1) FUNC_DD(FUNC_ABS_DD, "absolute_dd", fabs, vdAbs) FUNC_DD(FUNC_CONJ_DD, "conjugate_dd",fconj, vdConj) FUNC_DD(FUNC_CEIL_DD, "ceil_dd", ceil, vdCeil) FUNC_DD(FUNC_FLOOR_DD, "floor_dd", floor, vdFloor) FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DD #undef ELIDE_FUNC_DD #undef FUNC_DD #endif #ifndef FUNC_DDD #define ELIDE_FUNC_DDD #define FUNC_DDD(...) #endif FUNC_DDD(FUNC_FMOD_DDD, "fmod_ddd", fmod, vdfmod) FUNC_DDD(FUNC_ARCTAN2_DDD, "arctan2_ddd", atan2, vdAtan2) FUNC_DDD(FUNC_DDD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DDD #undef ELIDE_FUNC_DDD #undef FUNC_DDD #endif #ifndef FUNC_CC #define ELIDE_FUNC_CC #define FUNC_CC(...) #endif FUNC_CC(FUNC_SQRT_CC, "sqrt_cc", nc_sqrt, vzSqrt) FUNC_CC(FUNC_SIN_CC, "sin_cc", nc_sin, vzSin) FUNC_CC(FUNC_COS_CC, "cos_cc", nc_cos, vzCos) FUNC_CC(FUNC_TAN_CC, "tan_cc", nc_tan, vzTan) FUNC_CC(FUNC_ARCSIN_CC, "arcsin_cc", nc_asin, vzAsin) FUNC_CC(FUNC_ARCCOS_CC, "arccos_cc", nc_acos, vzAcos) FUNC_CC(FUNC_ARCTAN_CC, "arctan_cc", nc_atan, vzAtan) FUNC_CC(FUNC_SINH_CC, "sinh_cc", nc_sinh, vzSinh) FUNC_CC(FUNC_COSH_CC, "cosh_cc", nc_cosh, vzCosh) FUNC_CC(FUNC_TANH_CC, "tanh_cc", nc_tanh, vzTanh) FUNC_CC(FUNC_ARCSINH_CC, "arcsinh_cc", nc_asinh, vzAsinh) FUNC_CC(FUNC_ARCCOSH_CC, "arccosh_cc", nc_acosh, vzAcosh) FUNC_CC(FUNC_ARCTANH_CC, "arctanh_cc", nc_atanh, vzAtanh) FUNC_CC(FUNC_LOG_CC, "log_cc", nc_log, vzLn) FUNC_CC(FUNC_LOG1P_CC, "log1p_cc", nc_log1p, vzLog1p) FUNC_CC(FUNC_LOG10_CC, "log10_cc", nc_log10, vzLog10) FUNC_CC(FUNC_EXP_CC, "exp_cc", nc_exp, vzExp) FUNC_CC(FUNC_EXPM1_CC, "expm1_cc", nc_expm1, vzExpm1) FUNC_CC(FUNC_ABS_CC, "absolute_cc", nc_abs, vzAbs_) FUNC_CC(FUNC_CONJ_CC, "conjugate_cc",nc_conj, vzConj) FUNC_CC(FUNC_CC_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_CC #undef ELIDE_FUNC_CC #undef FUNC_CC #endif #ifndef FUNC_CCC #define ELIDE_FUNC_CCC #define FUNC_CCC(...) #endif FUNC_CCC(FUNC_POW_CCC, "pow_ccc", nc_pow) FUNC_CCC(FUNC_CCC_LAST, NULL, NULL) #ifdef ELIDE_FUNC_CCC #undef ELIDE_FUNC_CCC #undef FUNC_CCC #endif numexpr-2.7.1/numexpr/interp_body.cpp000066400000000000000000000503411360375525100177510ustar00rootroot00000000000000/********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. 
**********************************************************************/ { #define VEC_LOOP(expr) for(j = 0; j < BLOCK_SIZE; j++) { \ expr; \ } #define VEC_ARG0(expr) \ BOUNDS_CHECK(store_in); \ { \ char *dest = mem[store_in]; \ VEC_LOOP(expr); \ } break #define VEC_ARG1(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ npy_intp ss1 = params.memsizes[arg1]; \ npy_intp sb1 = memsteps[arg1]; \ /* nowarns is defined and used so as to \ avoid compiler warnings about unused \ variables */ \ npy_intp nowarns = ss1+sb1+*x1; \ nowarns += 1; \ VEC_LOOP(expr); \ } break #define VEC_ARG2(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ BOUNDS_CHECK(arg2); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ npy_intp ss1 = params.memsizes[arg1]; \ npy_intp sb1 = memsteps[arg1]; \ /* nowarns is defined and used so as to \ avoid compiler warnings about unused \ variables */ \ npy_intp nowarns = ss1+sb1+*x1; \ char *x2 = mem[arg2]; \ npy_intp ss2 = params.memsizes[arg2]; \ npy_intp sb2 = memsteps[arg2]; \ nowarns += ss2+sb2+*x2; \ VEC_LOOP(expr); \ } break #define VEC_ARG3(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ BOUNDS_CHECK(arg2); \ BOUNDS_CHECK(arg3); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ npy_intp ss1 = params.memsizes[arg1]; \ npy_intp sb1 = memsteps[arg1]; \ /* nowarns is defined and used so as to \ avoid compiler warnings about unused \ variables */ \ npy_intp nowarns = ss1+sb1+*x1; \ char *x2 = mem[arg2]; \ npy_intp ss2 = params.memsizes[arg2]; \ npy_intp sb2 = memsteps[arg2]; \ char *x3 = mem[arg3]; \ npy_intp ss3 = params.memsizes[arg3]; \ npy_intp sb3 = memsteps[arg3]; \ nowarns += ss2+sb2+*x2; \ nowarns += ss3+sb3+*x3; \ VEC_LOOP(expr); \ } break #define VEC_ARG1_VML(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ expr; \ } break #define VEC_ARG2_VML(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ BOUNDS_CHECK(arg2); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ char *x2 = mem[arg2]; \ expr; \ } break #define VEC_ARG3_VML(expr) \ BOUNDS_CHECK(store_in); \ BOUNDS_CHECK(arg1); \ BOUNDS_CHECK(arg2); \ BOUNDS_CHECK(arg3); \ { \ char *dest = mem[store_in]; \ char *x1 = mem[arg1]; \ char *x2 = mem[arg2]; \ char *x3 = mem[arg3]; \ expr; \ } break int pc; unsigned int j; // set up pointers to next block of inputs and outputs #ifdef SINGLE_ITEM_CONST_LOOP mem[0] = params.output; #else // SINGLE_ITEM_CONST_LOOP // use the iterator's inner loop data memcpy(mem, iter_dataptr, (1+params.n_inputs)*sizeof(char*)); # ifndef NO_OUTPUT_BUFFERING // if output buffering is necessary, first write to the buffer if(params.out_buffer != NULL) { mem[0] = params.out_buffer; } # endif // NO_OUTPUT_BUFFERING memcpy(memsteps, iter_strides, (1+params.n_inputs)*sizeof(npy_intp)); #endif // SINGLE_ITEM_CONST_LOOP // WARNING: From now on, only do references to mem[arg[123]] // & memsteps[arg[123]] inside the VEC_ARG[123] macros, // or you will risk accessing invalid addresses. 
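/*
   Each step handled by the switch below is one four-byte instruction of the
   form (opcode, destination register, arg1, arg2); the few three-argument
   opcodes fetch arg3 from the following program word (see the arg3 macro).
   The compiled program can be inspected from Python; a minimal sketch,
   assuming the disassemble helper in numexpr.necompiler:

       from numexpr.necompiler import NumExpr, disassemble

       prog = NumExpr("2*a + 3*b")
       disassemble(prog)   # roughly one (opcode, dest, arg1, arg2) row per step
*/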
for (pc = 0; pc < params.prog_len; pc += 4) { unsigned char op = params.program[pc]; unsigned int store_in = params.program[pc+1]; unsigned int arg1 = params.program[pc+2]; unsigned int arg2 = params.program[pc+3]; #define arg3 params.program[pc+5] // Iterator reduce macros #ifdef REDUCTION_INNER_LOOP // Reduce is the inner loop #define i_reduce *(int *)dest #define l_reduce *(long long *)dest #define f_reduce *(float *)dest #define d_reduce *(double *)dest #define cr_reduce *(double *)dest #define ci_reduce *((double *)dest+1) #else /* Reduce is the outer loop */ #define i_reduce i_dest #define l_reduce l_dest #define f_reduce f_dest #define d_reduce d_dest #define cr_reduce cr_dest #define ci_reduce ci_dest #endif #define b_dest ((char *)dest)[j] #define i_dest ((int *)dest)[j] #define l_dest ((long long *)dest)[j] #define f_dest ((float *)dest)[j] #define d_dest ((double *)dest)[j] #define cr_dest ((double *)dest)[2*j] #define ci_dest ((double *)dest)[2*j+1] #define s_dest ((char *)dest + j*memsteps[store_in]) #define b1 ((char *)(x1+j*sb1))[0] #define i1 ((int *)(x1+j*sb1))[0] #define l1 ((long long *)(x1+j*sb1))[0] #define f1 ((float *)(x1+j*sb1))[0] #define d1 ((double *)(x1+j*sb1))[0] #define c1r ((double *)(x1+j*sb1))[0] #define c1i ((double *)(x1+j*sb1))[1] #define s1 ((char *)x1+j*sb1) #define b2 ((char *)(x2+j*sb2))[0] #define i2 ((int *)(x2+j*sb2))[0] #define l2 ((long long *)(x2+j*sb2))[0] #define f2 ((float *)(x2+j*sb2))[0] #define d2 ((double *)(x2+j*sb2))[0] #define c2r ((double *)(x2+j*sb2))[0] #define c2i ((double *)(x2+j*sb2))[1] #define s2 ((char *)x2+j*sb2) #define b3 ((char *)(x3+j*sb3))[0] #define i3 ((int *)(x3+j*sb3))[0] #define l3 ((long long *)(x3+j*sb3))[0] #define f3 ((float *)(x3+j*sb3))[0] #define d3 ((double *)(x3+j*sb3))[0] #define c3r ((double *)(x3+j*sb3))[0] #define c3i ((double *)(x3+j*sb3))[1] #define s3 ((char *)x3+j*sb3) /* Some temporaries */ double da, db; npy_cdouble ca, cb; switch (op) { case OP_NOOP: break; case OP_COPY_BB: VEC_ARG1(b_dest = b1); case OP_COPY_SS: VEC_ARG1(memcpy(s_dest, s1, ss1)); /* The next versions of copy opcodes can cope with unaligned data even on platforms that crash while accessing it (like the Sparc architecture under Solaris). */ case OP_COPY_II: VEC_ARG1(memcpy(&i_dest, s1, sizeof(int))); case OP_COPY_LL: VEC_ARG1(memcpy(&l_dest, s1, sizeof(long long))); case OP_COPY_FF: VEC_ARG1(memcpy(&f_dest, s1, sizeof(float))); case OP_COPY_DD: VEC_ARG1(memcpy(&d_dest, s1, sizeof(double))); case OP_COPY_CC: VEC_ARG1(memcpy(&cr_dest, s1, sizeof(double)*2)); /* Bool */ case OP_INVERT_BB: VEC_ARG1(b_dest = !b1); case OP_AND_BBB: VEC_ARG2(b_dest = (b1 && b2)); case OP_OR_BBB: VEC_ARG2(b_dest = (b1 || b2)); case OP_EQ_BBB: VEC_ARG2(b_dest = (b1 == b2)); case OP_NE_BBB: VEC_ARG2(b_dest = (b1 != b2)); case OP_WHERE_BBBB: VEC_ARG3(b_dest = b1 ? 
b2 : b3); /* Comparisons */ case OP_GT_BII: VEC_ARG2(b_dest = (i1 > i2)); case OP_GE_BII: VEC_ARG2(b_dest = (i1 >= i2)); case OP_EQ_BII: VEC_ARG2(b_dest = (i1 == i2)); case OP_NE_BII: VEC_ARG2(b_dest = (i1 != i2)); case OP_GT_BLL: VEC_ARG2(b_dest = (l1 > l2)); case OP_GE_BLL: VEC_ARG2(b_dest = (l1 >= l2)); case OP_EQ_BLL: VEC_ARG2(b_dest = (l1 == l2)); case OP_NE_BLL: VEC_ARG2(b_dest = (l1 != l2)); case OP_GT_BFF: VEC_ARG2(b_dest = (f1 > f2)); case OP_GE_BFF: VEC_ARG2(b_dest = (f1 >= f2)); case OP_EQ_BFF: VEC_ARG2(b_dest = (f1 == f2)); case OP_NE_BFF: VEC_ARG2(b_dest = (f1 != f2)); case OP_GT_BDD: VEC_ARG2(b_dest = (d1 > d2)); case OP_GE_BDD: VEC_ARG2(b_dest = (d1 >= d2)); case OP_EQ_BDD: VEC_ARG2(b_dest = (d1 == d2)); case OP_NE_BDD: VEC_ARG2(b_dest = (d1 != d2)); case OP_GT_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) > 0)); case OP_GE_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) >= 0)); case OP_EQ_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) == 0)); case OP_NE_BSS: VEC_ARG2(b_dest = (stringcmp(s1, s2, ss1, ss2) != 0)); case OP_CONTAINS_BSS: VEC_ARG2(b_dest = stringcontains(s1, s2, ss1, ss2)); /* Int */ case OP_CAST_IB: VEC_ARG1(i_dest = (int)(b1)); case OP_ONES_LIKE_II: VEC_ARG0(i_dest = 1); case OP_NEG_II: VEC_ARG1(i_dest = -i1); case OP_ADD_III: VEC_ARG2(i_dest = i1 + i2); case OP_SUB_III: VEC_ARG2(i_dest = i1 - i2); case OP_MUL_III: VEC_ARG2(i_dest = i1 * i2); case OP_DIV_III: VEC_ARG2(i_dest = i2 ? (i1 / i2) : 0); case OP_POW_III: VEC_ARG2(i_dest = (i2 < 0) ? (1 / i1) : (int)pow((double)i1, i2)); case OP_MOD_III: VEC_ARG2(i_dest = i2 ? (i1 % i2) : 0); case OP_LSHIFT_III: VEC_ARG2(i_dest = i1 << i2); case OP_RSHIFT_III: VEC_ARG2(i_dest = i1 >> i2); case OP_WHERE_IBII: VEC_ARG3(i_dest = b1 ? i2 : i3); /* Long */ case OP_CAST_LI: VEC_ARG1(l_dest = (long long)(i1)); case OP_ONES_LIKE_LL: VEC_ARG0(l_dest = 1); case OP_NEG_LL: VEC_ARG1(l_dest = -l1); case OP_ADD_LLL: VEC_ARG2(l_dest = l1 + l2); case OP_SUB_LLL: VEC_ARG2(l_dest = l1 - l2); case OP_MUL_LLL: VEC_ARG2(l_dest = l1 * l2); case OP_DIV_LLL: VEC_ARG2(l_dest = l2 ? (l1 / l2) : 0); #if defined _MSC_VER && _MSC_VER < 1800 case OP_POW_LLL: VEC_ARG2(l_dest = (l2 < 0) ? (1 / l1) : (long long)pow((long double)l1, (long double)l2)); #else case OP_POW_LLL: VEC_ARG2(l_dest = (l2 < 0) ? (1 / l1) : (long long)llround(pow((long double)l1, (long double)l2))); #endif case OP_MOD_LLL: VEC_ARG2(l_dest = l2 ? (l1 % l2) : 0); case OP_LSHIFT_LLL: VEC_ARG2(l_dest = l1 << l2); case OP_RSHIFT_LLL: VEC_ARG2(l_dest = l1 >> l2); case OP_WHERE_LBLL: VEC_ARG3(l_dest = b1 ? l2 : l3); /* Float */ case OP_CAST_FI: VEC_ARG1(f_dest = (float)(i1)); case OP_CAST_FL: VEC_ARG1(f_dest = (float)(l1)); case OP_ONES_LIKE_FF: VEC_ARG0(f_dest = 1.0); case OP_NEG_FF: VEC_ARG1(f_dest = -f1); case OP_ADD_FFF: VEC_ARG2(f_dest = f1 + f2); case OP_SUB_FFF: VEC_ARG2(f_dest = f1 - f2); case OP_MUL_FFF: VEC_ARG2(f_dest = f1 * f2); case OP_DIV_FFF: #ifdef USE_VML VEC_ARG2_VML(vsDiv(BLOCK_SIZE, (float*)x1, (float*)x2, (float*)dest)); #else VEC_ARG2(f_dest = f1 / f2); #endif case OP_POW_FFF: #ifdef USE_VML VEC_ARG2_VML(vsPow(BLOCK_SIZE, (float*)x1, (float*)x2, (float*)dest)); #else VEC_ARG2(f_dest = powf(f1, f2)); #endif case OP_MOD_FFF: VEC_ARG2(f_dest = f1 - floorf(f1/f2) * f2); case OP_SQRT_FF: #ifdef USE_VML VEC_ARG1_VML(vsSqrt(BLOCK_SIZE, (float*)x1, (float*)dest)); #else VEC_ARG1(f_dest = sqrtf(f1)); #endif case OP_WHERE_FBFF: VEC_ARG3(f_dest = b1 ? 
f2 : f3); case OP_FUNC_FFN: #ifdef USE_VML VEC_ARG1_VML(functions_ff_vml[arg2](BLOCK_SIZE, (float*)x1, (float*)dest)); #else VEC_ARG1(f_dest = functions_ff[arg2](f1)); #endif case OP_FUNC_FFFN: #ifdef USE_VML VEC_ARG2_VML(functions_fff_vml[arg3](BLOCK_SIZE, (float*)x1, (float*)x2, (float*)dest)); #else VEC_ARG2(f_dest = functions_fff[arg3](f1, f2)); #endif /* Double */ case OP_CAST_DI: VEC_ARG1(d_dest = (double)(i1)); case OP_CAST_DL: VEC_ARG1(d_dest = (double)(l1)); case OP_CAST_DF: VEC_ARG1(d_dest = (double)(f1)); case OP_ONES_LIKE_DD: VEC_ARG0(d_dest = 1.0); case OP_NEG_DD: VEC_ARG1(d_dest = -d1); case OP_ADD_DDD: VEC_ARG2(d_dest = d1 + d2); case OP_SUB_DDD: VEC_ARG2(d_dest = d1 - d2); case OP_MUL_DDD: VEC_ARG2(d_dest = d1 * d2); case OP_DIV_DDD: #ifdef USE_VML VEC_ARG2_VML(vdDiv(BLOCK_SIZE, (double*)x1, (double*)x2, (double*)dest)); #else VEC_ARG2(d_dest = d1 / d2); #endif case OP_POW_DDD: #ifdef USE_VML VEC_ARG2_VML(vdPow(BLOCK_SIZE, (double*)x1, (double*)x2, (double*)dest)); #else VEC_ARG2(d_dest = pow(d1, d2)); #endif case OP_MOD_DDD: VEC_ARG2(d_dest = d1 - floor(d1/d2) * d2); case OP_SQRT_DD: #ifdef USE_VML VEC_ARG1_VML(vdSqrt(BLOCK_SIZE, (double*)x1, (double*)dest)); #else VEC_ARG1(d_dest = sqrt(d1)); #endif case OP_WHERE_DBDD: VEC_ARG3(d_dest = b1 ? d2 : d3); case OP_FUNC_DDN: #ifdef USE_VML VEC_ARG1_VML(functions_dd_vml[arg2](BLOCK_SIZE, (double*)x1, (double*)dest)); #else VEC_ARG1(d_dest = functions_dd[arg2](d1)); #endif case OP_FUNC_DDDN: #ifdef USE_VML VEC_ARG2_VML(functions_ddd_vml[arg3](BLOCK_SIZE, (double*)x1, (double*)x2, (double*)dest)); #else VEC_ARG2(d_dest = functions_ddd[arg3](d1, d2)); #endif /* Complex */ case OP_CAST_CI: VEC_ARG1(cr_dest = (double)(i1); ci_dest = 0); case OP_CAST_CL: VEC_ARG1(cr_dest = (double)(l1); ci_dest = 0); case OP_CAST_CF: VEC_ARG1(cr_dest = f1; ci_dest = 0); case OP_CAST_CD: VEC_ARG1(cr_dest = d1; ci_dest = 0); case OP_ONES_LIKE_CC: VEC_ARG0(cr_dest = 1; ci_dest = 0); case OP_NEG_CC: VEC_ARG1(cr_dest = -c1r; ci_dest = -c1i); case OP_ADD_CCC: VEC_ARG2(cr_dest = c1r + c2r; ci_dest = c1i + c2i); case OP_SUB_CCC: VEC_ARG2(cr_dest = c1r - c2r; ci_dest = c1i - c2i); case OP_MUL_CCC: VEC_ARG2(da = c1r*c2r - c1i*c2i; ci_dest = c1r*c2i + c1i*c2r; cr_dest = da); case OP_DIV_CCC: #ifdef USE_VMLXXX /* VML complex division is slower */ VEC_ARG2_VML(vzDiv(BLOCK_SIZE, (const MKL_Complex16*)x1, (const MKL_Complex16*)x2, (MKL_Complex16*)dest)); #else VEC_ARG2(da = c2r*c2r + c2i*c2i; db = (c1r*c2r + c1i*c2i) / da; ci_dest = (c1i*c2r - c1r*c2i) / da; cr_dest = db); #endif case OP_EQ_BCC: VEC_ARG2(b_dest = (c1r == c2r && c1i == c2i)); case OP_NE_BCC: VEC_ARG2(b_dest = (c1r != c2r || c1i != c2i)); case OP_WHERE_CBCC: VEC_ARG3(cr_dest = b1 ? c2r : c3r; ci_dest = b1 ? 
c2i : c3i); case OP_FUNC_CCN: #ifdef USE_VML VEC_ARG1_VML(functions_cc_vml[arg2](BLOCK_SIZE, (const MKL_Complex16*)x1, (MKL_Complex16*)dest)); #else VEC_ARG1(ca.real = c1r; ca.imag = c1i; functions_cc[arg2](&ca, &ca); cr_dest = ca.real; ci_dest = ca.imag); #endif case OP_FUNC_CCCN: VEC_ARG2(ca.real = c1r; ca.imag = c1i; cb.real = c2r; cb.imag = c2i; functions_ccc[arg3](&ca, &cb, &ca); cr_dest = ca.real; ci_dest = ca.imag); case OP_REAL_DC: VEC_ARG1(d_dest = c1r); case OP_IMAG_DC: VEC_ARG1(d_dest = c1i); case OP_COMPLEX_CDD: VEC_ARG2(cr_dest = d1; ci_dest = d2); /* Reductions */ case OP_SUM_IIN: VEC_ARG1(i_reduce += i1); case OP_SUM_LLN: VEC_ARG1(l_reduce += l1); case OP_SUM_FFN: VEC_ARG1(f_reduce += f1); case OP_SUM_DDN: VEC_ARG1(d_reduce += d1); case OP_SUM_CCN: VEC_ARG1(cr_reduce += c1r; ci_reduce += c1i); case OP_PROD_IIN: VEC_ARG1(i_reduce *= i1); case OP_PROD_LLN: VEC_ARG1(l_reduce *= l1); case OP_PROD_FFN: VEC_ARG1(f_reduce *= f1); case OP_PROD_DDN: VEC_ARG1(d_reduce *= d1); case OP_PROD_CCN: VEC_ARG1(da = cr_reduce*c1r - ci_reduce*c1i; ci_reduce = cr_reduce*c1i + ci_reduce*c1r; cr_reduce = da); case OP_MIN_IIN: VEC_ARG1(i_reduce = fmin(i_reduce, i1)); case OP_MIN_LLN: VEC_ARG1(l_reduce = fmin(l_reduce, l1)); case OP_MIN_FFN: VEC_ARG1(f_reduce = fmin(f_reduce, f1)); case OP_MIN_DDN: VEC_ARG1(d_reduce = fmin(d_reduce, d1)); case OP_MAX_IIN: VEC_ARG1(i_reduce = fmax(i_reduce, i1)); case OP_MAX_LLN: VEC_ARG1(l_reduce = fmax(l_reduce, l1)); case OP_MAX_FFN: VEC_ARG1(f_reduce = fmax(f_reduce, f1)); case OP_MAX_DDN: VEC_ARG1(d_reduce = fmax(d_reduce, d1)); default: *pc_error = pc; return -3; break; } } #ifndef NO_OUTPUT_BUFFERING // If output buffering was necessary, copy the buffer to the output if(params.out_buffer != NULL) { memcpy(iter_dataptr[0], params.out_buffer, params.memsizes[0] * BLOCK_SIZE); } #endif // NO_OUTPUT_BUFFERING #undef VEC_LOOP #undef VEC_ARG1 #undef VEC_ARG2 #undef VEC_ARG3 #undef i_reduce #undef l_reduce #undef f_reduce #undef d_reduce #undef cr_reduce #undef ci_reduce #undef b_dest #undef i_dest #undef l_dest #undef f_dest #undef d_dest #undef cr_dest #undef ci_dest #undef s_dest #undef b1 #undef i1 #undef l1 #undef f1 #undef d1 #undef c1r #undef c1i #undef s1 #undef b2 #undef i2 #undef l2 #undef f2 #undef d2 #undef c2r #undef c2i #undef s2 #undef b3 #undef i3 #undef l3 #undef f3 #undef d3 #undef c3r #undef c3i #undef s3 } /* Local Variables: c-basic-offset: 4 End: */ numexpr-2.7.1/numexpr/interpreter.cpp000066400000000000000000001342361360375525100200040ustar00rootroot00000000000000/********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ #include "module.hpp" #include #include #include #include #include #include "numexpr_config.hpp" #include "complex_functions.hpp" #include "interpreter.hpp" #include "numexpr_object.hpp" #ifdef _MSC_VER /* Some missing symbols and functions for Win */ #define fmax max #define fmin min #define INFINITY (DBL_MAX+DBL_MAX) #define NAN (INFINITY-INFINITY) #endif #ifndef SIZE_MAX #define SIZE_MAX ((size_t)-1) #endif #define RETURN_TYPE char* // AVAILABLE(Haystack, Haystack_Len, J, Needle_Len) // A macro that returns nonzero if there are at least Needle_Len // bytes left starting at Haystack[J]. 
// Haystack is 'unsigned char *', Haystack_Len, J, and Needle_Len // are 'size_t'; Haystack_Len is an lvalue. For NUL-terminated // searches, Haystack_Len can be modified each iteration to avoid // having to compute the end of Haystack up front. #define AVAILABLE(Haystack, Haystack_Len, J, Needle_Len) \ ((Haystack_Len) >= (J) + (Needle_Len)) #include "str-two-way.hpp" #ifdef DEBUG #define DEBUG_TEST 1 #else #define DEBUG_TEST 0 #endif using namespace std; // Global state thread_data th_params; /* This file and interp_body should really be generated from a description of the opcodes -- there's too much repetition here for manually editing */ /* bit of a misnomer; includes the return value. */ #define NUMEXPR_MAX_ARGS 4 static char op_signature_table[][NUMEXPR_MAX_ARGS] = { #define Tb 'b' #define Ti 'i' #define Tl 'l' #define Tf 'f' #define Td 'd' #define Tc 'c' #define Ts 's' #define Tn 'n' #define T0 0 #define OPCODE(n, e, ex, rt, a1, a2, a3) {rt, a1, a2, a3}, #include "opcodes.hpp" #undef OPCODE #undef Tb #undef Ti #undef Tl #undef Tf #undef Td #undef Tc #undef Ts #undef Tn #undef T0 }; /* returns the sig of the nth op, '\0' if no more ops -1 on failure */ static int op_signature(int op, unsigned int n) { if (n >= NUMEXPR_MAX_ARGS) { return 0; } if (op < 0 || op > OP_END) { return -1; } return op_signature_table[op][n]; } /* To add a function to the lookup table, add to FUNC_CODES (first group is 1-arg functions, second is 2-arg functions), also to functions_f or functions_ff as appropriate. Finally, use add_func down below to add to funccodes. Functions with more arguments aren't implemented at present, but should be easy; just copy the 1- or 2-arg case. Some functions (for example, sqrt) are repeated in this table that are opcodes, but there's no problem with that as the compiler selects opcodes over functions, and this makes it easier to compare opcode vs. function speeds. */ typedef float (*FuncFFPtr)(float); #ifdef _WIN32 FuncFFPtr functions_ff[] = { #define FUNC_FF(fop, s, f, f_win32, ...) f_win32, #include "functions.hpp" #undef FUNC_FF }; #else FuncFFPtr functions_ff[] = { #define FUNC_FF(fop, s, f, ...) 
f, #include "functions.hpp" #undef FUNC_FF }; #endif #ifdef USE_VML /* Fake vsConj function just for casting purposes inside numexpr */ static void vsConj(MKL_INT n, const float* x1, float* dest) { MKL_INT j; for (j=0; j= NPY_MAXDIMS) axis = NPY_MAXDIMS - axis; return axis; } int check_program(NumExprObject *self) { unsigned char *program; Py_ssize_t prog_len, n_buffers, n_inputs; int pc, arg, argloc, argno, sig; char *fullsig, *signature; if (PyBytes_AsStringAndSize(self->program, (char **)&program, &prog_len) < 0) { PyErr_Format(PyExc_RuntimeError, "invalid program: can't read program"); return -1; } if (prog_len % 4 != 0) { PyErr_Format(PyExc_RuntimeError, "invalid program: prog_len mod 4 != 0"); return -1; } if (PyBytes_AsStringAndSize(self->fullsig, (char **)&fullsig, &n_buffers) < 0) { PyErr_Format(PyExc_RuntimeError, "invalid program: can't read fullsig"); return -1; } if (PyBytes_AsStringAndSize(self->signature, (char **)&signature, &n_inputs) < 0) { PyErr_Format(PyExc_RuntimeError, "invalid program: can't read signature"); return -1; } if (n_buffers > 255) { PyErr_Format(PyExc_RuntimeError, "invalid program: too many buffers"); return -1; } for (pc = 0; pc < prog_len; pc += 4) { unsigned int op = program[pc]; if (op == OP_NOOP) { continue; } if ((op >= OP_REDUCTION) && pc != prog_len-4) { PyErr_Format(PyExc_RuntimeError, "invalid program: reduction operations must occur last"); return -1; } for (argno = 0; ; argno++) { sig = op_signature(op, argno); if (sig == -1) { PyErr_Format(PyExc_RuntimeError, "invalid program: illegal opcode at %i (%d)", pc, op); return -1; } if (sig == 0) break; if (argno < 3) { argloc = pc+argno+1; } if (argno >= 3) { if (pc + 1 >= prog_len) { PyErr_Format(PyExc_RuntimeError, "invalid program: double opcode (%c) at end (%i)", pc, sig); return -1; } argloc = pc+argno+2; } arg = program[argloc]; if (sig != 'n' && ((arg >= n_buffers) || (arg < 0))) { PyErr_Format(PyExc_RuntimeError, "invalid program: buffer out of range (%i) at %i", arg, argloc); return -1; } if (sig == 'n') { if (op == OP_FUNC_FFN) { if (arg < 0 || arg >= FUNC_FF_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_FFFN) { if (arg < 0 || arg >= FUNC_FFF_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_DDN) { if (arg < 0 || arg >= FUNC_DD_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_DDDN) { if (arg < 0 || arg >= FUNC_DDD_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_CCN) { if (arg < 0 || arg >= FUNC_CC_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op == OP_FUNC_CCCN) { if (arg < 0 || arg >= FUNC_CCC_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } } else if (op >= OP_REDUCTION) { ; } else { PyErr_Format(PyExc_RuntimeError, "invalid program: internal checker error processing %i", argloc); return -1; } /* The next is to avoid problems with the ('i','l') duality, specially in 64-bit platforms */ } else if (((sig == 'l') && (fullsig[arg] == 'i')) || ((sig == 'i') && (fullsig[arg] == 'l'))) { ; } else if (sig != fullsig[arg]) { 
PyErr_Format(PyExc_RuntimeError, "invalid : opcode signature doesn't match buffer (%c vs %c) at %i", sig, fullsig[arg], argloc); return -1; } } } return 0; } struct index_data { int count; int size; int findex; npy_intp *shape; npy_intp *strides; int *index; char *buffer; }; // BOUNDS_CHECK is used in interp_body.cpp #define DO_BOUNDS_CHECK 1 #if DO_BOUNDS_CHECK #define BOUNDS_CHECK(arg) if ((arg) >= params.r_end) { \ *pc_error = pc; \ return -2; \ } #else #define BOUNDS_CHECK(arg) #endif int stringcmp(const char *s1, const char *s2, npy_intp maxlen1, npy_intp maxlen2) { npy_intp maxlen, nextpos; /* Point to this when the end of a string is found, to simulate infinte trailing NULL characters. */ const char null = 0; // First check if some of the operands is the empty string and if so, // just check that the first char of the other is the NULL one. // Fixes #121 if (maxlen2 == 0) return *s1 != null; if (maxlen1 == 0) return *s2 != null; maxlen = (maxlen1 > maxlen2) ? maxlen1 : maxlen2; for (nextpos = 1; nextpos <= maxlen; nextpos++) { if (*s1 < *s2) return -1; if (*s1 > *s2) return +1; s1 = (nextpos >= maxlen1) ? &null : s1+1; s2 = (nextpos >= maxlen2) ? &null : s2+1; } return 0; } /* contains(str1, str2) function for string columns. Based on Newlib/strstr.c. */ int stringcontains(const char *haystack_start, const char *needle_start, npy_intp max_haystack_len, npy_intp max_needle_len) { // needle_len - Length of needle. // haystack_len - Known minimum length of haystack. size_t needle_len = (size_t)max_needle_len; size_t haystack_len = (size_t)max_haystack_len; const char *haystack = haystack_start; const char *needle = needle_start; bool ok = true; /* needle is prefix of haystack. */ char *res; size_t si = 0; size_t min_len = min(needle_len, haystack_len); while (*haystack && *needle && si < min_len) { ok &= *haystack++ == *needle++; si++; } /* check needle is prefix of haystack and calc needle length */ if (si == needle_len || *needle == 0) { if (ok) return 1; needle_len = si; } else { /* haystack less needle */ return 0; } /* calc haystack length */ while (*haystack && si < haystack_len) { haystack++; si++; } haystack_len = si; if (needle_len < LONG_NEEDLE_THRESHOLD) { res = two_way_short_needle((const unsigned char *)haystack_start, haystack_len, (const unsigned char *)needle_start, needle_len); } else { res = two_way_long_needle((const unsigned char *)haystack_start, haystack_len, (const unsigned char *)needle_start, needle_len); } return res != NULL ? 
1 : 0; } /* Get space for VM temporary registers */ int get_temps_space(const vm_params& params, char **mem, size_t block_size) { int r, k = 1 + params.n_inputs + params.n_constants; for (r = k; r < k + params.n_temps; r++) { mem[r] = (char *)malloc(block_size * params.memsizes[r]); if (mem[r] == NULL) { return -1; } } return 0; } /* Free space for VM temporary registers */ void free_temps_space(const vm_params& params, char **mem) { int r, k = 1 + params.n_inputs + params.n_constants; for (r = k; r < k + params.n_temps; r++) { free(mem[r]); } } /* Serial/parallel task iterator version of the VM engine */ int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps, const vm_params& params, int *pc_error, char **errmsg) { char **mem = params.mem; NpyIter_IterNextFunc *iternext; npy_intp block_size, *size_ptr; char **iter_dataptr; npy_intp *iter_strides; iternext = NpyIter_GetIterNext(iter, errmsg); if (iternext == NULL) { return -1; } size_ptr = NpyIter_GetInnerLoopSizePtr(iter); iter_dataptr = NpyIter_GetDataPtrArray(iter); iter_strides = NpyIter_GetInnerStrideArray(iter); /* * First do all the blocks with a compile-time fixed size. * This makes a big difference (30-50% on some tests). */ block_size = *size_ptr; while (block_size == BLOCK_SIZE1) { #define REDUCTION_INNER_LOOP #define BLOCK_SIZE BLOCK_SIZE1 #include "interp_body.cpp" #undef BLOCK_SIZE #undef REDUCTION_INNER_LOOP iternext(iter); block_size = *size_ptr; } /* Then finish off the rest */ if (block_size > 0) do { #define REDUCTION_INNER_LOOP #define BLOCK_SIZE block_size #include "interp_body.cpp" #undef BLOCK_SIZE #undef REDUCTION_INNER_LOOP } while (iternext(iter)); return 0; } static int vm_engine_iter_outer_reduce_task(NpyIter *iter, npy_intp *memsteps, const vm_params& params, int *pc_error, char **errmsg) { char **mem = params.mem; NpyIter_IterNextFunc *iternext; npy_intp block_size, *size_ptr; char **iter_dataptr; npy_intp *iter_strides; iternext = NpyIter_GetIterNext(iter, errmsg); if (iternext == NULL) { return -1; } size_ptr = NpyIter_GetInnerLoopSizePtr(iter); iter_dataptr = NpyIter_GetDataPtrArray(iter); iter_strides = NpyIter_GetInnerStrideArray(iter); /* * First do all the blocks with a compile-time fixed size. * This makes a big difference (30-50% on some tests). */ block_size = *size_ptr; while (block_size == BLOCK_SIZE1) { #define BLOCK_SIZE BLOCK_SIZE1 #define NO_OUTPUT_BUFFERING // Because it's a reduction #include "interp_body.cpp" #undef NO_OUTPUT_BUFFERING #undef BLOCK_SIZE iternext(iter); block_size = *size_ptr; } /* Then finish off the rest */ if (block_size > 0) do { #define BLOCK_SIZE block_size #define NO_OUTPUT_BUFFERING // Because it's a reduction #include "interp_body.cpp" #undef NO_OUTPUT_BUFFERING #undef BLOCK_SIZE } while (iternext(iter)); return 0; } /* Parallel iterator version of VM engine */ static int vm_engine_iter_parallel(NpyIter *iter, const vm_params& params, bool need_output_buffering, int *pc_error, char **errmsg) { int i, ret = -1; npy_intp numblocks, taskfactor; if (errmsg == NULL) { return -1; } /* Ensure only one parallel job is running at a time (otherwise the global th_params get corrupted). */ Py_BEGIN_ALLOW_THREADS; pthread_mutex_lock(&gs.parallel_mutex); Py_END_ALLOW_THREADS; /* Populate parameters for worker threads */ NpyIter_GetIterIndexRange(iter, &th_params.start, &th_params.vlen); /* * Try to make it so each thread gets 16 tasks. This is a compromise * between 1 task per thread and one block per task. 
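   The thread count that feeds this split is set from Python. A minimal
   sketch using the public API (set_num_threads returns the previous
   setting; the value 4 is arbitrary):

       import numexpr as ne

       ne.detect_number_of_cores()   # number of cores numexpr can detect
       prev = ne.set_num_threads(4)  # use 4 worker threads from now on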
*/ taskfactor = 16*BLOCK_SIZE1*gs.nthreads; numblocks = (th_params.vlen - th_params.start + taskfactor - 1) / taskfactor; th_params.block_size = numblocks * BLOCK_SIZE1; th_params.params = params; th_params.need_output_buffering = need_output_buffering; th_params.ret_code = 0; th_params.pc_error = pc_error; th_params.errmsg = errmsg; th_params.iter[0] = iter; /* Make one copy for each additional thread */ for (i = 1; i < gs.nthreads; ++i) { th_params.iter[i] = NpyIter_Copy(iter); if (th_params.iter[i] == NULL) { --i; for (; i > 0; --i) { NpyIter_Deallocate(th_params.iter[i]); } goto end; } } th_params.memsteps[0] = params.memsteps; /* Make one copy of memsteps for each additional thread */ for (i = 1; i < gs.nthreads; ++i) { th_params.memsteps[i] = PyMem_New(npy_intp, 1 + params.n_inputs + params.n_constants + params.n_temps); if (th_params.memsteps[i] == NULL) { --i; for (; i > 0; --i) { PyMem_Del(th_params.memsteps[i]); } for (i = 0; i < gs.nthreads; ++i) { NpyIter_Deallocate(th_params.iter[i]); } goto end; } memcpy(th_params.memsteps[i], th_params.memsteps[0], sizeof(npy_intp) * (1 + params.n_inputs + params.n_constants + params.n_temps)); } Py_BEGIN_ALLOW_THREADS; /* Synchronization point for all threads (wait for initialization) */ pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads < gs.nthreads) { gs.count_threads++; /* Beware of spurious wakeups. See issue pydata/numexpr#306. */ do { pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } while (!gs.barrier_passed); } else { gs.barrier_passed = 1; pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); /* Synchronization point for all threads (wait for finalization) */ pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads > 0) { gs.count_threads--; do { pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } while (gs.barrier_passed); } else { gs.barrier_passed = 0; pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); Py_END_ALLOW_THREADS; /* Deallocate all the iterator and memsteps copies */ for (i = 1; i < gs.nthreads; ++i) { NpyIter_Deallocate(th_params.iter[i]); PyMem_Del(th_params.memsteps[i]); } ret = th_params.ret_code; end: pthread_mutex_unlock(&gs.parallel_mutex); return ret; } static int run_interpreter(NumExprObject *self, NpyIter *iter, NpyIter *reduce_iter, bool reduction_outer_loop, bool need_output_buffering, int *pc_error) { int r; Py_ssize_t plen; vm_params params; char *errmsg = NULL; *pc_error = -1; if (PyBytes_AsStringAndSize(self->program, (char **)&(params.program), &plen) < 0) { return -1; } params.prog_len = (int)plen; params.output = NULL; params.inputs = NULL; params.index_data = NULL; params.n_inputs = self->n_inputs; params.n_constants = self->n_constants; params.n_temps = self->n_temps; params.mem = self->mem; params.memsteps = self->memsteps; params.memsizes = self->memsizes; params.r_end = (int)PyBytes_Size(self->fullsig); params.out_buffer = NULL; if ((gs.nthreads == 1) || gs.force_serial) { // Can do it as one "task" if (reduce_iter == NULL) { // Allocate memory for output buffering if needed vector out_buffer(need_output_buffering ? (self->memsizes[0] * BLOCK_SIZE1) : 0); params.out_buffer = need_output_buffering ? 
&out_buffer[0] : NULL; // Reset the iterator to allocate its buffers if(NpyIter_Reset(iter, NULL) != NPY_SUCCEED) { return -1; } get_temps_space(params, params.mem, BLOCK_SIZE1); Py_BEGIN_ALLOW_THREADS; r = vm_engine_iter_task(iter, params.memsteps, params, pc_error, &errmsg); Py_END_ALLOW_THREADS; free_temps_space(params, params.mem); } else { if (reduction_outer_loop) { char **dataptr; NpyIter_IterNextFunc *iternext; dataptr = NpyIter_GetDataPtrArray(reduce_iter); iternext = NpyIter_GetIterNext(reduce_iter, NULL); if (iternext == NULL) { return -1; } get_temps_space(params, params.mem, BLOCK_SIZE1); Py_BEGIN_ALLOW_THREADS; do { r = NpyIter_ResetBasePointers(iter, dataptr, &errmsg); if (r >= 0) { r = vm_engine_iter_outer_reduce_task(iter, params.memsteps, params, pc_error, &errmsg); } if (r < 0) { break; } } while (iternext(reduce_iter)); Py_END_ALLOW_THREADS; free_temps_space(params, params.mem); } else { char **dataptr; NpyIter_IterNextFunc *iternext; dataptr = NpyIter_GetDataPtrArray(iter); iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { return -1; } get_temps_space(params, params.mem, BLOCK_SIZE1); Py_BEGIN_ALLOW_THREADS; do { r = NpyIter_ResetBasePointers(reduce_iter, dataptr, &errmsg); if (r >= 0) { r = vm_engine_iter_task(reduce_iter, params.memsteps, params, pc_error, &errmsg); } if (r < 0) { break; } } while (iternext(iter)); Py_END_ALLOW_THREADS; free_temps_space(params, params.mem); } } } else { if (reduce_iter == NULL) { r = vm_engine_iter_parallel(iter, params, need_output_buffering, pc_error, &errmsg); } else { errmsg = (char *) "Parallel engine doesn't support reduction yet"; r = -1; } } if (r < 0 && errmsg != NULL) { PyErr_SetString(PyExc_RuntimeError, errmsg); } return 0; } static int run_interpreter_const(NumExprObject *self, char *output, int *pc_error) { vm_params params; Py_ssize_t plen; char **mem; npy_intp *memsteps; *pc_error = -1; if (PyBytes_AsStringAndSize(self->program, (char **)&(params.program), &plen) < 0) { return -1; } if (self->n_inputs != 0) { return -1; } params.prog_len = (int)plen; params.output = output; params.inputs = NULL; params.index_data = NULL; params.n_inputs = self->n_inputs; params.n_constants = self->n_constants; params.n_temps = self->n_temps; params.mem = self->mem; memsteps = self->memsteps; params.memsizes = self->memsizes; params.r_end = (int)PyBytes_Size(self->fullsig); mem = params.mem; get_temps_space(params, mem, 1); #define SINGLE_ITEM_CONST_LOOP #define BLOCK_SIZE 1 #define NO_OUTPUT_BUFFERING // Because it's constant #include "interp_body.cpp" #undef NO_OUTPUT_BUFFERING #undef BLOCK_SIZE #undef SINGLE_ITEM_CONST_LOOP free_temps_space(params, mem); return 0; } PyObject * NumExpr_run(NumExprObject *self, PyObject *args, PyObject *kwds) { PyArrayObject *operands[NPY_MAXARGS]; PyArray_Descr *dtypes[NPY_MAXARGS], **dtypes_tmp; PyObject *tmp, *ret; npy_uint32 op_flags[NPY_MAXARGS]; NPY_CASTING casting = NPY_SAFE_CASTING; NPY_ORDER order = NPY_KEEPORDER; unsigned int i, n_inputs; int r, pc_error = 0; int reduction_axis = -1; npy_intp reduction_size = -1; // For #277 change this 1 -> -1 to be in-line with NumPy 1.8, int ex_uses_vml = 0, is_reduction = 0; bool reduction_outer_loop = false, need_output_buffering = false, full_reduction = false; // To specify axes when doing a reduction int op_axes_values[NPY_MAXARGS][NPY_MAXDIMS], op_axes_reduction_values[NPY_MAXARGS]; int *op_axes_ptrs[NPY_MAXDIMS]; int oa_ndim = 0; int **op_axes = NULL; NpyIter *iter = NULL, *reduce_iter = NULL; // Check whether we need to restart 
threads if (!gs.init_threads_done || gs.pid != getpid()) { numexpr_set_nthreads(gs.nthreads); } // Don't force serial mode by default gs.force_serial = 0; // Check whether there's a reduction as the final step is_reduction = last_opcode(self->program) > OP_REDUCTION; n_inputs = (int)PyTuple_Size(args); if (PyBytes_Size(self->signature) != n_inputs) { return PyErr_Format(PyExc_ValueError, "number of inputs doesn't match program"); } else if (n_inputs+1 > NPY_MAXARGS) { return PyErr_Format(PyExc_ValueError, "too many inputs"); } memset(operands, 0, sizeof(operands)); memset(dtypes, 0, sizeof(dtypes)); if (kwds) { tmp = PyDict_GetItemString(kwds, "casting"); // borrowed ref if (tmp != NULL && !PyArray_CastingConverter(tmp, &casting)) { return NULL; } tmp = PyDict_GetItemString(kwds, "order"); // borrowed ref if (tmp != NULL && !PyArray_OrderConverter(tmp, &order)) { return NULL; } tmp = PyDict_GetItemString(kwds, "ex_uses_vml"); // borrowed ref if (tmp == NULL) { return PyErr_Format(PyExc_ValueError, "ex_uses_vml parameter is required"); } if (tmp == Py_True) { ex_uses_vml = 1; } // borrowed ref operands[0] = (PyArrayObject *)PyDict_GetItemString(kwds, "out"); if (operands[0] != NULL) { if ((PyObject *)operands[0] == Py_None) { operands[0] = NULL; } else if (!PyArray_Check(operands[0])) { return PyErr_Format(PyExc_ValueError, "out keyword parameter is not an array"); } else { Py_INCREF(operands[0]); } } } for (i = 0; i < n_inputs; i++) { PyObject *o = PyTuple_GET_ITEM(args, i); // borrowed ref PyObject *a; char c = PyBytes_AS_STRING(self->signature)[i]; int typecode = typecode_from_char(c); // Convert it if it's not an array if (!PyArray_Check(o)) { if (typecode == -1) goto fail; a = PyArray_FROM_OTF(o, typecode, NPY_ARRAY_NOTSWAPPED); } else { Py_INCREF(o); a = o; } operands[i+1] = (PyArrayObject *)a; dtypes[i+1] = PyArray_DescrFromType(typecode); if (operands[0] != NULL) { // Check for the case where "out" is one of the inputs // TODO: Probably should deal with the general overlap case, // but NumPy ufuncs don't do that yet either. if (PyArray_DATA(operands[0]) == PyArray_DATA(operands[i+1])) { need_output_buffering = true; } } if (operands[i+1] == NULL || dtypes[i+1] == NULL) { goto fail; } op_flags[i+1] = NPY_ITER_READONLY| #ifdef USE_VML (ex_uses_vml ? (NPY_ITER_CONTIG|NPY_ITER_ALIGNED) : 0)| #endif #ifndef USE_UNALIGNED_ACCESS NPY_ITER_ALIGNED| #endif NPY_ITER_NBO ; } if (is_reduction) { // A reduction can not result in a string, // so we don't need to worry about item sizes here. 
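/*
   What reaches this branch are expressions whose outermost operation is a
   reduction (sum/prod/min/max). A minimal sketch through the public
   evaluate API (array contents are arbitrary):

       import numexpr as ne
       import numpy as np

       a = np.arange(6).reshape(2, 3)
       ne.evaluate("sum(a*a, axis=1)")   # reduction as the last step only

   check_program() above already guarantees that the reduction opcode is the
   final instruction of the program.
*/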
char retsig = get_return_sig(self->program); reduction_axis = get_reduction_axis(self->program); // Need to set up op_axes for the non-reduction part if (reduction_axis != 255) { // Get the number of broadcast dimensions for (i = 0; i < n_inputs; ++i) { int ndim = PyArray_NDIM(operands[i+1]); if (ndim > oa_ndim) { oa_ndim = ndim; } } if (reduction_axis < 0 || reduction_axis >= oa_ndim) { PyErr_Format(PyExc_ValueError, "reduction axis is out of bounds"); goto fail; } // Fill in the op_axes op_axes_ptrs[0] = NULL; op_axes_reduction_values[0] = -1; for (i = 0; i < n_inputs; ++i) { int j = 0, idim, ndim = PyArray_NDIM(operands[i+1]); for (idim = 0; idim < oa_ndim-ndim; ++idim) { if (idim != reduction_axis) { op_axes_values[i+1][j++] = -1; } else { op_axes_reduction_values[i+1] = -1; } } for (idim = oa_ndim-ndim; idim < oa_ndim; ++idim) { if (idim != reduction_axis) { op_axes_values[i+1][j++] = idim-(oa_ndim-ndim); } else { npy_intp size = PyArray_DIM(operands[i+1], idim-(oa_ndim-ndim)); if (size > reduction_size) { reduction_size = size; } op_axes_reduction_values[i+1] = idim-(oa_ndim-ndim); } } op_axes_ptrs[i+1] = op_axes_values[i+1]; } // op_axes has one less than the broadcast dimensions --oa_ndim; if (oa_ndim > 0) { op_axes = op_axes_ptrs; } else { reduction_size = 1; } } // A full reduction can be done without nested iteration if (oa_ndim == 0) { full_reduction = true; if (operands[0] == NULL) { npy_intp dim = 1; operands[0] = (PyArrayObject *)PyArray_SimpleNew(0, &dim, typecode_from_char(retsig)); if (!operands[0]) goto fail; } else if (PyArray_SIZE(operands[0]) != 1) { PyErr_Format(PyExc_ValueError, "out argument must have size 1 for a full reduction"); goto fail; } } dtypes[0] = PyArray_DescrFromType(typecode_from_char(retsig)); op_flags[0] = NPY_ITER_READWRITE| NPY_ITER_ALLOCATE| // Copy, because it can't buffer the reduction NPY_ITER_UPDATEIFCOPY| NPY_ITER_NBO| #ifndef USE_UNALIGNED_ACCESS NPY_ITER_ALIGNED| #endif (oa_ndim == 0 ? 0 : NPY_ITER_NO_BROADCAST); } else { char retsig = get_return_sig(self->program); if (retsig != 's') { dtypes[0] = PyArray_DescrFromType(typecode_from_char(retsig)); } else { /* Since the *only* supported operation returning a string * is a copy, the size of returned strings * can be directly gotten from the first (and only) * input/constant/temporary. 
*/ if (n_inputs > 0) { // input, like in 'a' where a -> 'foo' dtypes[0] = PyArray_DESCR(operands[1]); Py_INCREF(dtypes[0]); } else { // constant, like in '"foo"' dtypes[0] = PyArray_DescrNewFromType(NPY_STRING); dtypes[0]->elsize = (int)self->memsizes[1]; } // no string temporaries, so no third case } if (dtypes[0] == NULL) { goto fail; } op_flags[0] = NPY_ITER_WRITEONLY| NPY_ITER_ALLOCATE| NPY_ITER_CONTIG| NPY_ITER_NBO| #ifndef USE_UNALIGNED_ACCESS NPY_ITER_ALIGNED| #endif NPY_ITER_NO_BROADCAST; } // Check for empty arrays in expression if (n_inputs > 0) { char retsig = get_return_sig(self->program); // Check length for all inputs int zeroi, zerolen = 0; for (i=0; i < n_inputs; i++) { if (PyArray_SIZE(operands[i+1]) == 0) { zerolen = 1; zeroi = i+1; break; } } if (zerolen != 0) { // Allocate the output int ndim = PyArray_NDIM(operands[zeroi]); npy_intp *dims = PyArray_DIMS(operands[zeroi]); operands[0] = (PyArrayObject *)PyArray_SimpleNew(ndim, dims, typecode_from_char(retsig)); if (operands[0] == NULL) { goto fail; } ret = (PyObject *)operands[0]; Py_INCREF(ret); goto cleanup_and_exit; } } /* A case with a single constant output */ if (n_inputs == 0) { char retsig = get_return_sig(self->program); /* Allocate the output */ if (operands[0] == NULL) { npy_intp dim = 1; operands[0] = (PyArrayObject *)PyArray_SimpleNew(0, &dim, typecode_from_char(retsig)); if (operands[0] == NULL) { goto fail; } } else { PyArrayObject *a; if (PyArray_SIZE(operands[0]) != 1) { PyErr_SetString(PyExc_ValueError, "output for a constant expression must have size 1"); goto fail; } else if (!PyArray_ISWRITEABLE(operands[0])) { PyErr_SetString(PyExc_ValueError, "output is not writeable"); goto fail; } Py_INCREF(dtypes[0]); a = (PyArrayObject *)PyArray_FromArray(operands[0], dtypes[0], NPY_ARRAY_ALIGNED|NPY_ARRAY_UPDATEIFCOPY); if (a == NULL) { goto fail; } Py_DECREF(operands[0]); operands[0] = a; } r = run_interpreter_const(self, PyArray_BYTES(operands[0]), &pc_error); ret = (PyObject *)operands[0]; Py_INCREF(ret); goto cleanup_and_exit; } /* Allocate the iterator or nested iterators */ if (reduction_size < 0 || full_reduction) { /* When there's no reduction, reduction_size is 1 as well */ // RAM: in issue #277 this was also the case for reductions on arrays // with axis=0 having singleton dimension, i.e. such ops were interpreted // as full_reductions when they weren't in Numpy. As such, the default // reduction_size is now -1 and we add the flag for full_reduction, // e.g. ne.evaluate("sum(a)")" iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_BUFFERED| NPY_ITER_REDUCE_OK| NPY_ITER_RANGED| NPY_ITER_DELAY_BUFALLOC| NPY_ITER_EXTERNAL_LOOP, order, casting, op_flags, dtypes, -1, NULL, NULL, BLOCK_SIZE1); if (iter == NULL) { goto fail; } } else { npy_uint32 op_flags_outer[NPY_MAXDIMS]; /* The outer loop is unbuffered */ op_flags_outer[0] = NPY_ITER_READWRITE| NPY_ITER_ALLOCATE| NPY_ITER_NO_BROADCAST; for (i = 0; i < n_inputs; ++i) { op_flags_outer[i+1] = NPY_ITER_READONLY; } /* Arbitrary threshold for which is the inner loop...benchmark? 
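   Here reduction_size is the length of the axis being reduced, so a short
   reduced axis (below 64 elements) becomes the outer loop while the long
   axis stays in the blocked inner loop. A sketch of the two cases this
   threshold separates, using the public API (shapes are arbitrary):

       import numexpr as ne
       import numpy as np

       a = np.zeros((100000, 8))
       ne.evaluate("sum(a, axis=1)")   # reduced axis of 8 -> outer-loop reduction
       ne.evaluate("sum(a, axis=0)")   # reduced axis of 100000 -> inner-loop path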
*/ if (reduction_size < 64) { reduction_outer_loop = true; iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_BUFFERED| NPY_ITER_RANGED| NPY_ITER_DELAY_BUFALLOC| NPY_ITER_EXTERNAL_LOOP, order, casting, op_flags, dtypes, oa_ndim, op_axes, NULL, BLOCK_SIZE1); if (iter == NULL) { goto fail; } /* If the output was allocated, get it for the second iterator */ if (operands[0] == NULL) { operands[0] = NpyIter_GetOperandArray(iter)[0]; Py_INCREF(operands[0]); } op_axes[0] = &op_axes_reduction_values[0]; for (i = 0; i < n_inputs; ++i) { op_axes[i+1] = &op_axes_reduction_values[i+1]; } op_flags_outer[0] &= ~NPY_ITER_NO_BROADCAST; reduce_iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_REDUCE_OK, order, casting, op_flags_outer, NULL, 1, op_axes, NULL, 0); if (reduce_iter == NULL) { goto fail; } } else { PyArray_Descr *dtypes_outer[NPY_MAXDIMS]; /* If the output is being allocated, need to specify its dtype */ dtypes_outer[0] = dtypes[0]; for (i = 0; i < n_inputs; ++i) { dtypes_outer[i+1] = NULL; } iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_RANGED, order, casting, op_flags_outer, dtypes_outer, oa_ndim, op_axes, NULL, 0); if (iter == NULL) { goto fail; } /* If the output was allocated, get it for the second iterator */ if (operands[0] == NULL) { operands[0] = NpyIter_GetOperandArray(iter)[0]; Py_INCREF(operands[0]); } op_axes[0] = &op_axes_reduction_values[0]; for (i = 0; i < n_inputs; ++i) { op_axes[i+1] = &op_axes_reduction_values[i+1]; } op_flags[0] &= ~NPY_ITER_NO_BROADCAST; reduce_iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_BUFFERED| NPY_ITER_REDUCE_OK| NPY_ITER_DELAY_BUFALLOC| NPY_ITER_EXTERNAL_LOOP, order, casting, op_flags, dtypes, 1, op_axes, NULL, BLOCK_SIZE1); if (reduce_iter == NULL) { goto fail; } } } /* Initialize the output to the reduction unit */ if (is_reduction) { PyArrayObject *a = NpyIter_GetOperandArray(iter)[0]; PyObject *fill; int op = last_opcode(self->program); if (op < OP_PROD) { /* sum identity is 0 */ fill = PyLong_FromLong(0); } else if (op >= OP_PROD && op < OP_MIN) { /* product identity is 1 */ fill = PyLong_FromLong(1); } else if (PyArray_DESCR(a)->kind == 'f') { /* floating point min/max identity is NaN */ fill = PyFloat_FromDouble(NAN); } else if (op >= OP_MIN && op < OP_MAX) { /* integer min identity */ fill = PyLong_FromLong(LONG_MAX); } else { /* integer max identity */ fill = PyLong_FromLong(LONG_MIN); } PyArray_FillWithScalar(a, fill); Py_DECREF(fill); } /* Get the sizes of all the operands */ dtypes_tmp = NpyIter_GetDescrArray(iter); for (i = 0; i < n_inputs+1; ++i) { self->memsizes[i] = dtypes_tmp[i]->elsize; } /* For small calculations, just use 1 thread */ if (NpyIter_GetIterSize(iter) < 2*BLOCK_SIZE1) { gs.force_serial = 1; } /* Reductions do not support parallel execution yet */ if (is_reduction) { gs.force_serial = 1; } r = run_interpreter(self, iter, reduce_iter, reduction_outer_loop, need_output_buffering, &pc_error); if (r < 0) { if (r == -1) { if (!PyErr_Occurred()) { PyErr_SetString(PyExc_RuntimeError, "an error occurred while running the program"); } } else if (r == -2) { PyErr_Format(PyExc_RuntimeError, "bad argument at pc=%d", pc_error); } else if (r == -3) { PyErr_Format(PyExc_RuntimeError, "bad opcode at pc=%d", pc_error); } else { PyErr_SetString(PyExc_RuntimeError, "unknown error occurred while running the program"); } goto fail; } /* Get the output from the iterator */ ret = (PyObject *)NpyIter_GetOperandArray(iter)[0]; Py_INCREF(ret); NpyIter_Deallocate(iter); if (reduce_iter != NULL) { 
NpyIter_Deallocate(reduce_iter); } cleanup_and_exit: for (i = 0; i < n_inputs+1; i++) { Py_XDECREF(operands[i]); Py_XDECREF(dtypes[i]); } return ret; fail: for (i = 0; i < n_inputs+1; i++) { Py_XDECREF(operands[i]); Py_XDECREF(dtypes[i]); } if (iter != NULL) { NpyIter_Deallocate(iter); } if (reduce_iter != NULL) { NpyIter_Deallocate(reduce_iter); } return NULL; } /* Local Variables: c-basic-offset: 4 End: */ numexpr-2.7.1/numexpr/interpreter.hpp000066400000000000000000000052241360375525100200030ustar00rootroot00000000000000#ifndef NUMEXPR_INTERPRETER_HPP #define NUMEXPR_INTERPRETER_HPP #include "numexpr_config.hpp" // Forward declaration struct NumExprObject; enum OpCodes { #define OPCODE(n, e, ...) e = n, #include "opcodes.hpp" #undef OPCODE }; enum FuncFFCodes { #define FUNC_FF(fop, ...) fop, #include "functions.hpp" #undef FUNC_FF }; enum FuncFFFCodes { #define FUNC_FFF(fop, ...) fop, #include "functions.hpp" #undef FUNC_FFF }; enum FuncDDCodes { #define FUNC_DD(fop, ...) fop, #include "functions.hpp" #undef FUNC_DD }; enum FuncDDDCodes { #define FUNC_DDD(fop, ...) fop, #include "functions.hpp" #undef FUNC_DDD }; enum FuncCCCodes { #define FUNC_CC(fop, ...) fop, #include "functions.hpp" #undef FUNC_CC }; enum FuncCCCCodes { #define FUNC_CCC(fop, ...) fop, #include "functions.hpp" #undef FUNC_CCC }; struct vm_params { int prog_len; unsigned char *program; int n_inputs; int n_constants; int n_temps; unsigned int r_end; char *output; char **inputs; char **mem; npy_intp *memsteps; npy_intp *memsizes; struct index_data *index_data; // Memory for output buffering. If output buffering is unneeded, // it contains NULL. char *out_buffer; }; // Structure for parameters in worker threads struct thread_data { npy_intp start; npy_intp vlen; npy_intp block_size; vm_params params; int ret_code; int *pc_error; char **errmsg; // NOTE: memsteps, iter, and reduce_iter are arrays, they MUST be allocated // to length `global_max_threads` before module load. // One memsteps array per thread // npy_intp *memsteps[MAX_THREADS]; npy_intp **memsteps; // One iterator per thread */ // NpyIter *iter[MAX_THREADS]; NpyIter **iter; // When doing nested iteration for a reduction // NpyIter *reduce_iter[MAX_THREADS] NpyIter **reduce_iter; // Flag indicating reduction is the outer loop instead of the inner bool reduction_outer_loop; // Flag indicating whether output buffering is needed bool need_output_buffering; }; // Global state which holds thread parameters extern thread_data th_params; PyObject *NumExpr_run(NumExprObject *self, PyObject *args, PyObject *kwds); char get_return_sig(PyObject* program); int check_program(NumExprObject *self); int get_temps_space(const vm_params& params, char **mem, size_t block_size); void free_temps_space(const vm_params& params, char **mem); int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps, const vm_params& params, int *pc_error, char **errmsg); #endif // NUMEXPR_INTERPRETER_HPP numexpr-2.7.1/numexpr/missing_posix_functions.hpp000066400000000000000000000036361360375525100224300ustar00rootroot00000000000000#ifndef NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP #define NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. 
**********************************************************************/ /* These functions are not included in some non-POSIX compilers, like MSVC 7.1 */ /* Double precision versions */ inline double log1p(double x) { double u = 1.0 + x; if (u == 1.0) { return x; } else { return log(u) * x / (u-1.0); } } inline double expm1(double x) { double u = exp(x); if (u == 1.0) { return x; } else if (u-1.0 == -1.0) { return -1; } else { return (u-1.0) * x/log(u); } } inline double asinh(double xx) { double x, d; int sign; if (xx < 0.0) { sign = -1; x = -xx; } else { sign = 1; x = xx; } if (x > 1e8) { d = x; } else { d = sqrt(x*x + 1.0); } return sign*log1p(x*(1.0 + x/(d+1.0))); } inline double acosh(double x) { return 2*log(sqrt((x+1.0)/2)+sqrt((x-1.0)/2)); } inline double atanh(double x) { /* This definition is different from that in NumPy 1.3 and follows the convention of MatLab. This will allow for double checking both approaches. */ return 0.5*log((1.0+x)/(1.0-x)); } /* Single precision versions */ inline float log1pf(float x) { return (float) log1p((double)x); } inline float expm1f(float x) { return (float) expm1((double)x); } inline float asinhf(float x) { return (float) asinh((double)x); } inline float acoshf(float x) { return (float) acosh((double)x); } inline float atanhf(float x) { return (float) atanh((double)x); } #endif // NUMEXPR_MISSING_POSIX_FUNCTIONS_HPP numexpr-2.7.1/numexpr/module.cpp000066400000000000000000000343231360375525100167220ustar00rootroot00000000000000// Numexpr - Fast numerical array expression evaluator for NumPy. // // License: MIT // Author: See AUTHORS.txt // // See LICENSE.txt for details about copyright and rights to use. // // module.cpp contains the CPython-specific module exposure. #define DO_NUMPY_IMPORT_ARRAY #include "module.hpp" #include #include #include "interpreter.hpp" #include "numexpr_object.hpp" using namespace std; // Global state. The file interpreter.hpp also has some global state // in its 'th_params' variable global_state gs; long global_max_threads=DEFAULT_MAX_THREADS; /* Do the worker job for a certain thread */ void *th_worker(void *tidptr) { int tid = *(int *)tidptr; /* Parameters for threads */ npy_intp start; npy_intp vlen; npy_intp block_size; NpyIter *iter; vm_params params; int *pc_error; int ret; int n_inputs; int n_constants; int n_temps; size_t memsize; char **mem; npy_intp *memsteps; npy_intp istart, iend; char **errmsg; // For output buffering if needed vector out_buffer; while (1) { /* Sentinels have to be initialised yet */ gs.init_sentinels_done = 0; /* Meeting point for all threads (wait for initialization) */ pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads < gs.nthreads) { gs.count_threads++; /* Beware of spurious wakeups. See issue pydata/numexpr#306. 
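             * Hence the wait below sits in a predicate loop on gs.barrier_passed
             * rather than trusting a single return from pthread_cond_wait().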
*/ do { pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } while (!gs.barrier_passed); } else { gs.barrier_passed = 1; pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); /* Check if thread has been asked to return */ if (gs.end_threads) { return(0); } /* Get parameters for this thread before entering the main loop */ start = th_params.start; vlen = th_params.vlen; block_size = th_params.block_size; params = th_params.params; pc_error = th_params.pc_error; // If output buffering is needed, allocate it if (th_params.need_output_buffering) { out_buffer.resize(params.memsizes[0] * BLOCK_SIZE1); params.out_buffer = &out_buffer[0]; } else { params.out_buffer = NULL; } /* Populate private data for each thread */ n_inputs = params.n_inputs; n_constants = params.n_constants; n_temps = params.n_temps; memsize = (1+n_inputs+n_constants+n_temps) * sizeof(char *); /* XXX malloc seems thread safe for POSIX, but for Win? */ mem = (char **)malloc(memsize); memcpy(mem, params.mem, memsize); errmsg = th_params.errmsg; params.mem = mem; /* Loop over blocks */ pthread_mutex_lock(&gs.count_mutex); if (!gs.init_sentinels_done) { /* Set sentinels and other global variables */ gs.gindex = start; istart = gs.gindex; iend = istart + block_size; if (iend > vlen) { iend = vlen; } gs.init_sentinels_done = 1; /* sentinels have been initialised */ gs.giveup = 0; /* no giveup initially */ } else { gs.gindex += block_size; istart = gs.gindex; iend = istart + block_size; if (iend > vlen) { iend = vlen; } } /* Grab one of the iterators */ iter = th_params.iter[tid]; if (iter == NULL) { th_params.ret_code = -1; gs.giveup = 1; } memsteps = th_params.memsteps[tid]; /* Get temporary space for each thread */ ret = get_temps_space(params, mem, BLOCK_SIZE1); if (ret < 0) { /* Propagate error to main thread */ th_params.ret_code = ret; gs.giveup = 1; } pthread_mutex_unlock(&gs.count_mutex); while (istart < vlen && !gs.giveup) { /* Reset the iterator to the range for this task */ ret = NpyIter_ResetToIterIndexRange(iter, istart, iend, errmsg); /* Execute the task */ if (ret >= 0) { ret = vm_engine_iter_task(iter, memsteps, params, pc_error, errmsg); } if (ret < 0) { pthread_mutex_lock(&gs.count_mutex); gs.giveup = 1; /* Propagate error to main thread */ th_params.ret_code = ret; pthread_mutex_unlock(&gs.count_mutex); break; } pthread_mutex_lock(&gs.count_mutex); gs.gindex += block_size; istart = gs.gindex; iend = istart + block_size; if (iend > vlen) { iend = vlen; } pthread_mutex_unlock(&gs.count_mutex); } /* Meeting point for all threads (wait for finalization) */ pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads > 0) { gs.count_threads--; do { pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } while (gs.barrier_passed); } else { gs.barrier_passed = 0; pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); /* Release resources */ free_temps_space(params, mem); free(mem); } /* closes while(1) */ /* This should never be reached, but anyway */ return(0); } /* Initialize threads */ int init_threads(void) { int tid, rc; if ( !(gs.nthreads > 1 && (!gs.init_threads_done || gs.pid != getpid())) ) { /* Thread pool must always be initialized once and once only. 
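         * The gs.pid != getpid() term also lets a fork()ed child rebuild its own
         * pool: init_threads_done is still set from the parent, but the parent's
         * threads do not exist in the child process.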
*/ return(0); } /* Initialize mutex and condition variable objects */ pthread_mutex_init(&gs.count_mutex, NULL); pthread_mutex_init(&gs.parallel_mutex, NULL); /* Barrier initialization */ pthread_mutex_init(&gs.count_threads_mutex, NULL); pthread_cond_init(&gs.count_threads_cv, NULL); gs.count_threads = 0; /* Reset threads counter */ gs.barrier_passed = 0; /* Finally, create the threads */ for (tid = 0; tid < gs.nthreads; tid++) { gs.tids[tid] = tid; rc = pthread_create(&gs.threads[tid], NULL, th_worker, (void *)&gs.tids[tid]); if (rc) { fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc); fprintf(stderr, "\tError detail: %s\n", strerror(rc)); exit(-1); } } gs.init_threads_done = 1; /* Initialization done! */ gs.pid = (int)getpid(); /* save the PID for this process */ return(0); } /* Set the number of threads in numexpr's VM */ int numexpr_set_nthreads(int nthreads_new) { int nthreads_old = gs.nthreads; int t, rc; void *status; // if (nthreads_new > MAX_THREADS) { // fprintf(stderr, // "Error. nthreads cannot be larger than MAX_THREADS (%d)", // MAX_THREADS); // return -1; // } if (nthreads_new > global_max_threads) { fprintf(stderr, "Error. nthreads cannot be larger than environment variable \"NUMEXPR_MAX_THREADS\" (%ld)", global_max_threads); return -1; } else if (nthreads_new <= 0) { fprintf(stderr, "Error. nthreads must be a positive integer"); return -1; } /* Only join threads if they are not initialized or if our PID is different from that in pid var (probably means that we are a subprocess, and thus threads are non-existent). */ if (gs.nthreads > 1 && gs.init_threads_done && gs.pid == getpid()) { /* Tell all existing threads to finish */ gs.end_threads = 1; pthread_mutex_lock(&gs.count_threads_mutex); if (gs.count_threads < gs.nthreads) { gs.count_threads++; do { pthread_cond_wait(&gs.count_threads_cv, &gs.count_threads_mutex); } while (!gs.barrier_passed); } else { gs.barrier_passed = 1; pthread_cond_broadcast(&gs.count_threads_cv); } pthread_mutex_unlock(&gs.count_threads_mutex); /* Join exiting threads */ for (t=0; t= 3 /* XXX: handle the "global_state" state via moduledef */ static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "interpreter", NULL, -1, /* sizeof(struct global_state), */ module_methods, NULL, NULL, /* module_traverse, */ NULL, /* module_clear, */ NULL }; #define INITERROR return NULL PyObject * PyInit_interpreter(void) #else #define INITERROR return PyMODINIT_FUNC initinterpreter() #endif { PyObject *m, *d; char *max_thread_str = getenv("NUMEXPR_MAX_THREADS"); char *end; if (max_thread_str != NULL) { global_max_threads = strtol(max_thread_str, &end, 10); } th_params.memsteps = (npy_intp**)calloc(sizeof(npy_intp*), global_max_threads); th_params.iter = (NpyIter**)calloc(sizeof(NpyIter*), global_max_threads); th_params.reduce_iter = (NpyIter**)calloc(sizeof(NpyIter*), global_max_threads); gs.threads = (pthread_t*)calloc(sizeof(pthread_t), global_max_threads); gs.tids = (int*)calloc(sizeof(int), global_max_threads); // TODO: for Py3, deallocate: https://docs.python.org/3/c-api/module.html#c.PyModuleDef.m_free // For Python 2.7, people have to exit the process to reclaim the memory. 
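    // Illustrative usage (the shell invocation is an example, not taken from this file):
    //     NUMEXPR_MAX_THREADS=16 python my_script.py
    // raises the pool ceiling parsed above; if the variable is unset,
    // global_max_threads keeps DEFAULT_MAX_THREADS from numexpr_config.hpp.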
if (PyType_Ready(&NumExprType) < 0) INITERROR; #if PY_MAJOR_VERSION >= 3 m = PyModule_Create(&moduledef); #else m = Py_InitModule3("interpreter", module_methods, NULL); #endif if (m == NULL) INITERROR; Py_INCREF(&NumExprType); PyModule_AddObject(m, "NumExpr", (PyObject *)&NumExprType); import_array(); d = PyDict_New(); if (!d) INITERROR; #define OPCODE(n, name, sname, ...) \ if (add_symbol(d, sname, name, "add_op") < 0) { INITERROR; } #include "opcodes.hpp" #undef OPCODE if (PyModule_AddObject(m, "opcodes", d) < 0) INITERROR; d = PyDict_New(); if (!d) INITERROR; #define add_func(name, sname) \ if (add_symbol(d, sname, name, "add_func") < 0) { INITERROR; } #define FUNC_FF(name, sname, ...) add_func(name, sname); #define FUNC_FFF(name, sname, ...) add_func(name, sname); #define FUNC_DD(name, sname, ...) add_func(name, sname); #define FUNC_DDD(name, sname, ...) add_func(name, sname); #define FUNC_CC(name, sname, ...) add_func(name, sname); #define FUNC_CCC(name, sname, ...) add_func(name, sname); #include "functions.hpp" #undef FUNC_CCC #undef FUNC_CC #undef FUNC_DDD #undef FUNC_DD #undef FUNC_DD #undef FUNC_FFF #undef FUNC_FF #undef add_func if (PyModule_AddObject(m, "funccodes", d) < 0) INITERROR; if (PyModule_AddObject(m, "allaxes", PyLong_FromLong(255)) < 0) INITERROR; if (PyModule_AddObject(m, "maxdims", PyLong_FromLong(NPY_MAXDIMS)) < 0) INITERROR; if(PyModule_AddIntConstant(m, "MAX_THREADS", global_max_threads) < 0) INITERROR; #if PY_MAJOR_VERSION >= 3 return m; #endif } #ifdef __cplusplus } // extern "C" #endif numexpr-2.7.1/numexpr/module.hpp000066400000000000000000000043251360375525100167260ustar00rootroot00000000000000#ifndef NUMEXPR_MODULE_HPP #define NUMEXPR_MODULE_HPP // Deal with the clunky numpy import mechanism // by inverting the logic of the NO_IMPORT_ARRAY symbol. #define PY_ARRAY_UNIQUE_SYMBOL numexpr_ARRAY_API #ifndef DO_NUMPY_IMPORT_ARRAY # define NO_IMPORT_ARRAY #endif #define NPY_NO_DEPRECATED_API NPY_API_VERSION #include #include #include #include "numexpr_config.hpp" struct global_state { /* Global variables for threads */ int nthreads; /* number of desired threads in pool */ int init_threads_done; /* pool of threads initialized? */ int end_threads; /* should exisiting threads end? */ // pthread_t threads[MAX_THREADS]; /* opaque structure for threads */ // int tids[MAX_THREADS]; /* ID per each thread */ /* NOTE: threads and tids are arrays, they MUST be allocated to length `global_max_threads` before module load. */ pthread_t *threads; /* opaque structure for threads */ int *tids; /* ID per each thread */ npy_intp gindex; /* global index for all threads */ int init_sentinels_done; /* sentinels initialized? */ int giveup; /* should parallel code giveup? */ int force_serial; /* force serial code instead of parallel? 
*/ int pid; /* the PID for this process */ /* Synchronization variables for threadpool state */ pthread_mutex_t count_mutex; int count_threads; int barrier_passed; /* indicates if the thread pool's thread barrier is unlocked and ready for the VM to process.*/ pthread_mutex_t count_threads_mutex; pthread_cond_t count_threads_cv; /* Mutual exclusion for access to global thread params (th_params) */ pthread_mutex_t parallel_mutex; global_state() { nthreads = 1; init_threads_done = 0; barrier_passed = 0; end_threads = 0; pid = 0; } }; extern global_state gs; int numexpr_set_nthreads(int nthreads_new); #endif // NUMEXPR_MODULE_HPP numexpr-2.7.1/numexpr/msvc_function_stubs.hpp000066400000000000000000000066161360375525100215430ustar00rootroot00000000000000#ifndef NUMEXPR_MSVC_FUNCTION_STUBS_HPP #define NUMEXPR_MSVC_FUNCTION_STUBS_HPP /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ /* Declare stub functions for MSVC. It turns out that single precision definitions in are actually #define'd and are not usable as function pointers :-/ */ #if _MSC_VER < 1400 // 1310 == MSVC 7.1 /* Apparently, single precision functions are not included in MSVC 7.1 */ #define sqrtf(x) ((float)sqrt((double)(x))) #define sinf(x) ((float)sin((double)(x))) #define cosf(x) ((float)cos((double)(x))) #define tanf(x) ((float)tan((double)(x))) #define asinf(x) ((float)asin((double)(x))) #define acosf(x) ((float)acos((double)(x))) #define atanf(x) ((float)atan((double)(x))) #define sinhf(x) ((float)sinh((double)(x))) #define coshf(x) ((float)cosh((double)(x))) #define tanhf(x) ((float)tanh((double)(x))) #define asinhf(x) ((float)asinh((double)(x))) #define acoshf(x) ((float)acosh((double)(x))) #define atanhf(x) ((float)atanh((double)(x))) #define logf(x) ((float)log((double)(x))) #define log1pf(x) ((float)log1p((double)(x))) #define log10f(x) ((float)log10((double)(x))) #define expf(x) ((float)exp((double)(x))) #define expm1f(x) ((float)expm1((double)(x))) #define fabsf(x) ((float)fabs((double)(x))) #define fmodf(x, y) ((float)fmod((double)(x), (double)(y))) #define atan2f(x, y) ((float)atan2((double)(x), (double)(y))) #define ceilf(x) ((float)ceil((double)(x))) /* The next are directly called from interp_body.cpp */ #define powf(x, y) ((float)pow((double)(x), (double)(y))) #define floorf(x) ((float)floor((double)(x))) #endif // _MSC_VER < 1400 /* Now the actual stubs */ inline float sqrtf2(float x) { return sqrtf(x); } inline float sinf2(float x) { return sinf(x); } inline float cosf2(float x) { return cosf(x); } inline float tanf2(float x) { return tanf(x); } inline float asinf2(float x) { return asinf(x); } inline float acosf2(float x) { return acosf(x); } inline float atanf2(float x) { return atanf(x); } inline float sinhf2(float x) { return sinhf(x); } inline float coshf2(float x) { return coshf(x); } inline float tanhf2(float x) { return tanhf(x); } inline float asinhf2(float x) { return asinhf(x); } inline float acoshf2(float x) { return acoshf(x); } inline float atanhf2(float x) { return atanhf(x); } inline float logf2(float x) { return logf(x); } inline float log1pf2(float x) { return log1pf(x); } inline float log10f2(float x) { return log10f(x); } inline float expf2(float x) { return expf(x); } inline float expm1f2(float x) { return expm1f(x); } inline float 
fabsf2(float x) { return fabsf(x); } inline float fmodf2(float x, float y) { return fmodf(x, y); } inline float atan2f2(float x, float y) { return atan2f(x, y); } // Needed for allowing the internal casting in numexpr machinery for // conjugate operations inline float fconjf2(float x) { return x; } inline float ceilf2(float x) { return ceilf(x); } inline float floorf2(float x) { return floorf(x); } #endif // NUMEXPR_MSVC_FUNCTION_STUBS_HPP numexpr-2.7.1/numexpr/necompiler.py000066400000000000000000000674601360375525100174500ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### import __future__ import sys import numpy import threading is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE from numexpr import interpreter, expressions, use_vml from numexpr.utils import CacheDict # Declare a double type that does not exist in Python space double = numpy.double if sys.version_info[0] < 3: int_ = int long_ = long else: int_ = numpy.int32 long_ = numpy.int64 typecode_to_kind = {'b': 'bool', 'i': 'int', 'l': 'long', 'f': 'float', 'd': 'double', 'c': 'complex', 's': 'bytes', 'n': 'none'} kind_to_typecode = {'bool': 'b', 'int': 'i', 'long': 'l', 'float': 'f', 'double': 'd', 'complex': 'c', 'bytes': 's', 'none': 'n'} type_to_typecode = {bool: 'b', int_: 'i', long_: 'l', float: 'f', double: 'd', complex: 'c', bytes: 's'} type_to_kind = expressions.type_to_kind kind_to_type = expressions.kind_to_type default_type = kind_to_type[expressions.default_kind] # VML functions that are implemented in numexpr vml_functions = [ "div", # interp_body.cpp "inv", # interp_body.cpp "pow", # interp_body.cpp # Keep the rest of this list in sync with the ones listed in functions.hpp "sqrt", "sin", "cos", "tan", "arcsin", "arccos", "arctan", "sinh", "cosh", "tanh", "arcsinh", "arccosh", "arctanh", "log", "log1p", "log10", "exp", "expm1", "absolute", "conjugate", "arctan2", "fmod", "ceil", "floor" ] # Final addtions for Python 3 (mainly for PyTables needs) if sys.version_info[0] > 2: typecode_to_kind['s'] = 'str' kind_to_typecode['str'] = 's' type_to_typecode[str] = 's' scalar_constant_kinds = kind_to_typecode.keys() class ASTNode(object): """Abstract Syntax Tree node. Members: astType -- type of node (op, constant, variable, raw, or alias) astKind -- the type of the result (bool, float, etc.) value -- value associated with this node. An opcode, numerical value, a variable name, etc. children -- the children below this node reg -- the register assigned to the result for this node. 
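    Equality and hashing only look at (astType, astKind, value, children); this is
    what collapseDuplicateSubtrees() relies on to alias common subexpressions.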
""" cmpnames = ['astType', 'astKind', 'value', 'children'] def __init__(self, astType='generic', astKind='unknown', value=None, children=()): object.__init__(self) self.astType = astType self.astKind = astKind self.value = value self.children = tuple(children) self.reg = None def __eq__(self, other): if self.astType == 'alias': self = self.value if other.astType == 'alias': other = other.value if not isinstance(other, ASTNode): return False for name in self.cmpnames: if getattr(self, name) != getattr(other, name): return False return True def __lt__(self,other): # RAM: this is a fix for issue #88 whereby sorting on constants # that may be of astKind == 'complex' but type(self.value) == int or float # Here we let NumPy sort as it will cast data properly for comparison # when the Python built-ins will raise an error. if self.astType == 'constant': if self.astKind == other.astKind: return numpy.array(self.value) < numpy.array(other.value) return self.astKind < other.astKind else: raise TypeError( 'Sorting not implemented for astType: %s'%self.astType ) def __hash__(self): if self.astType == 'alias': self = self.value return hash((self.astType, self.astKind, self.value, self.children)) def __str__(self): return 'AST(%s, %s, %s, %s, %s)' % (self.astType, self.astKind, self.value, self.children, self.reg) def __repr__(self): return '' % id(self) def key(self): return (self.astType, self.astKind, self.value, self.children) def typecode(self): return kind_to_typecode[self.astKind] def postorderWalk(self): for c in self.children: for w in c.postorderWalk(): yield w yield self def allOf(self, *astTypes): astTypes = set(astTypes) for w in self.postorderWalk(): if w.astType in astTypes: yield w def expressionToAST(ex): """Take an expression tree made out of expressions.ExpressionNode, and convert to an AST tree. This is necessary as ExpressionNode overrides many methods to act like a number. """ return ASTNode(ex.astType, ex.astKind, ex.value, [expressionToAST(c) for c in ex.children]) def sigPerms(s): """Generate all possible signatures derived by upcasting the given signature. """ codes = 'bilfdc' if not s: yield '' elif s[0] in codes: start = codes.index(s[0]) for x in codes[start:]: for y in sigPerms(s[1:]): yield x + y elif s[0] == 's': # numbers shall not be cast to strings for y in sigPerms(s[1:]): yield 's' + y else: yield s def typeCompileAst(ast): """Assign appropiate types to each node in the AST. Will convert opcodes and functions to appropiate upcast version, and add "cast" ops if needed. """ children = list(ast.children) if ast.astType == 'op': retsig = ast.typecode() basesig = ''.join(x.typecode() for x in list(ast.children)) # Find some operation that will work on an acceptable casting of args. 
for sig in sigPerms(basesig): value = (ast.value + '_' + retsig + sig).encode('ascii') if value in interpreter.opcodes: break else: for sig in sigPerms(basesig): funcname = (ast.value + '_' + retsig + sig).encode('ascii') if funcname in interpreter.funccodes: value = ('func_%sn' % (retsig + sig)).encode('ascii') children += [ASTNode('raw', 'none', interpreter.funccodes[funcname])] break else: raise NotImplementedError( "couldn't find matching opcode for '%s'" % (ast.value + '_' + retsig + basesig)) # First just cast constants, then cast variables if necessary: for i, (have, want) in enumerate(zip(basesig, sig)): if have != want: kind = typecode_to_kind[want] if children[i].astType == 'constant': children[i] = ASTNode('constant', kind, children[i].value) else: opname = "cast" children[i] = ASTNode('op', kind, opname, [children[i]]) else: value = ast.value children = ast.children return ASTNode(ast.astType, ast.astKind, value, [typeCompileAst(c) for c in children]) class Register(object): """Abstraction for a register in the VM. Members: node -- the AST node this corresponds to temporary -- True if this isn't an input or output immediate -- not a register, but an immediate value n -- the physical register number. None if no number assigned yet. """ def __init__(self, astnode, temporary=False): self.node = astnode self.temporary = temporary self.immediate = False self.n = None def __str__(self): if self.temporary: name = 'Temporary' else: name = 'Register' return '%s(%s, %s, %s)' % (name, self.node.astType, self.node.astKind, self.n,) def __repr__(self): return self.__str__() class Immediate(Register): """Representation of an immediate (integer) operand, instead of a register. """ def __init__(self, astnode): Register.__init__(self, astnode) self.immediate = True def __str__(self): return 'Immediate(%d)' % (self.node.value,) def stringToExpression(s, types, context): """Given a string, convert it to a tree of ExpressionNode's. """ old_ctx = expressions._context.get_current_context() try: expressions._context.set_new_context(context) # first compile to a code object to determine the names if context.get('truediv', False): flags = __future__.division.compiler_flag else: flags = 0 c = compile(s, '', 'eval', flags) # make VariableNode's for the names names = {} for name in c.co_names: if name == "None": names[name] = None elif name == "True": names[name] = True elif name == "False": names[name] = False else: t = types.get(name, default_type) names[name] = expressions.VariableNode(name, type_to_kind[t]) names.update(expressions.functions) # now build the expression ex = eval(c, names) if expressions.isConstant(ex): ex = expressions.ConstantNode(ex, expressions.getKind(ex)) elif not isinstance(ex, expressions.ExpressionNode): raise TypeError("unsupported expression type: %s" % type(ex)) finally: expressions._context.set_new_context(old_ctx) return ex def isReduction(ast): prefixes = (b'sum_', b'prod_', b'min_', b'max_') return any(ast.value.startswith(p) for p in prefixes) def getInputOrder(ast, input_order=None): """Derive the input order of the variables in an expression. 
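    If `input_order` is not given, the variables are ordered alphabetically by
    name; otherwise `input_order` must name exactly the variables that appear
    in the expression.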
""" variables = {} for a in ast.allOf('variable'): variables[a.value] = a variable_names = set(variables.keys()) if input_order: if variable_names != set(input_order): raise ValueError( "input names (%s) don't match those found in expression (%s)" % (input_order, variable_names)) ordered_names = input_order else: ordered_names = list(variable_names) ordered_names.sort() ordered_variables = [variables[v] for v in ordered_names] return ordered_variables def convertConstantToKind(x, kind): # Exception for 'float' types that will return the NumPy float32 type if kind == 'float': return numpy.float32(x) elif sys.version_info[0] >= 3 and isinstance(x,str): return x.encode('ascii') return kind_to_type[kind](x) def getConstants(ast): ''' RAM: implemented magic method __lt__ for ASTNode to fix issues #88 and #209. The following test code works now, as does the test suite. import numexpr as ne a = 1 + 3j; b = 5.0 ne.evaluate( 'a*2 + 15j - b' ) ''' constants_order = sorted( ast.allOf('constant') ) constants = [convertConstantToKind(a.value, a.astKind) for a in constants_order] return constants_order, constants def sortNodesByOrder(nodes, order): order_map = {} for i, (_, v, _) in enumerate(order): order_map[v] = i dec_nodes = [(order_map[n.value], n) for n in nodes] dec_nodes.sort() return [a[1] for a in dec_nodes] def assignLeafRegisters(inodes, registerMaker): """Assign new registers to each of the leaf nodes. """ leafRegisters = {} for node in inodes: key = node.key() if key in leafRegisters: node.reg = leafRegisters[key] else: node.reg = leafRegisters[key] = registerMaker(node) def assignBranchRegisters(inodes, registerMaker): """Assign temporary registers to each of the branch nodes. """ for node in inodes: node.reg = registerMaker(node, temporary=True) def collapseDuplicateSubtrees(ast): """Common subexpression elimination. """ seen = {} aliases = [] for a in ast.allOf('op'): if a in seen: target = seen[a] a.astType = 'alias' a.value = target a.children = () aliases.append(a) else: seen[a] = a # Set values and registers so optimizeTemporariesAllocation # doesn't get confused for a in aliases: while a.value.astType == 'alias': a.value = a.value.value return aliases def optimizeTemporariesAllocation(ast): """Attempt to minimize the number of temporaries needed, by reusing old ones. """ nodes = [n for n in ast.postorderWalk() if n.reg.temporary] users_of = dict((n.reg, set()) for n in nodes) node_regs = dict((n, set(c.reg for c in n.children if c.reg.temporary)) for n in nodes) if nodes and nodes[-1] is not ast: nodes_to_check = nodes + [ast] else: nodes_to_check = nodes for n in nodes_to_check: for c in n.children: if c.reg.temporary: users_of[c.reg].add(n) unused = dict([(tc, set()) for tc in scalar_constant_kinds]) for n in nodes: for c in n.children: reg = c.reg if reg.temporary: users = users_of[reg] users.discard(n) if not users: unused[reg.node.astKind].add(reg) if unused[n.astKind]: reg = unused[n.astKind].pop() users_of[reg] = users_of[n.reg] n.reg = reg def setOrderedRegisterNumbers(order, start): """Given an order of nodes, assign register numbers. """ for i, node in enumerate(order): node.reg.n = start + i return start + len(order) def setRegisterNumbersForTemporaries(ast, start): """Assign register numbers for temporary registers, keeping track of aliases and handling immediate operands. 
""" seen = 0 signature = '' aliases = [] for node in ast.postorderWalk(): if node.astType == 'alias': aliases.append(node) node = node.value if node.reg.immediate: node.reg.n = node.value continue reg = node.reg if reg.n is None: reg.n = start + seen seen += 1 signature += reg.node.typecode() for node in aliases: node.reg = node.value.reg return start + seen, signature def convertASTtoThreeAddrForm(ast): """Convert an AST to a three address form. Three address form is (op, reg1, reg2, reg3), where reg1 is the destination of the result of the instruction. I suppose this should be called three register form, but three address form is found in compiler theory. """ return [(node.value, node.reg) + tuple([c.reg for c in node.children]) for node in ast.allOf('op')] def compileThreeAddrForm(program): """Given a three address form of the program, compile it a string that the VM understands. """ def nToChr(reg): if reg is None: return b'\xff' elif reg.n < 0: raise ValueError("negative value for register number %s" % reg.n) else: if sys.version_info[0] < 3: return chr(reg.n) else: # int.to_bytes is not available in Python < 3.2 #return reg.n.to_bytes(1, sys.byteorder) return bytes([reg.n]) def quadrupleToString(opcode, store, a1=None, a2=None): cop = chr(interpreter.opcodes[opcode]).encode('ascii') cs = nToChr(store) ca1 = nToChr(a1) ca2 = nToChr(a2) return cop + cs + ca1 + ca2 def toString(args): while len(args) < 4: args += (None,) opcode, store, a1, a2 = args[:4] s = quadrupleToString(opcode, store, a1, a2) l = [s] args = args[4:] while args: s = quadrupleToString(b'noop', *args[:3]) l.append(s) args = args[3:] return b''.join(l) prog_str = b''.join([toString(t) for t in program]) return prog_str context_info = [ ('optimization', ('none', 'moderate', 'aggressive'), 'aggressive'), ('truediv', (False, True, 'auto'), 'auto') ] def getContext(kwargs, frame_depth=1): d = kwargs.copy() context = {} for name, allowed, default in context_info: value = d.pop(name, default) if value in allowed: context[name] = value else: raise ValueError("'%s' must be one of %s" % (name, allowed)) if d: raise ValueError("Unknown keyword argument '%s'" % d.popitem()[0]) if context['truediv'] == 'auto': caller_globals = sys._getframe(frame_depth + 1).f_globals context['truediv'] = caller_globals.get('division', None) == __future__.division return context def precompile(ex, signature=(), context={}): """Compile the expression to an intermediate form. 
""" types = dict(signature) input_order = [name for (name, type_) in signature] if isinstance(ex, (str, unicode)): ex = stringToExpression(ex, types, context) # the AST is like the expression, but the node objects don't have # any odd interpretations ast = expressionToAST(ex) if ex.astType != 'op': ast = ASTNode('op', value='copy', astKind=ex.astKind, children=(ast,)) ast = typeCompileAst(ast) aliases = collapseDuplicateSubtrees(ast) assignLeafRegisters(ast.allOf('raw'), Immediate) assignLeafRegisters(ast.allOf('variable', 'constant'), Register) assignBranchRegisters(ast.allOf('op'), Register) # assign registers for aliases for a in aliases: a.reg = a.value.reg input_order = getInputOrder(ast, input_order) constants_order, constants = getConstants(ast) if isReduction(ast): ast.reg.temporary = False optimizeTemporariesAllocation(ast) ast.reg.temporary = False r_output = 0 ast.reg.n = 0 r_inputs = r_output + 1 r_constants = setOrderedRegisterNumbers(input_order, r_inputs) r_temps = setOrderedRegisterNumbers(constants_order, r_constants) r_end, tempsig = setRegisterNumbersForTemporaries(ast, r_temps) threeAddrProgram = convertASTtoThreeAddrForm(ast) input_names = tuple([a.value for a in input_order]) signature = ''.join(type_to_typecode[types.get(x, default_type)] for x in input_names) return threeAddrProgram, signature, tempsig, constants, input_names def NumExpr(ex, signature=(), **kwargs): """ Compile an expression built using E. variables to a function. ex can also be specified as a string "2*a+3*b". The order of the input variables and their types can be specified using the signature parameter, which is a list of (name, type) pairs. Returns a `NumExpr` object containing the compiled function. """ # NumExpr can be called either directly by the end-user, in which case # kwargs need to be sanitized by getContext, or by evaluate, # in which case kwargs are in already sanitized. # In that case frame_depth is wrong (it should be 2) but it doesn't matter # since it will not be used (because truediv='auto' has already been # translated to either True or False). context = getContext(kwargs, frame_depth=1) threeAddrProgram, inputsig, tempsig, constants, input_names = precompile(ex, signature, context) program = compileThreeAddrForm(threeAddrProgram) return interpreter.NumExpr(inputsig.encode('ascii'), tempsig.encode('ascii'), program, constants, input_names) def disassemble(nex): """ Given a NumExpr object, return a list which is the program disassembled. 
""" rev_opcodes = {} for op in interpreter.opcodes: rev_opcodes[interpreter.opcodes[op]] = op r_constants = 1 + len(nex.signature) r_temps = r_constants + len(nex.constants) def getArg(pc, offset): if sys.version_info[0] < 3: arg = ord(nex.program[pc + offset]) op = rev_opcodes.get(ord(nex.program[pc])) else: arg = nex.program[pc + offset] op = rev_opcodes.get(nex.program[pc]) try: code = op.split(b'_')[1][offset - 1] except IndexError: return None if sys.version_info[0] > 2: # int.to_bytes is not available in Python < 3.2 #code = code.to_bytes(1, sys.byteorder) code = bytes([code]) if arg == 255: return None if code != b'n': if arg == 0: return b'r0' elif arg < r_constants: return ('r%d[%s]' % (arg, nex.input_names[arg - 1])).encode('ascii') elif arg < r_temps: return ('c%d[%s]' % (arg, nex.constants[arg - r_constants])).encode('ascii') else: return ('t%d' % (arg,)).encode('ascii') else: return arg source = [] for pc in range(0, len(nex.program), 4): if sys.version_info[0] < 3: op = rev_opcodes.get(ord(nex.program[pc])) else: op = rev_opcodes.get(nex.program[pc]) dest = getArg(pc, 1) arg1 = getArg(pc, 2) arg2 = getArg(pc, 3) source.append((op, dest, arg1, arg2)) return source def getType(a): kind = a.dtype.kind if kind == 'b': return bool if kind in 'iu': if a.dtype.itemsize > 4: return long_ # ``long`` is for integers of more than 32 bits if kind == 'u' and a.dtype.itemsize == 4: return long_ # use ``long`` here as an ``int`` is not enough return int_ if kind == 'f': if a.dtype.itemsize > 4: return double # ``double`` is for floats of more than 32 bits return float if kind == 'c': return complex if kind == 'S': return bytes raise ValueError("unknown type %s" % a.dtype.name) def getExprNames(text, context): ex = stringToExpression(text, {}, context) ast = expressionToAST(ex) input_order = getInputOrder(ast, None) #try to figure out if vml operations are used by expression if not use_vml: ex_uses_vml = False else: for node in ast.postorderWalk(): if node.astType == 'op' and node.value in vml_functions: ex_uses_vml = True break else: ex_uses_vml = False return [a.value for a in input_order], ex_uses_vml def getArguments(names, local_dict=None, global_dict=None): """Get the arguments based on the names.""" call_frame = sys._getframe(2) clear_local_dict = False if local_dict is None: local_dict = call_frame.f_locals clear_local_dict = True try: frame_globals = call_frame.f_globals if global_dict is None: global_dict = frame_globals # If `call_frame` is the top frame of the interpreter we can't clear its # `local_dict`, because it is actually the `global_dict`. clear_local_dict = clear_local_dict and not frame_globals is local_dict arguments = [] for name in names: try: a = local_dict[name] except KeyError: a = global_dict[name] arguments.append(numpy.asarray(a)) finally: # If we generated local_dict via an explicit reference to f_locals, # clear the dict to prevent creating extra ref counts in the caller's scope # See https://github.com/pydata/numexpr/issues/310 if clear_local_dict: local_dict.clear() return arguments # Dictionaries for caching variable names and compiled expressions _names_cache = CacheDict(256) _numexpr_cache = CacheDict(256) _numexpr_last = {} evaluate_lock = threading.Lock() def evaluate(ex, local_dict=None, global_dict=None, out=None, order='K', casting='safe', **kwargs): """Evaluate a simple array expression element-wise, using the new iterator. ex is a string forming an expression, like "2*a+3*b". 
The values for "a" and "b" will by default be taken from the calling function's frame (through use of sys._getframe()). Alternatively, they can be specifed using the 'local_dict' or 'global_dict' arguments. Parameters ---------- local_dict : dictionary, optional A dictionary that replaces the local operands in current frame. global_dict : dictionary, optional A dictionary that replaces the global operands in current frame. out : NumPy array, optional An existing array where the outcome is going to be stored. Care is required so that this array has the same shape and type than the actual outcome of the computation. Useful for avoiding unnecessary new array allocations. order : {'C', 'F', 'A', or 'K'}, optional Controls the iteration order for operands. 'C' means C order, 'F' means Fortran order, 'A' means 'F' order if all the arrays are Fortran contiguous, 'C' order otherwise, and 'K' means as close to the order the array elements appear in memory as possible. For efficient computations, typically 'K'eep order (the default) is desired. casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional Controls what kind of data casting may occur when making a copy or buffering. Setting this to 'unsafe' is not recommended, as it can adversely affect accumulations. * 'no' means the data types should not be cast at all. * 'equiv' means only byte-order changes are allowed. * 'safe' means only casts which can preserve values are allowed. * 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. """ global _numexpr_last if not isinstance(ex, (str, unicode)): raise ValueError("must specify expression as a string") # Get the names for this expression context = getContext(kwargs, frame_depth=1) expr_key = (ex, tuple(sorted(context.items()))) if expr_key not in _names_cache: _names_cache[expr_key] = getExprNames(ex, context) names, ex_uses_vml = _names_cache[expr_key] arguments = getArguments(names, local_dict, global_dict) # Create a signature signature = [(name, getType(arg)) for (name, arg) in zip(names, arguments)] # Look up numexpr if possible. numexpr_key = expr_key + (tuple(signature),) try: compiled_ex = _numexpr_cache[numexpr_key] except KeyError: compiled_ex = _numexpr_cache[numexpr_key] = NumExpr(ex, signature, **context) kwargs = {'out': out, 'order': order, 'casting': casting, 'ex_uses_vml': ex_uses_vml} _numexpr_last = dict(ex=compiled_ex, argnames=names, kwargs=kwargs) with evaluate_lock: return compiled_ex(*arguments, **kwargs) def re_evaluate(local_dict=None): """Re-evaluate the previous executed array expression without any check. This is meant for accelerating loops that are re-evaluating the same expression repeatedly without changing anything else than the operands. If unsure, use evaluate() which is safer. Parameters ---------- local_dict : dictionary, optional A dictionary that replaces the local operands in current frame. 
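    A minimal sketch of the intended pattern (operand names are illustrative only):

        ne.evaluate("2*a+3*b")      # compiles and caches the expression
        for _ in range(10):
            a += 1                  # only the operands change...
            ne.re_evaluate()        # ...so the cached expression is reused
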
""" try: compiled_ex = _numexpr_last['ex'] except KeyError: raise RuntimeError("not a previous evaluate() execution found") argnames = _numexpr_last['argnames'] args = getArguments(argnames, local_dict) kwargs = _numexpr_last['kwargs'] with evaluate_lock: return compiled_ex(*args, **kwargs) numexpr-2.7.1/numexpr/numexpr_config.hpp000066400000000000000000000023001360375525100204530ustar00rootroot00000000000000#ifndef NUMEXPR_CONFIG_HPP #define NUMEXPR_CONFIG_HPP // x86 platform works with unaligned reads and writes // MW: I have seen exceptions to this when the compiler chooses to use aligned SSE #if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64)) # define USE_UNALIGNED_ACCESS 1 #endif #ifdef SCIPY_MKL_H #define USE_VML #endif #ifdef USE_VML /* The values below have been tuned for a Skylake processor (E3-1245 v5 @ 3.50GHz) */ #define BLOCK_SIZE1 1024 #define BLOCK_SIZE2 16 #else /* The values below have been tuned for a Skylake processor (E3-1245 v5 @ 3.50GHz) */ #define BLOCK_SIZE1 1024 #define BLOCK_SIZE2 16 #endif // The default threadpool size. It's prefer that the user set this via an // environment variable, "NUMEXPR_MAX_THREADS" #define DEFAULT_MAX_THREADS 64 #if defined(_WIN32) #include "win32/pthread.h" #include #define getpid _getpid #else #include #include "unistd.h" #endif #ifdef USE_VML #include "mkl_vml.h" #include "mkl_service.h" #endif #ifdef _WIN32 #ifndef __MINGW32__ #include "missing_posix_functions.hpp" #endif #include "msvc_function_stubs.hpp" #endif #endif // NUMEXPR_CONFIG_HPP numexpr-2.7.1/numexpr/numexpr_object.cpp000066400000000000000000000346701360375525100204660ustar00rootroot00000000000000/********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. 
**********************************************************************/ #include "module.hpp" #include #include "numexpr_config.hpp" #include "interpreter.hpp" #include "numexpr_object.hpp" static int size_from_char(char c) { switch (c) { case 'b': return sizeof(char); case 'i': return sizeof(int); case 'l': return sizeof(long long); case 'f': return sizeof(float); case 'd': return sizeof(double); case 'c': return 2*sizeof(double); case 's': return 0; /* strings are ok but size must be computed */ default: PyErr_SetString(PyExc_TypeError, "signature value not in 'bilfdcs'"); return -1; } } static void NumExpr_dealloc(NumExprObject *self) { Py_XDECREF(self->signature); Py_XDECREF(self->tempsig); Py_XDECREF(self->constsig); Py_XDECREF(self->fullsig); Py_XDECREF(self->program); Py_XDECREF(self->constants); Py_XDECREF(self->input_names); PyMem_Del(self->mem); PyMem_Del(self->rawmem); PyMem_Del(self->memsteps); PyMem_Del(self->memsizes); Py_TYPE(self)->tp_free((PyObject*)self); } static PyObject * NumExpr_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { NumExprObject *self = (NumExprObject *)type->tp_alloc(type, 0); if (self != NULL) { #define INIT_WITH(name, object) \ self->name = object; \ if (!self->name) { \ Py_DECREF(self); \ return NULL; \ } INIT_WITH(signature, PyBytes_FromString("")); INIT_WITH(tempsig, PyBytes_FromString("")); INIT_WITH(constsig, PyBytes_FromString("")); INIT_WITH(fullsig, PyBytes_FromString("")); INIT_WITH(program, PyBytes_FromString("")); INIT_WITH(constants, PyTuple_New(0)); Py_INCREF(Py_None); self->input_names = Py_None; self->mem = NULL; self->rawmem = NULL; self->memsteps = NULL; self->memsizes = NULL; self->rawmemsize = 0; self->n_inputs = 0; self->n_constants = 0; self->n_temps = 0; #undef INIT_WITH } return (PyObject *)self; } #define CHARP(s) ((char *)(s)) static int NumExpr_init(NumExprObject *self, PyObject *args, PyObject *kwds) { int i, j, mem_offset; int n_inputs, n_constants, n_temps; PyObject *signature = NULL, *tempsig = NULL, *constsig = NULL; PyObject *fullsig = NULL, *program = NULL, *constants = NULL; PyObject *input_names = NULL, *o_constants = NULL; int *itemsizes = NULL; char **mem = NULL, *rawmem = NULL; npy_intp *memsteps; npy_intp *memsizes; int rawmemsize; static char *kwlist[] = {CHARP("signature"), CHARP("tempsig"), CHARP("program"), CHARP("constants"), CHARP("input_names"), NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "SSS|OO", kwlist, &signature, &tempsig, &program, &o_constants, &input_names)) { return -1; } n_inputs = (int)PyBytes_Size(signature); n_temps = (int)PyBytes_Size(tempsig); if (o_constants) { if (!PySequence_Check(o_constants) ) { PyErr_SetString(PyExc_TypeError, "constants must be a sequence"); return -1; } n_constants = (int)PySequence_Length(o_constants); if (!(constants = PyTuple_New(n_constants))) return -1; if (!(constsig = PyBytes_FromStringAndSize(NULL, n_constants))) { Py_DECREF(constants); return -1; } if (!(itemsizes = PyMem_New(int, n_constants))) { Py_DECREF(constants); return -1; } for (i = 0; i < n_constants; i++) { PyObject *o; if (!(o = PySequence_GetItem(o_constants, i))) { /* new reference */ Py_DECREF(constants); Py_DECREF(constsig); PyMem_Del(itemsizes); return -1; } PyTuple_SET_ITEM(constants, i, o); /* steals reference */ if (PyBool_Check(o)) { PyBytes_AS_STRING(constsig)[i] = 'b'; itemsizes[i] = size_from_char('b'); continue; } #if PY_MAJOR_VERSION < 3 if (PyInt_Check(o)) { #else if (PyArray_IsScalar(o, Int32)) { #endif PyBytes_AS_STRING(constsig)[i] = 'i'; itemsizes[i] = 
size_from_char('i'); continue; } #if PY_MAJOR_VERSION < 3 if (PyLong_Check(o)) { #else if (PyArray_IsScalar(o, Int64)) { #endif PyBytes_AS_STRING(constsig)[i] = 'l'; itemsizes[i] = size_from_char('l'); continue; } /* The Float32 scalars are the only ones that should reach here */ if (PyArray_IsScalar(o, Float32)) { PyBytes_AS_STRING(constsig)[i] = 'f'; itemsizes[i] = size_from_char('f'); continue; } if (PyFloat_Check(o)) { /* Python float constants are double precision by default */ PyBytes_AS_STRING(constsig)[i] = 'd'; itemsizes[i] = size_from_char('d'); continue; } if (PyComplex_Check(o)) { PyBytes_AS_STRING(constsig)[i] = 'c'; itemsizes[i] = size_from_char('c'); continue; } if (PyBytes_Check(o)) { PyBytes_AS_STRING(constsig)[i] = 's'; itemsizes[i] = (int)PyBytes_GET_SIZE(o); continue; } PyErr_SetString(PyExc_TypeError, "constants must be of type bool/int/long/float/double/complex/bytes"); Py_DECREF(constsig); Py_DECREF(constants); PyMem_Del(itemsizes); return -1; } } else { n_constants = 0; if (!(constants = PyTuple_New(0))) return -1; if (!(constsig = PyBytes_FromString(""))) { Py_DECREF(constants); return -1; } } fullsig = PyBytes_FromFormat("%c%s%s%s", get_return_sig(program), PyBytes_AS_STRING(signature), PyBytes_AS_STRING(constsig), PyBytes_AS_STRING(tempsig)); if (!fullsig) { Py_DECREF(constants); Py_DECREF(constsig); PyMem_Del(itemsizes); return -1; } if (!input_names) { input_names = Py_None; } /* Compute the size of registers. We leave temps out (will be malloc'ed later on). */ rawmemsize = 0; for (i = 0; i < n_constants; i++) rawmemsize += itemsizes[i]; rawmemsize *= BLOCK_SIZE1; mem = PyMem_New(char *, 1 + n_inputs + n_constants + n_temps); rawmem = PyMem_New(char, rawmemsize); memsteps = PyMem_New(npy_intp, 1 + n_inputs + n_constants + n_temps); memsizes = PyMem_New(npy_intp, 1 + n_inputs + n_constants + n_temps); if (!mem || !rawmem || !memsteps || !memsizes) { Py_DECREF(constants); Py_DECREF(constsig); Py_DECREF(fullsig); PyMem_Del(itemsizes); PyMem_Del(mem); PyMem_Del(rawmem); PyMem_Del(memsteps); PyMem_Del(memsizes); return -1; } /* 0 -> output [1, n_inputs+1) -> inputs [n_inputs+1, n_inputs+n_consts+1) -> constants [n_inputs+n_consts+1, n_inputs+n_consts+n_temps+1) -> temps */ /* Fill in 'mem' and 'rawmem' for constants */ mem_offset = 0; for (i = 0; i < n_constants; i++) { char c = PyBytes_AS_STRING(constsig)[i]; int size = itemsizes[i]; mem[i+n_inputs+1] = rawmem + mem_offset; mem_offset += BLOCK_SIZE1 * size; memsteps[i+n_inputs+1] = memsizes[i+n_inputs+1] = size; /* fill in the constants */ if (c == 'b') { char *bmem = (char*)mem[i+n_inputs+1]; char value = (char)PyLong_AsLong(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < BLOCK_SIZE1; j++) { bmem[j] = value; } } else if (c == 'i') { int *imem = (int*)mem[i+n_inputs+1]; int value = (int)PyLong_AsLong(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < BLOCK_SIZE1; j++) { imem[j] = value; } } else if (c == 'l') { long long *lmem = (long long*)mem[i+n_inputs+1]; long long value = PyLong_AsLongLong(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < BLOCK_SIZE1; j++) { lmem[j] = value; } } else if (c == 'f') { /* In this particular case the constant is in a NumPy scalar and in a regular Python object */ float *fmem = (float*)mem[i+n_inputs+1]; float value = PyArrayScalar_VAL(PyTuple_GET_ITEM(constants, i), Float); for (j = 0; j < BLOCK_SIZE1; j++) { fmem[j] = value; } } else if (c == 'd') { double *dmem = (double*)mem[i+n_inputs+1]; double value = PyFloat_AS_DOUBLE(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < 
BLOCK_SIZE1; j++) { dmem[j] = value; } } else if (c == 'c') { double *cmem = (double*)mem[i+n_inputs+1]; Py_complex value = PyComplex_AsCComplex(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < 2*BLOCK_SIZE1; j+=2) { cmem[j] = value.real; cmem[j+1] = value.imag; } } else if (c == 's') { char *smem = (char*)mem[i+n_inputs+1]; char *value = PyBytes_AS_STRING(PyTuple_GET_ITEM(constants, i)); for (j = 0; j < size*BLOCK_SIZE1; j+=size) { memcpy(smem + j, value, size); } } } /* This is no longer needed since no unusual item sizes appear in temporaries (there are no string temporaries). */ PyMem_Del(itemsizes); /* Fill in 'memsteps' and 'memsizes' for temps */ for (i = 0; i < n_temps; i++) { char c = PyBytes_AS_STRING(tempsig)[i]; int size = size_from_char(c); memsteps[i+n_inputs+n_constants+1] = size; memsizes[i+n_inputs+n_constants+1] = size; } /* See if any errors occured (e.g., in size_from_char) or if mem_offset is wrong */ if (PyErr_Occurred() || mem_offset != rawmemsize) { if (mem_offset != rawmemsize) { PyErr_Format(PyExc_RuntimeError, "mem_offset does not match rawmemsize"); } Py_DECREF(constants); Py_DECREF(constsig); Py_DECREF(fullsig); PyMem_Del(mem); PyMem_Del(rawmem); PyMem_Del(memsteps); PyMem_Del(memsizes); return -1; } #define REPLACE_OBJ(arg) \ {PyObject *tmp = self->arg; \ self->arg = arg; \ Py_XDECREF(tmp);} #define INCREF_REPLACE_OBJ(arg) {Py_INCREF(arg); REPLACE_OBJ(arg);} #define REPLACE_MEM(arg) {PyMem_Del(self->arg); self->arg=arg;} INCREF_REPLACE_OBJ(signature); INCREF_REPLACE_OBJ(tempsig); REPLACE_OBJ(constsig); REPLACE_OBJ(fullsig); INCREF_REPLACE_OBJ(program); REPLACE_OBJ(constants); INCREF_REPLACE_OBJ(input_names); REPLACE_MEM(mem); REPLACE_MEM(rawmem); REPLACE_MEM(memsteps); REPLACE_MEM(memsizes); self->rawmemsize = rawmemsize; self->n_inputs = n_inputs; self->n_constants = n_constants; self->n_temps = n_temps; #undef REPLACE_OBJ #undef INCREF_REPLACE_OBJ #undef REPLACE_MEM return check_program(self); } static PyMethodDef NumExpr_methods[] = { {"run", (PyCFunction) NumExpr_run, METH_VARARGS|METH_KEYWORDS, NULL}, {NULL, NULL} }; static PyMemberDef NumExpr_members[] = { {CHARP("signature"), T_OBJECT_EX, offsetof(NumExprObject, signature), READONLY, NULL}, {CHARP("constsig"), T_OBJECT_EX, offsetof(NumExprObject, constsig), READONLY, NULL}, {CHARP("tempsig"), T_OBJECT_EX, offsetof(NumExprObject, tempsig), READONLY, NULL}, {CHARP("fullsig"), T_OBJECT_EX, offsetof(NumExprObject, fullsig), READONLY, NULL}, {CHARP("program"), T_OBJECT_EX, offsetof(NumExprObject, program), READONLY, NULL}, {CHARP("constants"), T_OBJECT_EX, offsetof(NumExprObject, constants), READONLY, NULL}, {CHARP("input_names"), T_OBJECT, offsetof(NumExprObject, input_names), 0, NULL}, {NULL}, }; PyTypeObject NumExprType = { PyVarObject_HEAD_INIT(NULL, 0) "numexpr.NumExpr", /*tp_name*/ sizeof(NumExprObject), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)NumExpr_dealloc, /*tp_dealloc*/ 0, /*tp_print*/ 0, /*tp_getattr*/ 0, /*tp_setattr*/ 0, /*tp_compare*/ 0, /*tp_repr*/ 0, /*tp_as_number*/ 0, /*tp_as_sequence*/ 0, /*tp_as_mapping*/ 0, /*tp_hash */ (ternaryfunc)NumExpr_run, /*tp_call*/ 0, /*tp_str*/ 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ "NumExpr objects", /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */ NumExpr_methods, /* tp_methods */ NumExpr_members, /* tp_members */ 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ 
0, /* tp_descr_set */ 0, /* tp_dictoffset */ (initproc)NumExpr_init, /* tp_init */ 0, /* tp_alloc */ NumExpr_new, /* tp_new */ }; numexpr-2.7.1/numexpr/numexpr_object.hpp000066400000000000000000000020551360375525100204630ustar00rootroot00000000000000#ifndef NUMEXPR_OBJECT_HPP #define NUMEXPR_OBJECT_HPP /********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ struct NumExprObject { PyObject_HEAD PyObject *signature; /* a python string */ PyObject *tempsig; PyObject *constsig; PyObject *fullsig; PyObject *program; /* a python string */ PyObject *constants; /* a tuple of int/float/complex */ PyObject *input_names; /* tuple of strings */ char **mem; /* pointers to registers */ char *rawmem; /* a chunks of raw memory for storing registers */ npy_intp *memsteps; npy_intp *memsizes; int rawmemsize; int n_inputs; int n_constants; int n_temps; }; extern PyTypeObject NumExprType; #endif // NUMEXPR_OBJECT_HPP numexpr-2.7.1/numexpr/opcodes.hpp000066400000000000000000000162161360375525100170770ustar00rootroot00000000000000/********************************************************************* Numexpr - Fast numerical array expression evaluator for NumPy. License: MIT Author: See AUTHORS.txt See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ /* OPCODE(n, enum_name, exported, return_type, arg1_type, arg2_type, arg3_type) `exported` is NULL if the opcode shouldn't exported by the Python module. Types are Tb, Ti, Tl, Tf, Td, Tc, Ts, Tn, and T0; these symbols should be #defined to whatever is needed. (T0 is the no-such-arg type.) 
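   For illustration only (a sketch of the typical X-macro usage, not necessarily
   the exact way the interpreter consumes this header; the enum name below is
   hypothetical), a client file can expand the list that follows into an enum of
   opcode numbers:

       #define OPCODE(n, name, exported, rt, a1, a2, a3) name = n,
       enum OpCodes {
       #include "opcodes.hpp"
       };
       #undef OPCODE

   Each OPCODE(...) entry then contributes one enumerator whose value is its
   opcode number; this particular expansion ignores the type columns, so the
   type symbols only need real definitions when an expansion actually uses them.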
*/ OPCODE(0, OP_NOOP, "noop", T0, T0, T0, T0) OPCODE(1, OP_COPY_BB, "copy_bb", Tb, Tb, T0, T0) OPCODE(2, OP_INVERT_BB, "invert_bb", Tb, Tb, T0, T0) OPCODE(3, OP_AND_BBB, "and_bbb", Tb, Tb, Tb, T0) OPCODE(4, OP_OR_BBB, "or_bbb", Tb, Tb, Tb, T0) OPCODE(5, OP_EQ_BBB, "eq_bbb", Tb, Tb, Tb, T0) OPCODE(6, OP_NE_BBB, "ne_bbb", Tb, Tb, Tb, T0) OPCODE(7, OP_GT_BII, "gt_bii", Tb, Ti, Ti, T0) OPCODE(8, OP_GE_BII, "ge_bii", Tb, Ti, Ti, T0) OPCODE(9, OP_EQ_BII, "eq_bii", Tb, Ti, Ti, T0) OPCODE(10, OP_NE_BII, "ne_bii", Tb, Ti, Ti, T0) OPCODE(11, OP_GT_BLL, "gt_bll", Tb, Tl, Tl, T0) OPCODE(12, OP_GE_BLL, "ge_bll", Tb, Tl, Tl, T0) OPCODE(13, OP_EQ_BLL, "eq_bll", Tb, Tl, Tl, T0) OPCODE(14, OP_NE_BLL, "ne_bll", Tb, Tl, Tl, T0) OPCODE(15, OP_GT_BFF, "gt_bff", Tb, Tf, Tf, T0) OPCODE(16, OP_GE_BFF, "ge_bff", Tb, Tf, Tf, T0) OPCODE(17, OP_EQ_BFF, "eq_bff", Tb, Tf, Tf, T0) OPCODE(18, OP_NE_BFF, "ne_bff", Tb, Tf, Tf, T0) OPCODE(19, OP_GT_BDD, "gt_bdd", Tb, Td, Td, T0) OPCODE(20, OP_GE_BDD, "ge_bdd", Tb, Td, Td, T0) OPCODE(21, OP_EQ_BDD, "eq_bdd", Tb, Td, Td, T0) OPCODE(22, OP_NE_BDD, "ne_bdd", Tb, Td, Td, T0) OPCODE(23, OP_GT_BSS, "gt_bss", Tb, Ts, Ts, T0) OPCODE(24, OP_GE_BSS, "ge_bss", Tb, Ts, Ts, T0) OPCODE(25, OP_EQ_BSS, "eq_bss", Tb, Ts, Ts, T0) OPCODE(26, OP_NE_BSS, "ne_bss", Tb, Ts, Ts, T0) OPCODE(27, OP_CAST_IB, "cast_ib", Ti, Tb, T0, T0) OPCODE(28, OP_COPY_II, "copy_ii", Ti, Ti, T0, T0) OPCODE(29, OP_ONES_LIKE_II, "ones_like_ii", Ti, T0, T0, T0) OPCODE(30, OP_NEG_II, "neg_ii", Ti, Ti, T0, T0) OPCODE(31, OP_ADD_III, "add_iii", Ti, Ti, Ti, T0) OPCODE(32, OP_SUB_III, "sub_iii", Ti, Ti, Ti, T0) OPCODE(33, OP_MUL_III, "mul_iii", Ti, Ti, Ti, T0) OPCODE(34, OP_DIV_III, "div_iii", Ti, Ti, Ti, T0) OPCODE(35, OP_POW_III, "pow_iii", Ti, Ti, Ti, T0) OPCODE(36, OP_MOD_III, "mod_iii", Ti, Ti, Ti, T0) OPCODE(37, OP_LSHIFT_III, "lshift_iii", Ti, Ti, Ti, T0) OPCODE(38, OP_RSHIFT_III, "rshift_iii", Ti, Ti, Ti, T0) OPCODE(39, OP_WHERE_IBII, "where_ibii", Ti, Tb, Ti, Ti) OPCODE(40, OP_CAST_LI, "cast_li", Tl, Ti, T0, T0) OPCODE(41, OP_COPY_LL, "copy_ll", Tl, Tl, T0, T0) OPCODE(42, OP_ONES_LIKE_LL, "ones_like_ll", Tl, T0, T0, T0) OPCODE(43, OP_NEG_LL, "neg_ll", Tl, Tl, T0, T0) OPCODE(44, OP_ADD_LLL, "add_lll", Tl, Tl, Tl, T0) OPCODE(45, OP_SUB_LLL, "sub_lll", Tl, Tl, Tl, T0) OPCODE(46, OP_MUL_LLL, "mul_lll", Tl, Tl, Tl, T0) OPCODE(47, OP_DIV_LLL, "div_lll", Tl, Tl, Tl, T0) OPCODE(48, OP_POW_LLL, "pow_lll", Tl, Tl, Tl, T0) OPCODE(49, OP_MOD_LLL, "mod_lll", Tl, Tl, Tl, T0) OPCODE(50, OP_LSHIFT_LLL, "lshift_lll", Tl, Tl, Tl, T0) OPCODE(51, OP_RSHIFT_LLL, "rshift_lll", Tl, Tl, Tl, T0) OPCODE(52, OP_WHERE_LBLL, "where_lbll", Tl, Tb, Tl, Tl) OPCODE(53, OP_CAST_FI, "cast_fi", Tf, Ti, T0, T0) OPCODE(54, OP_CAST_FL, "cast_fl", Tf, Tl, T0, T0) OPCODE(55, OP_COPY_FF, "copy_ff", Tf, Tf, T0, T0) OPCODE(56, OP_ONES_LIKE_FF, "ones_like_ff", Tf, T0, T0, T0) OPCODE(57, OP_NEG_FF, "neg_ff", Tf, Tf, T0, T0) OPCODE(58, OP_ADD_FFF, "add_fff", Tf, Tf, Tf, T0) OPCODE(59, OP_SUB_FFF, "sub_fff", Tf, Tf, Tf, T0) OPCODE(60, OP_MUL_FFF, "mul_fff", Tf, Tf, Tf, T0) OPCODE(61, OP_DIV_FFF, "div_fff", Tf, Tf, Tf, T0) OPCODE(62, OP_POW_FFF, "pow_fff", Tf, Tf, Tf, T0) OPCODE(63, OP_MOD_FFF, "mod_fff", Tf, Tf, Tf, T0) OPCODE(64, OP_SQRT_FF, "sqrt_ff", Tf, Tf, T0, T0) OPCODE(65, OP_WHERE_FBFF, "where_fbff", Tf, Tb, Tf, Tf) OPCODE(66, OP_FUNC_FFN, "func_ffn", Tf, Tf, Tn, T0) OPCODE(67, OP_FUNC_FFFN, "func_fffn", Tf, Tf, Tf, Tn) OPCODE(68, OP_CAST_DI, "cast_di", Td, Ti, T0, T0) OPCODE(69, OP_CAST_DL, "cast_dl", Td, Tl, T0, T0) OPCODE(70, OP_CAST_DF, "cast_df", Td, 
Tf, T0, T0) OPCODE(71, OP_COPY_DD, "copy_dd", Td, Td, T0, T0) OPCODE(72, OP_ONES_LIKE_DD, "ones_like_dd", Td, T0, T0, T0) OPCODE(73, OP_NEG_DD, "neg_dd", Td, Td, T0, T0) OPCODE(74, OP_ADD_DDD, "add_ddd", Td, Td, Td, T0) OPCODE(75, OP_SUB_DDD, "sub_ddd", Td, Td, Td, T0) OPCODE(76, OP_MUL_DDD, "mul_ddd", Td, Td, Td, T0) OPCODE(77, OP_DIV_DDD, "div_ddd", Td, Td, Td, T0) OPCODE(78, OP_POW_DDD, "pow_ddd", Td, Td, Td, T0) OPCODE(79, OP_MOD_DDD, "mod_ddd", Td, Td, Td, T0) OPCODE(80, OP_SQRT_DD, "sqrt_dd", Td, Td, T0, T0) OPCODE(81, OP_WHERE_DBDD, "where_dbdd", Td, Tb, Td, Td) OPCODE(82, OP_FUNC_DDN, "func_ddn", Td, Td, Tn, T0) OPCODE(83, OP_FUNC_DDDN, "func_dddn", Td, Td, Td, Tn) OPCODE(84, OP_EQ_BCC, "eq_bcc", Tb, Tc, Tc, T0) OPCODE(85, OP_NE_BCC, "ne_bcc", Tb, Tc, Tc, T0) OPCODE(86, OP_CAST_CI, "cast_ci", Tc, Ti, T0, T0) OPCODE(87, OP_CAST_CL, "cast_cl", Tc, Tl, T0, T0) OPCODE(88, OP_CAST_CF, "cast_cf", Tc, Tf, T0, T0) OPCODE(89, OP_CAST_CD, "cast_cd", Tc, Td, T0, T0) OPCODE(90, OP_ONES_LIKE_CC, "ones_like_cc", Tc, T0, T0, T0) OPCODE(91, OP_COPY_CC, "copy_cc", Tc, Tc, T0, T0) OPCODE(92, OP_NEG_CC, "neg_cc", Tc, Tc, T0, T0) OPCODE(93, OP_ADD_CCC, "add_ccc", Tc, Tc, Tc, T0) OPCODE(94, OP_SUB_CCC, "sub_ccc", Tc, Tc, Tc, T0) OPCODE(95, OP_MUL_CCC, "mul_ccc", Tc, Tc, Tc, T0) OPCODE(96, OP_DIV_CCC, "div_ccc", Tc, Tc, Tc, T0) OPCODE(97, OP_WHERE_CBCC, "where_cbcc", Tc, Tb, Tc, Tc) OPCODE(98, OP_FUNC_CCN, "func_ccn", Tc, Tc, Tn, T0) OPCODE(99, OP_FUNC_CCCN, "func_cccn", Tc, Tc, Tc, Tn) OPCODE(100, OP_REAL_DC, "real_dc", Td, Tc, T0, T0) OPCODE(101, OP_IMAG_DC, "imag_dc", Td, Tc, T0, T0) OPCODE(102, OP_COMPLEX_CDD, "complex_cdd", Tc, Td, Td, T0) OPCODE(103, OP_COPY_SS, "copy_ss", Ts, Ts, T0, T0) OPCODE(104, OP_WHERE_BBBB, "where_bbbb", Tb, Tb, Tb, Tb) OPCODE(105, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) OPCODE(106, OP_REDUCTION, NULL, T0, T0, T0, T0) /* Last argument in a reduction is the axis of the array the reduction should be applied along. */ OPCODE(107, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) OPCODE(108, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) OPCODE(109, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) OPCODE(110, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) OPCODE(111, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) OPCODE(112, OP_PROD, NULL, T0, T0, T0, T0) OPCODE(113, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) OPCODE(114, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) OPCODE(115, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) OPCODE(116, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) OPCODE(117, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) OPCODE(118, OP_MIN, NULL, T0, T0, T0, T0) OPCODE(119, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) OPCODE(120, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) OPCODE(121, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) OPCODE(122, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) OPCODE(123, OP_MAX, NULL, T0, T0, T0, T0) OPCODE(124, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) OPCODE(125, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) OPCODE(126, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) OPCODE(127, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) /* Should be the last opcode */ OPCODE(128, OP_END, NULL, T0, T0, T0, T0) numexpr-2.7.1/numexpr/str-two-way.hpp000066400000000000000000000341341360375525100176570ustar00rootroot00000000000000/* Byte-wise substring search, using the Two-Way algorithm. * Copyright (C) 2008, 2010 Eric Blake * Permission to use, copy, modify, and distribute this software * is freely granted, provided that this notice is preserved. 
*/ /* Before including this file, you need to include <string.h>, and define: RETURN_TYPE A macro that expands to the return type. AVAILABLE(h, h_l, j, n_l) A macro that returns nonzero if there are at least N_L bytes left starting at H[J]. H is 'unsigned char *', H_L, J, and N_L are 'size_t'; H_L is an lvalue. For NUL-terminated searches, H_L can be modified each iteration to avoid having to compute the end of H up front. For case-insensitivity, you may optionally define: CMP_FUNC(p1, p2, l) A macro that returns 0 iff the first L characters of P1 and P2 are equal. CANON_ELEMENT(c) A macro that canonicalizes an element right after it has been fetched from one of the two strings. The argument is an 'unsigned char'; the result must be an 'unsigned char' as well. This file undefines the macros documented above, and defines LONG_NEEDLE_THRESHOLD. */ #include <limits.h> /* Python 2.7 (the only Python 2.x version supported as of now and until 2020) is built on windows with Visual Studio 2008 C compiler. That dictates the compiler which must be used by authors of third party Python modules. See https://mail.python.org/pipermail/distutils-sig/2014-September/024885.html Unfortunately this version of Visual Studio doesn't claim to be C99 compatible and in particular it lacks the stdint.h header. So we have to replace it with a public domain version. Visual Studio 2010 and later have stdint.h. */ #ifdef _MSC_VER #if _MSC_VER <= 1500 #include "win32/stdint.h" #endif #else #include <stdint.h> #endif /* We use the Two-Way string matching algorithm, which guarantees linear complexity with constant space. Additionally, for long needles, we also use a bad character shift table similar to the Boyer-Moore algorithm to achieve improved (potentially sub-linear) performance. See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm */ /* Point at which computing a bad-byte shift table is likely to be worthwhile. Small needles should not compute a table, since it adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a speedup no greater than a factor of NEEDLE_LEN. The larger the needle, the better the potential performance gain. On the other hand, on non-POSIX systems with CHAR_BIT larger than eight, the memory required for the table is prohibitive. */ #if CHAR_BIT < 10 # define LONG_NEEDLE_THRESHOLD 32U #else # define LONG_NEEDLE_THRESHOLD SIZE_MAX #endif #define MAX(a, b) ((a < b) ? (b) : (a)) #ifndef CANON_ELEMENT # define CANON_ELEMENT(c) c #endif #ifndef CMP_FUNC # define CMP_FUNC memcmp #endif /* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN. Return the index of the first byte in the right half, and set *PERIOD to the global period of the right half. The global period of a string is the smallest index (possibly its length) at which all remaining bytes in the string are repetitions of the prefix (the last repetition may be a subset of the prefix). When NEEDLE is factored into two halves, a local period is the length of the smallest word that shares a suffix with the left half and shares a prefix with the right half. All factorizations of a non-empty NEEDLE have a local period of at least 1 and no greater than NEEDLE_LEN. A critical factorization has the property that the local period equals the global period. All strings have at least one critical factorization with the left half smaller than the global period.
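   For example, the global period of "abcabcab" is 3: starting at index 3 the remaining bytes "abcab" are repetitions of the prefix "abc" (the final "ab" being a partial repetition), and no smaller index has that property.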
Given an ordered alphabet, a critical factorization can be computed in linear time, with 2 * NEEDLE_LEN comparisons, by computing the larger of two ordered maximal suffixes. The ordered maximal suffixes are determined by lexicographic comparison of periodicity. */ static size_t critical_factorization (const unsigned char *needle, size_t needle_len, size_t *period) { /* Index of last byte of left half, or SIZE_MAX. */ size_t max_suffix, max_suffix_rev; size_t j; /* Index into NEEDLE for current candidate suffix. */ size_t k; /* Offset into current period. */ size_t p; /* Intermediate period. */ unsigned char a, b; /* Current comparison bytes. */ /* Invariants: 0 <= j < NEEDLE_LEN - 1 -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) min(max_suffix, max_suffix_rev) < global period of NEEDLE 1 <= p <= global period of NEEDLE p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] 1 <= k <= p */ /* Perform lexicographic search. */ max_suffix = SIZE_MAX; j = 0; k = p = 1; while (j + k < needle_len) { a = CANON_ELEMENT (needle[j + k]); b = CANON_ELEMENT (needle[(size_t)(max_suffix + k)]); if (a < b) { /* Suffix is smaller, period is entire prefix so far. */ j += k; k = 1; p = j - max_suffix; } else if (a == b) { /* Advance through repetition of the current period. */ if (k != p) ++k; else { j += p; k = 1; } } else /* b < a */ { /* Suffix is larger, start over from current location. */ max_suffix = j++; k = p = 1; } } *period = p; /* Perform reverse lexicographic search. */ max_suffix_rev = SIZE_MAX; j = 0; k = p = 1; while (j + k < needle_len) { a = CANON_ELEMENT (needle[j + k]); b = CANON_ELEMENT (needle[max_suffix_rev + k]); if (b < a) { /* Suffix is smaller, period is entire prefix so far. */ j += k; k = 1; p = j - max_suffix_rev; } else if (a == b) { /* Advance through repetition of the current period. */ if (k != p) ++k; else { j += p; k = 1; } } else /* a < b */ { /* Suffix is larger, start over from current location. */ max_suffix_rev = j++; k = p = 1; } } /* Choose the longer suffix. Return the first byte of the right half, rather than the last byte of the left half. */ if (max_suffix_rev + 1 < max_suffix + 1) return max_suffix + 1; *period = p; return max_suffix_rev + 1; } /* Return the first location of non-empty NEEDLE within HAYSTACK, or NULL. HAYSTACK_LEN is the minimum known length of HAYSTACK. This method is optimized for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD. Performance is guaranteed to be linear, with an initialization cost of 2 * NEEDLE_LEN comparisons. If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */ static RETURN_TYPE two_way_short_needle (const unsigned char *haystack, size_t haystack_len, const unsigned char *needle, size_t needle_len) { size_t i; /* Index into current byte of NEEDLE. */ size_t j; /* Index into current window of HAYSTACK. */ size_t period; /* The period of the right half of needle. */ size_t suffix; /* The index of the right half of needle. */ /* Factor the needle into two halves, such that the left half is smaller than the global period, and the right half is periodic (with a period as large as NEEDLE_LEN - suffix). */ suffix = critical_factorization (needle, needle_len, &period); /* Perform the search. Each iteration compares the right half first. 
*/ if (CMP_FUNC (needle, needle + period, suffix) == 0) { /* Entire needle is periodic; a mismatch can only advance by the period, so use memory to avoid rescanning known occurrences of the period. */ size_t memory = 0; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) { /* Scan for matches in right half. */ i = MAX (suffix, memory); while (i < needle_len && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) ++i; if (needle_len <= i) { /* Scan for matches in left half. */ i = suffix - 1; while (memory < i + 1 && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) --i; if (i + 1 < memory + 1) return (RETURN_TYPE) (haystack + j); /* No match, so remember how many repetitions of period on the right half were scanned. */ j += period; memory = needle_len - period; } else { j += i - suffix + 1; memory = 0; } } } else { /* The two halves of needle are distinct; no extra memory is required, and any mismatch results in a maximal shift. */ period = MAX (suffix, needle_len - suffix) + 1; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) { /* Scan for matches in right half. */ i = suffix; while (i < needle_len && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) ++i; if (needle_len <= i) { /* Scan for matches in left half. */ i = suffix - 1; while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) --i; if (i == SIZE_MAX) return (RETURN_TYPE) (haystack + j); j += period; } else j += i - suffix + 1; } } return NULL; } /* Return the first location of non-empty NEEDLE within HAYSTACK, or NULL. HAYSTACK_LEN is the minimum known length of HAYSTACK. This method is optimized for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN. Performance is guaranteed to be linear, with an initialization cost of 3 * NEEDLE_LEN + (1 << CHAR_BIT) operations. If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible. If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and sublinear performance is not possible. */ static RETURN_TYPE two_way_long_needle (const unsigned char *haystack, size_t haystack_len, const unsigned char *needle, size_t needle_len) { size_t i; /* Index into current byte of NEEDLE. */ size_t j; /* Index into current window of HAYSTACK. */ size_t period; /* The period of the right half of needle. */ size_t suffix; /* The index of the right half of needle. */ size_t shift_table[1U << CHAR_BIT]; /* See below. */ /* Factor the needle into two halves, such that the left half is smaller than the global period, and the right half is periodic (with a period as large as NEEDLE_LEN - suffix). */ suffix = critical_factorization (needle, needle_len, &period); /* Populate shift_table. For each possible byte value c, shift_table[c] is the distance from the last occurrence of c to the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE. shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0. */ for (i = 0; i < 1U << CHAR_BIT; i++) shift_table[i] = needle_len; for (i = 0; i < needle_len; i++) shift_table[CANON_ELEMENT (needle[i])] = needle_len - i - 1; /* Perform the search. Each iteration compares the right half first. */ if (CMP_FUNC (needle, needle + period, suffix) == 0) { /* Entire needle is periodic; a mismatch can only advance by the period, so use memory to avoid rescanning known occurrences of the period. 
*/ size_t memory = 0; size_t shift; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) { /* Check the last byte first; if it does not match, then shift to the next possible match location. */ shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; if (0 < shift) { if (memory && shift < period) { /* Since needle is periodic, but the last period has a byte out of place, there can be no match until after the mismatch. */ shift = needle_len - period; } memory = 0; j += shift; continue; } /* Scan for matches in right half. The last byte has already been matched, by virtue of the shift table. */ i = MAX (suffix, memory); while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) ++i; if (needle_len - 1 <= i) { /* Scan for matches in left half. */ i = suffix - 1; while (memory < i + 1 && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) --i; if (i + 1 < memory + 1) return (RETURN_TYPE) (haystack + j); /* No match, so remember how many repetitions of period on the right half were scanned. */ j += period; memory = needle_len - period; } else { j += i - suffix + 1; memory = 0; } } } else { /* The two halves of needle are distinct; no extra memory is required, and any mismatch results in a maximal shift. */ size_t shift; period = MAX (suffix, needle_len - suffix) + 1; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) { /* Check the last byte first; if it does not match, then shift to the next possible match location. */ shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; if (0 < shift) { j += shift; continue; } /* Scan for matches in right half. The last byte has already been matched, by virtue of the shift table. */ i = suffix; while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) ++i; if (needle_len - 1 <= i) { /* Scan for matches in left half. */ i = suffix - 1; while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) == CANON_ELEMENT (haystack[i + j]))) --i; if (i == SIZE_MAX) return (RETURN_TYPE) (haystack + j); j += period; } else j += i - suffix + 1; } } return NULL; } #undef AVAILABLE #undef CANON_ELEMENT #undef CMP_FUNC #undef MAX #undef RETURN_TYPE numexpr-2.7.1/numexpr/tests/000077500000000000000000000000001360375525100160665ustar00rootroot00000000000000numexpr-2.7.1/numexpr/tests/__init__.py000066400000000000000000000006771360375525100202110ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### from numexpr.tests.test_numexpr import test, print_versions if __name__ == '__main__': test() numexpr-2.7.1/numexpr/tests/test_numexpr.py000066400000000000000000001267021360375525100212050ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### from __future__ import absolute_import, print_function import os import sys import platform import warnings from contextlib import contextmanager import subprocess import numpy as np from numpy import ( array, arange, empty, zeros, int32, int64, uint16, complex_, float64, rec, copy, ones_like, where, alltrue, linspace, sum, prod, sqrt, fmod, floor, ceil, sin, cos, tan, arcsin, arccos, arctan, arctan2, sinh, cosh, tanh, arcsinh, arccosh, arctanh, log, log1p, log10, exp, expm1, conj) from numpy.testing import (assert_equal, assert_array_equal, assert_array_almost_equal, assert_allclose) from numpy import shape, allclose, array_equal, ravel, isnan, isinf import numexpr from numexpr import E, NumExpr, evaluate, re_evaluate, disassemble, use_vml import unittest TestCase = unittest.TestCase double = np.double if sys.version_info[0] >= 3: long = int # Recommended minimum versions from distutils.version import LooseVersion minimum_numpy_version = LooseVersion('1.7.0') class test_numexpr(TestCase): """Testing with 1 thread""" nthreads = 1 def setUp(self): numexpr.set_num_threads(self.nthreads) def test_simple(self): ex = 2.0 * E.a + 3.0 * E.b * E.c sig = [('a', double), ('b', double), ('c', double)] func = NumExpr(ex, signature=sig) x = func(array([1., 2, 3]), array([4., 5, 6]), array([7., 8, 9])) assert_array_equal(x, array([86., 124., 168.])) def test_simple_expr_small_array(self): func = NumExpr(E.a) x = arange(100.0) y = func(x) assert_array_equal(x, y) def test_simple_expr(self): func = NumExpr(E.a) x = arange(1e6) y = func(x) assert_array_equal(x, y) def test_rational_expr(self): func = NumExpr((E.a + 2.0 * E.b) / (1 + E.a + 4 * E.b * E.b)) a = arange(1e6) b = arange(1e6) * 0.1 x = (a + 2 * b) / (1 + a + 4 * b * b) y = func(a, b) assert_array_almost_equal(x, y) def test_reductions(self): # Check that they compile OK. assert_equal(disassemble( NumExpr("sum(x**2+2, axis=None)", [('x', double)])), [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), (b'add_ddd', b't3', b't3', b'c2[2.0]'), (b'sum_ddn', b'r0', b't3', None)]) assert_equal(disassemble( NumExpr("sum(x**2+2, axis=1)", [('x', double)])), [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), (b'add_ddd', b't3', b't3', b'c2[2.0]'), (b'sum_ddn', b'r0', b't3', 1)]) assert_equal(disassemble( NumExpr("prod(x**2+2, axis=2)", [('x', double)])), [(b'mul_ddd', b't3', b'r1[x]', b'r1[x]'), (b'add_ddd', b't3', b't3', b'c2[2.0]'), (b'prod_ddn', b'r0', b't3', 2)]) # Check that full reductions work. 
x = zeros(100000) + .01 # checks issue #41 assert_allclose(evaluate("sum(x+2,axis=None)"), sum(x + 2, axis=None)) assert_allclose(evaluate("sum(x+2,axis=0)"), sum(x + 2, axis=0)) assert_allclose(evaluate("prod(x,axis=0)"), prod(x, axis=0)) assert_allclose(evaluate("min(x)"), np.min(x)) assert_allclose(evaluate("max(x,axis=0)"), np.max(x, axis=0)) # Fix for #277, array with leading singleton dimension x = np.arange(10).reshape(1,10) assert_allclose(evaluate("sum(x,axis=None)"), sum(x, axis=None) ) assert_allclose(evaluate("sum(x,axis=0)"), sum(x, axis=0) ) assert_allclose(evaluate("sum(x,axis=1)"), sum(x, axis=1) ) x = arange(10.0) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0)) assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0)) assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0)) x = arange(100.0) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0)) assert_allclose(evaluate("min(x-1,axis=0)"), np.min(x - 1, axis=0)) assert_allclose(evaluate("max(x-1,axis=0)"), np.max(x - 1, axis=0)) x = linspace(0.1, 1.0, 2000) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0)) assert_allclose(evaluate("min(x-1,axis=0)"), np.min(x - 1, axis=0)) assert_allclose(evaluate("max(x-1,axis=0)"), np.max(x - 1, axis=0)) # Check that reductions along an axis work y = arange(9.0).reshape(3, 3) assert_allclose(evaluate("sum(y**2, axis=1)"), sum(y ** 2, axis=1)) assert_allclose(evaluate("sum(y**2, axis=0)"), sum(y ** 2, axis=0)) assert_allclose(evaluate("sum(y**2, axis=None)"), sum(y ** 2, axis=None)) assert_allclose(evaluate("prod(y**2, axis=1)"), prod(y ** 2, axis=1)) assert_allclose(evaluate("prod(y**2, axis=0)"), prod(y ** 2, axis=0)) assert_allclose(evaluate("prod(y**2, axis=None)"), prod(y ** 2, axis=None)) assert_allclose(evaluate("min(y**2, axis=1)"), np.min(y ** 2, axis=1)) assert_allclose(evaluate("min(y**2, axis=0)"), np.min(y ** 2, axis=0)) assert_allclose(evaluate("min(y**2, axis=None)"), np.min(y ** 2, axis=None)) assert_allclose(evaluate("max(y**2, axis=1)"), np.max(y ** 2, axis=1)) assert_allclose(evaluate("max(y**2, axis=0)"), np.max(y ** 2, axis=0)) assert_allclose(evaluate("max(y**2, axis=None)"), np.max(y ** 2, axis=None)) # Check integers x = arange(10.) 
x = x.astype(int) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0)) assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0)) assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0)) # Check longs x = x.astype(long) assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) assert_allclose(evaluate("prod(x**2+2,axis=0)"), prod(x ** 2 + 2, axis=0)) assert_allclose(evaluate("min(x**2+2,axis=0)"), np.min(x ** 2 + 2, axis=0)) assert_allclose(evaluate("max(x**2+2,axis=0)"), np.max(x ** 2 + 2, axis=0)) # Check complex x = x + .1j assert_allclose(evaluate("sum(x**2+2,axis=0)"), sum(x ** 2 + 2, axis=0)) assert_allclose(evaluate("prod(x-1,axis=0)"), prod(x - 1, axis=0)) def test_in_place(self): x = arange(10000.).reshape(1000, 10) evaluate("x + 3", out=x) assert_equal(x, arange(10000.).reshape(1000, 10) + 3) y = arange(10) evaluate("(x - 3) * y + (x - 3)", out=x) assert_equal(x, arange(10000.).reshape(1000, 10) * (arange(10) + 1)) def test_axis(self): y = arange(9.0).reshape(3, 3) try: evaluate("sum(y, axis=2)") except ValueError: pass else: raise ValueError("should raise exception!") try: evaluate("sum(y, axis=-3)") except ValueError: pass else: raise ValueError("should raise exception!") try: # Negative axis are not supported evaluate("sum(y, axis=-1)") except ValueError: pass else: raise ValueError("should raise exception!") def test_r0_reuse(self): assert_equal(disassemble(NumExpr("x * x + 2", [('x', double)])), [(b'mul_ddd', b'r0', b'r1[x]', b'r1[x]'), (b'add_ddd', b'r0', b'r0', b'c2[2.0]')]) def test_str_contains_basic0(self): res = evaluate('contains(b"abc", b"ab")') assert_equal(res, True) def test_str_contains_basic1(self): haystack = array([b'abc', b'def', b'xyz', b'x11', b'za']) res = evaluate('contains(haystack, b"ab")') assert_equal(res, [True, False, False, False, False]) def test_str_contains_basic2(self): haystack = array([b'abc', b'def', b'xyz', b'x11', b'za']) res = evaluate('contains(b"abcd", haystack)') assert_equal(res, [True, False, False, False, False]) def test_str_contains_basic3(self): haystacks = array( [b'abckkk', b'adef', b'xyz', b'x11abcp', b'za', b'abc']) needles = array( [b'abc', b'def', b'aterr', b'oot', b'zu', b'ab']) res = evaluate('contains(haystacks, needles)') assert_equal(res, [True, True, False, False, False, True]) def test_str_contains_basic4(self): needles = array( [b'abc', b'def', b'aterr', b'oot', b'zu', b'ab c', b' abc', b'abc ']) res = evaluate('contains(b"test abc here", needles)') assert_equal(res, [True, False, False, False, False, False, True, True]) def test_str_contains_basic5(self): needles = array( [b'abc', b'ab c', b' abc', b' abc ', b'\tabc', b'c h']) res = evaluate('contains(b"test abc here", needles)') assert_equal(res, [True, False, True, True, False, True]) # Compare operation of Python 'in' operator with 'contains' using a # product of two lists of strings. def test_str_contains_listproduct(self): from itertools import product small = [ 'It w', 'as th', 'e Whit', 'e Rab', 'bit,', ' tro', 'tting', ' sl', 'owly', ' back ', 'again,', ' and', ' lo', 'okin', 'g a', 'nxious', 'ly a', 'bou', 't a', 's it w', 'ent,', ' as i', 'f it', ' had l', 'ost', ' some', 'thi', 'ng; a', 'nd ', 'she ', 'heard ', 'it mut', 'terin', 'g to ', 'its', 'elf ', "'The", ' Duch', 'ess! T', 'he ', 'Duches', 's! Oh ', 'my dea', 'r paws', '! Oh ', 'my f', 'ur ', 'and ', 'whiske', 'rs! 
', 'She', "'ll g", 'et me', ' ex', 'ecu', 'ted, ', 'as su', 're a', 's f', 'errets', ' are f', 'errets', '! Wh', 'ere ', 'CAN', ' I hav', 'e d', 'roppe', 'd t', 'hem,', ' I wo', 'nder?', "' A", 'lice', ' gu', 'essed', ' in a', ' mom', 'ent ', 'tha', 't it w', 'as ', 'looki', 'ng f', 'or ', 'the fa', 'n and ', 'the', ' pai', 'r of w', 'hit', 'e kid', ' glo', 'ves', ', and ', 'she ', 'very g', 'ood', '-na', 'turedl', 'y be', 'gan h', 'unt', 'ing', ' about', ' for t', 'hem', ', but', ' they ', 'wer', 'e nowh', 'ere to', ' be', ' se', 'en--', 'ever', 'ythin', 'g seem', 'ed ', 'to ', 'have c', 'hang', 'ed ', 'since', ' he', 'r swim', ' in', ' the', ' pool,', ' and', ' the g', 'reat ', 'hal', 'l, w', 'ith', ' th', 'e gl', 'ass t', 'abl', 'e and ', 'the', ' li', 'ttle', ' doo', 'r, ha', 'd v', 'ani', 'shed c', 'omp', 'lete', 'ly.'] big = [ 'It wa', 's the', ' W', 'hit', 'e ', 'Ra', 'bb', 'it, t', 'ro', 'tting s', 'lowly', ' back ', 'agai', 'n, and', ' l', 'ookin', 'g ', 'an', 'xiously', ' about ', 'as it w', 'ent, as', ' if ', 'it had', ' los', 't ', 'so', 'mething', '; and', ' she h', 'eard ', 'it ', 'mutteri', 'ng to', ' itself', " 'The ", 'Duchess', '! ', 'Th', 'e ', 'Duchess', '! Oh m', 'y de', 'ar paws', '! ', 'Oh my ', 'fu', 'r and w', 'hiskers', "! She'", 'll ', 'get', ' me ', 'execute', 'd,', ' a', 's ', 'su', 're as ', 'fe', 'rrets', ' are f', 'errets!', ' Wher', 'e CAN', ' I ha', 've dro', 'pped t', 'hem', ', I ', 'won', "der?' A", 'lice g', 'uess', 'ed ', 'in a m', 'omen', 't that', ' i', 't was l', 'ook', 'ing f', 'or th', 'e ', 'fan and', ' th', 'e p', 'air o', 'f whit', 'e ki', 'd glove', 's, and ', 'she v', 'ery ', 'good-na', 'tu', 'redl', 'y be', 'gan hun', 'ti', 'ng abou', 't for t', 'he', 'm, bu', 't t', 'hey ', 'were n', 'owhere', ' to b', 'e s', 'een-', '-eve', 'rythi', 'ng see', 'me', 'd ', 'to ha', 've', ' c', 'hanged', ' sinc', 'e her s', 'wim ', 'in the ', 'pool,', ' an', 'd the g', 'rea', 't h', 'all, wi', 'th the ', 'glas', 's t', 'able an', 'd th', 'e littl', 'e door,', ' had va', 'ni', 'shed co', 'mpletel', 'y.'] p = list(product(small, big)) python_in = [x[0] in x[1] for x in p] a = [x[0].encode() for x in p] b = [x[1].encode() for x in p] res = [bool(x) for x in evaluate('contains(b, a)')] assert_equal(res, python_in) def test_str_contains_withemptystr1(self): withemptystr = array([b'abc', b'def', b'']) res = evaluate('contains(b"abcd", withemptystr)') assert_equal(res, [True, False, True]) def test_str_contains_withemptystr2(self): withemptystr = array([b'abc', b'def', b'']) res = evaluate('contains(withemptystr, b"")') assert_equal(res, [True, True, True]) def test_str_contains_long_needle(self): a = b'1' + b'a' * 40 b = b'a' * 40 res = evaluate('contains(a, b)') assert_equal(res, True) def test_where_scalar_bool(self): a = True b = array([1, 2]) c = array([3, 4]) res = evaluate('where(a, b, c)') assert_array_equal(res, b) a = False res = evaluate('where(a, b, c)') assert_array_equal(res, c) def test_refcount(self): # Regression test for issue #310 a = array([1]) assert sys.getrefcount(a) == 2 evaluate('1') assert sys.getrefcount(a) == 2 def test_locals_clears_globals(self): # Check for issue #313, whereby clearing f_locals also clear f_globals # if in the top-frame. This cannot be done inside `unittest` as it is always # executing code in a child frame. 
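        # The script below runs in a fresh top-level interpreter (via subprocess):
        # after each evaluate() call, with the different local_dict/global_dict
        # combinations, it increments the global variable `a`; if evaluate() had
        # wrongly cleared the frame's globals, that `a += 1` would raise NameError
        # and the check_call() below would fail.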
script = r';'.join([ r"import numexpr as ne", r"a=10", r"ne.evaluate('1')", r"a += 1", r"ne.evaluate('2', local_dict={})", r"a += 1", r"ne.evaluate('3', global_dict={})", r"a += 1", r"ne.evaluate('4', local_dict={}, global_dict={})", r"a += 1", ]) # Raises CalledProcessError on a non-normal exit check = subprocess.check_call([sys.executable, '-c', script]) # Ideally this test should also be done against ipython but it's not # a requirement. class test_numexpr2(test_numexpr): """Testing with 2 threads""" nthreads = 2 class test_evaluate(TestCase): def test_simple(self): a = array([1., 2., 3.]) b = array([4., 5., 6.]) c = array([7., 8., 9.]) x = evaluate("2*a + 3*b*c") assert_array_equal(x, array([86., 124., 168.])) def test_simple_expr_small_array(self): x = arange(100.0) y = evaluate("x") assert_array_equal(x, y) def test_simple_expr(self): x = arange(1e6) y = evaluate("x") assert_array_equal(x, y) def test_re_evaluate(self): a = array([1., 2., 3.]) b = array([4., 5., 6.]) c = array([7., 8., 9.]) x = evaluate("2*a + 3*b*c") x = re_evaluate() assert_array_equal(x, array([86., 124., 168.])) def test_re_evaluate_dict(self): a = array([1., 2., 3.]) b = array([4., 5., 6.]) c = array([7., 8., 9.]) x = evaluate("2*a + 3*b*c", local_dict={'a': a, 'b': b, 'c': c}) x = re_evaluate() assert_array_equal(x, array([86., 124., 168.])) # Test for issue #37 if sys.version_info[0] < 3: # In python 3 '/' perforns true division, not integer division. # Integer division '//' is still not suppoerted by numexpr def test_zero_div(self): x = arange(100, dtype='i4') y = evaluate("1/x") x2 = zeros(100, dtype='i4') x2[1] = 1 assert_array_equal(x2, y) # Test for issue #22 def test_true_div(self): x = arange(10, dtype='i4') assert_array_equal(evaluate("x/2"), x / 2) assert_array_equal(evaluate("x/2", truediv=False), x / 2) assert_array_equal(evaluate("x/2", truediv='auto'), x / 2) assert_array_equal(evaluate("x/2", truediv=True), x / 2.0) def test_left_shift(self): x = arange(10, dtype='i4') assert_array_equal(evaluate("x<<2"), x << 2) def test_right_shift(self): x = arange(10, dtype='i4') assert_array_equal(evaluate("x>>2"), x >> 2) # PyTables uses __nonzero__ among ExpressionNode objects internally # so this should be commented out for the moment. See #24. def test_boolean_operator(self): x = arange(10, dtype='i4') try: evaluate("(x > 1) and (x < 9)") except TypeError: pass else: raise ValueError("should raise exception!") def test_rational_expr(self): a = arange(1e6) b = arange(1e6) * 0.1 x = (a + 2 * b) / (1 + a + 4 * b * b) y = evaluate("(a + 2*b) / (1 + a + 4*b*b)") assert_array_almost_equal(x, y) def test_complex_expr(self): def complex(a, b): c = zeros(a.shape, dtype=complex_) c.real = a c.imag = b return c a = arange(1e4) b = arange(1e4) ** 1e-5 z = a + 1j * b x = z.imag x = sin(complex(a, b)).real + z.imag y = evaluate("sin(complex(a, b)).real + z.imag") assert_array_almost_equal(x, y) def test_complex_strides(self): a = arange(100).reshape(10, 10)[::2] b = arange(50).reshape(5, 10) assert_array_equal(evaluate("a+b"), a + b) c = empty([10], dtype=[('c1', int32), ('c2', uint16)]) c['c1'] = arange(10) c['c2'].fill(0xaaaa) c1 = c['c1'] a0 = a[0] assert_array_equal(evaluate("c1"), c1) assert_array_equal(evaluate("a0+c1"), a0 + c1) def test_recarray_strides(self): a = arange(100) b = arange(100,200) recarr = np.rec.array(None, formats='f4,f4', shape=(100,)) recarr['f0'] = a recarr['f1'] = b c = recarr['f1'] assert_array_almost_equal(evaluate("sqrt(c) > 1."), sqrt(c) > 1.) 
assert_array_almost_equal(evaluate("log10(c)"), log10(c)) def test_broadcasting(self): a = arange(100).reshape(10, 10)[::2] c = arange(10) d = arange(5).reshape(5, 1) assert_array_equal(evaluate("a+c"), a + c) assert_array_equal(evaluate("a+d"), a + d) expr = NumExpr("2.0*a+3.0*c", [('a', double), ('c', double)]) assert_array_equal(expr(a, c), 2.0 * a + 3.0 * c) def test_all_scalar(self): a = 3. b = 4. assert_allclose(evaluate("a+b"), a + b) expr = NumExpr("2*a+3*b", [('a', double), ('b', double)]) assert_equal(expr(a, b), 2 * a + 3 * b) def test_run(self): a = arange(100).reshape(10, 10)[::2] b = arange(10) expr = NumExpr("2*a+3*b", [('a', double), ('b', double)]) assert_array_equal(expr(a, b), expr.run(a, b)) def test_illegal_value(self): a = arange(3) try: evaluate("a < [0, 0, 0]") except TypeError: pass else: self.fail() def test_ex_uses_vml(self): vml_funcs = [ "sin", "cos", "tan", "arcsin", "arccos", "arctan", "sinh", "cosh", "tanh", "arcsinh", "arccosh", "arctanh", "log", "log1p","log10", "exp", "expm1", "abs", "conj", "arctan2", "fmod"] for func in vml_funcs: strexpr = func+'(a)' _, ex_uses_vml = numexpr.necompiler.getExprNames(strexpr, {}) assert_equal(ex_uses_vml, use_vml, strexpr) if 'sparc' not in platform.machine(): # Execution order set here so as to not use too many threads # during the rest of the execution. See #33 for details. def test_changing_nthreads_00_inc(self): a = linspace(-1, 1, 1000000) b = ((.25 * a + .75) * a - 1.5) * a - 2 for nthreads in range(1, 7): numexpr.set_num_threads(nthreads) c = evaluate("((.25*a + .75)*a - 1.5)*a - 2") assert_array_almost_equal(b, c) def test_changing_nthreads_01_dec(self): a = linspace(-1, 1, 1000000) b = ((.25 * a + .75) * a - 1.5) * a - 2 for nthreads in range(6, 1, -1): numexpr.set_num_threads(nthreads) c = evaluate("((.25*a + .75)*a - 1.5)*a - 2") assert_array_almost_equal(b, c) tests = [ ('MISC', ['b*c+d*e', '2*a+3*b', '-a', 'sinh(a)', '2*a + (cos(3)+5)*sinh(cos(b))', '2*a + arctan2(a, b)', 'arcsin(0.5)', 'where(a != 0.0, 2, a)', 'where(a > 10, b < a, b > a)', 'where((a-10).real != 0.0, a, 2)', '0.25 * (a < 5) + 0.33 * (a >= 5)', 'cos(1+1)', '1+1', '1', 'cos(a2)', ])] optests = [] for op in list('+-*/%') + ['**']: optests.append("(a+1) %s (b+3)" % op) optests.append("3 %s (b+3)" % op) optests.append("(a+1) %s 4" % op) optests.append("2 %s (b+3)" % op) optests.append("(a+1) %s 2" % op) optests.append("(a+1) %s -1" % op) optests.append("(a+1) %s 0.5" % op) # Check divisions and modulus by zero (see ticket #107) optests.append("(a+1) %s 0" % op) tests.append(('OPERATIONS', optests)) cmptests = [] for op in ['<', '<=', '==', '>=', '>', '!=']: cmptests.append("a/2+5 %s b" % op) cmptests.append("a/2+5 %s 7" % op) cmptests.append("7 %s b" % op) cmptests.append("7.0 %s 5" % op) tests.append(('COMPARISONS', cmptests)) func1tests = [] for func in ['copy', 'ones_like', 'sqrt', 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan', 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh', 'log', 'log1p', 'log10', 'exp', 'expm1', 'abs', 'conj', 'ceil', 'floor']: func1tests.append("a + %s(b+c)" % func) tests.append(('1_ARG_FUNCS', func1tests)) func2tests = [] for func in ['arctan2', 'fmod']: func2tests.append("a + %s(b+c, d+1)" % func) func2tests.append("a + %s(b+c, 1)" % func) func2tests.append("a + %s(1, d+1)" % func) tests.append(('2_ARG_FUNCS', func2tests)) powtests = [] # n = -1, 0.5, 2, 4 already handled in section "OPERATIONS" for n in (-7, -2.5, -1.5, -1.3, -.5, 0, 0.0, 1, 2.3, 2.5, 3): powtests.append("(a+1)**%s" % n) 
tests.append(('POW_TESTS', powtests)) def equal(a, b, exact): if array_equal(a, b): return True if hasattr(a, 'dtype') and a.dtype in ['f4', 'f8']: nnans = isnan(a).sum() if nnans > 0: # For results containing NaNs, just check that the number # of NaNs is the same in both arrays. This check could be # made more exhaustive, but checking element by element in # python space is very expensive in general. return nnans == isnan(b).sum() ninfs = isinf(a).sum() if ninfs > 0: # Ditto for Inf's return ninfs == isinf(b).sum() if exact: return (shape(a) == shape(b)) and alltrue(ravel(a) == ravel(b), axis=0) else: if hasattr(a, 'dtype') and a.dtype == 'f4': atol = 1e-5 # Relax precission for special opcodes, like fmod else: atol = 1e-8 return (shape(a) == shape(b) and allclose(ravel(a), ravel(b), atol=atol)) class Skip(Exception): pass def test_expressions(): test_no = [0] def make_test_method(a, a2, b, c, d, e, x, expr, test_scalar, dtype, optimization, exact, section): this_locals = locals() def method(): try: # We don't want to listen at RuntimeWarnings like # "overflows" or "divide by zero" in plain eval(). warnings.simplefilter("ignore") npval = eval(expr, globals(), this_locals) warnings.simplefilter("always") npval = eval(expr, globals(), this_locals) except Exception as ex: # just store the exception in a variable # compatibility with numpy v1.12 # see also https://github.com/pydata/numexpr/issues/239 np_exception = ex npval = None else: np_exception = None try: neval = evaluate(expr, local_dict=this_locals, optimization=optimization) except AssertionError: raise except NotImplementedError: print('%r not implemented for %s (scalar=%d, opt=%s)' % (expr, dtype.__name__, test_scalar, optimization)) except Exception as ne_exception: same_exc_type = issubclass(type(ne_exception), type(np_exception)) if np_exception is None or not same_exc_type: print('numexpr error for expression %r' % (expr,)) raise except: print('numexpr error for expression %r' % (expr,)) raise else: msg = ('expected numexpr error not raised for expression ' '%r' % (expr,)) assert np_exception is None, msg assert equal(npval, neval, exact), """%r (test_scalar=%r, dtype=%r, optimization=%r, exact=%r, npval=%r (%r - %r)\n neval=%r (%r - %r))""" % (expr, test_scalar, dtype.__name__, optimization, exact, npval, type(npval), shape(npval), neval, type(neval), shape(neval)) method.description = ('test_expressions(%s, test_scalar=%r, ' 'dtype=%r, optimization=%r, exact=%r)') % (expr, test_scalar, dtype.__name__, optimization, exact) test_no[0] += 1 method.__name__ = 'test_scalar%d_%s_%s_%s_%04d' % (test_scalar, dtype.__name__, optimization.encode('ascii'), section.encode('ascii'), test_no[0]) return method x = None for test_scalar in (0, 1, 2): for dtype in (int, long, np.float32, double, complex): array_size = 100 a = arange(2 * array_size, dtype=dtype)[::2] a2 = zeros([array_size, array_size], dtype=dtype) b = arange(array_size, dtype=dtype) / array_size c = arange(array_size, dtype=dtype) d = arange(array_size, dtype=dtype) e = arange(array_size, dtype=dtype) if dtype == complex: a = a.real for x in [a2, b, c, d, e]: x += 1j x *= 1 + 1j if test_scalar == 1: a = a[array_size // 2] if test_scalar == 2: b = b[array_size // 2] for optimization, exact in [ ('none', False), ('moderate', False), ('aggressive', False)]: for section_name, section_tests in tests: for expr in section_tests: if (dtype == complex and ('<' in expr or '>' in expr or '%' in expr or "arctan2" in expr or "fmod" in expr or "floor" in expr or "ceil" in expr)): # skip 
complex comparisons or functions not # defined in complex domain. continue if (dtype in (int, long) and test_scalar and expr == '(a+1) ** -1'): continue m = make_test_method(a, a2, b, c, d, e, x, expr, test_scalar, dtype, optimization, exact, section_name) yield m class test_int64(TestCase): def test_neg(self): a = array([2 ** 31 - 1, 2 ** 31, 2 ** 32, 2 ** 63 - 1], dtype=int64) res = evaluate('-a') assert_array_equal(res, [1 - 2 ** 31, -(2 ** 31), -(2 ** 32), 1 - 2 ** 63]) self.assertEqual(res.dtype.name, 'int64') class test_int32_int64(TestCase): if sys.version_info[0] < 2: # no long literals in python 3 def test_small_long(self): # Small longs should not be downgraded to ints. res = evaluate('42L') assert_array_equal(res, 42) self.assertEqual(res.dtype.name, 'int64') def test_small_int(self): # Small ints (32-bit ones) should not be promoted to longs. res = evaluate('2') assert_array_equal(res, 2) self.assertEqual(res.dtype.name, 'int32') def test_big_int(self): # Big ints should be promoted to longs. res = evaluate('2**40') assert_array_equal(res, 2 ** 40) self.assertEqual(res.dtype.name, 'int64') def test_long_constant_promotion(self): int32array = arange(100, dtype='int32') itwo = np.int32(2) ltwo = np.int64(2) res = int32array * 2 res32 = evaluate('int32array * itwo') res64 = evaluate('int32array * ltwo') assert_array_equal(res, res32) assert_array_equal(res, res64) self.assertEqual(res32.dtype.name, 'int32') self.assertEqual(res64.dtype.name, 'int64') def test_int64_array_promotion(self): int32array = arange(100, dtype='int32') int64array = arange(100, dtype='int64') respy = int32array * int64array resnx = evaluate('int32array * int64array') assert_array_equal(respy, resnx) self.assertEqual(resnx.dtype.name, 'int64') class test_uint32_int64(TestCase): def test_small_uint32(self): # Small uint32 should not be downgraded to ints. 
a = np.uint32(42) res = evaluate('a') assert_array_equal(res, 42) self.assertEqual(res.dtype.name, 'int64') def test_uint32_constant_promotion(self): int32array = arange(100, dtype='int32') stwo = np.int32(2) utwo = np.uint32(2) res = int32array * utwo res32 = evaluate('int32array * stwo') res64 = evaluate('int32array * utwo') assert_array_equal(res, res32) assert_array_equal(res, res64) self.assertEqual(res32.dtype.name, 'int32') self.assertEqual(res64.dtype.name, 'int64') def test_int64_array_promotion(self): uint32array = arange(100, dtype='uint32') int64array = arange(100, dtype='int64') respy = uint32array * int64array resnx = evaluate('uint32array * int64array') assert_array_equal(respy, resnx) self.assertEqual(resnx.dtype.name, 'int64') class test_strings(TestCase): BLOCK_SIZE1 = 128 BLOCK_SIZE2 = 8 str_list1 = [b'foo', b'bar', b'', b' '] str_list2 = [b'foo', b'', b'x', b' '] str_nloops = len(str_list1) * (BLOCK_SIZE1 + BLOCK_SIZE2 + 1) str_array1 = array(str_list1 * str_nloops) str_array2 = array(str_list2 * str_nloops) str_constant = b'doodoo' def test_null_chars(self): str_list = [ b'\0\0\0', b'\0\0foo\0', b'\0\0foo\0b', b'\0\0foo\0b\0', b'foo\0', b'foo\0b', b'foo\0b\0', b'foo\0bar\0baz\0\0'] for s in str_list: r = evaluate('s') self.assertEqual(s, r.tostring()) # check *all* stored data def test_compare_copy(self): sarr = self.str_array1 expr = 'sarr' res1 = eval(expr) res2 = evaluate(expr) assert_array_equal(res1, res2) def test_compare_array(self): sarr1 = self.str_array1 sarr2 = self.str_array2 expr = 'sarr1 >= sarr2' res1 = eval(expr) res2 = evaluate(expr) assert_array_equal(res1, res2) def test_compare_variable(self): sarr = self.str_array1 svar = self.str_constant expr = 'sarr >= svar' res1 = eval(expr) res2 = evaluate(expr) assert_array_equal(res1, res2) def test_compare_constant(self): sarr = self.str_array1 expr = 'sarr >= %r' % self.str_constant res1 = eval(expr) res2 = evaluate(expr) assert_array_equal(res1, res2) def test_add_string_array(self): sarr1 = self.str_array1 sarr2 = self.str_array2 expr = 'sarr1 + sarr2' self.assert_missing_op('add_sss', expr, locals()) def test_empty_string1(self): a = np.array([b"", b"pepe"]) b = np.array([b"pepe2", b""]) res = evaluate("(a == b'') & (b == b'pepe2')") assert_array_equal(res, np.array([True, False])) res2 = evaluate("(a == b'pepe') & (b == b'')") assert_array_equal(res2, np.array([False, True])) def test_empty_string2(self): a = np.array([b"p", b"pepe"]) b = np.array([b"pepe2", b""]) res = evaluate("(a == b'') & (b == b'pepe2')") assert_array_equal(res, np.array([False, False])) res2 = evaluate("(a == b'pepe') & (b == b'')") assert_array_equal(res, np.array([False, False])) def test_add_numeric_array(self): sarr = self.str_array1 narr = arange(len(sarr), dtype='int32') expr = 'sarr >= narr' self.assert_missing_op('ge_bsi', expr, locals()) def assert_missing_op(self, op, expr, local_dict): msg = "expected NotImplementedError regarding '%s'" % op try: evaluate(expr, local_dict) except NotImplementedError as nie: if "'%s'" % op not in nie.args[0]: self.fail(msg) else: self.fail(msg) def test_compare_prefix(self): # Check comparing two strings where one is a prefix of the # other. for s1, s2 in [(b'foo', b'foobar'), (b'foo', b'foo\0bar'), (b'foo\0a', b'foo\0bar')]: self.assertTrue(evaluate('s1 < s2')) self.assertTrue(evaluate('s1 <= s2')) self.assertTrue(evaluate('~(s1 == s2)')) self.assertTrue(evaluate('~(s1 >= s2)')) self.assertTrue(evaluate('~(s1 > s2)')) # Check for NumPy array-style semantics in string equality. 
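        # (That is, trailing NUL padding is ignored, so b'foo' compares equal to
        # b'foo\0\0', as it does for NumPy byte-string arrays.)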
s1, s2 = b'foo', b'foo\0\0' self.assertTrue(evaluate('s1 == s2')) # Case for testing selections in fields which are aligned but whose # data length is not an exact multiple of the length of the record. # The following test exposes the problem only in 32-bit machines, # because in 64-bit machines 'c2' is unaligned. However, this should # check most platforms where, while not unaligned, 'len(datatype) > # boundary_alignment' is fullfilled. class test_irregular_stride(TestCase): def test_select(self): f0 = arange(10, dtype=int32) f1 = arange(10, dtype=float64) irregular = rec.fromarrays([f0, f1]) f0 = irregular['f0'] f1 = irregular['f1'] i0 = evaluate('f0 < 5') i1 = evaluate('f1 < 5') assert_array_equal(f0[i0], arange(5, dtype=int32)) assert_array_equal(f1[i1], arange(5, dtype=float64)) # Cases for testing arrays with dimensions that can be zero. class test_zerodim(TestCase): def test_zerodim1d(self): a0 = array([], dtype=int32) a1 = array([], dtype=float64) r0 = evaluate('a0 + a1') r1 = evaluate('a0 * a1') assert_array_equal(r0, a1) assert_array_equal(r1, a1) def test_zerodim3d(self): a0 = array([], dtype=int32).reshape(0, 2, 4) a1 = array([], dtype=float64).reshape(0, 2, 4) r0 = evaluate('a0 + a1') r1 = evaluate('a0 * a1') assert_array_equal(r0, a1) assert_array_equal(r1, a1) @contextmanager def _environment(key, value): old = os.environ.get(key) os.environ[key] = value try: yield finally: if old: os.environ[key] = old else: del os.environ[key] # Test cases for the threading configuration class test_threading_config(TestCase): def test_max_threads_unset(self): # Has to be done in a subprocess as `importlib.reload` doesn't let us # re-initialize the threadpool script = '\n'.join([ "import os", "if 'NUMEXPR_MAX_THREADS' in os.environ: os.environ.pop('NUMEXPR_MAX_THREADS')", "import numexpr", "assert(numexpr.nthreads <= 8)", "exit(0)"]) subprocess.check_call([sys.executable, '-c', script]) def test_max_threads_set(self): # Has to be done in a subprocess as `importlib.reload` doesn't let us # re-initialize the threadpool script = '\n'.join([ "import os", "os.environ['NUMEXPR_MAX_THREADS'] = '4'", "import numexpr", "assert(numexpr.MAX_THREADS == 4)", "exit(0)"]) subprocess.check_call([sys.executable, '-c', script]) def test_numexpr_num_threads(self): with _environment('OMP_NUM_THREADS', '5'): # NUMEXPR_NUM_THREADS has priority with _environment('NUMEXPR_NUM_THREADS', '3'): self.assertEquals(3, numexpr._init_num_threads()) def test_omp_num_threads(self): with _environment('OMP_NUM_THREADS', '5'): self.assertEquals(5, numexpr._init_num_threads()) # Case test for threads class test_threading(TestCase): def test_thread(self): import threading class ThreadTest(threading.Thread): def run(self): a = arange(3) assert_array_equal(evaluate('a**3'), array([0, 1, 8])) test = ThreadTest() test.start() test.join() def test_multithread(self): import threading # Running evaluate() from multiple threads shouldn't crash def work(n): a = arange(n) evaluate('a+a') work(10) # warm compilation cache nthreads = 30 threads = [threading.Thread(target=work, args=(1e5,)) for i in range(nthreads)] for t in threads: t.start() for t in threads: t.join() # The worker function for the subprocess (needs to be here because Windows # has problems pickling nested functions with the multiprocess module :-/) def _worker(qout=None): ra = np.arange(1e3) rows = evaluate('ra > 0') #print "Succeeded in evaluation!\n" if qout is not None: qout.put("Done") # Case test for subprocesses (via multiprocessing module) class 
test_subprocess(TestCase): def test_multiprocess(self): try: import multiprocessing as mp except ImportError: return # Check for two threads at least numexpr.set_num_threads(2) #print "**** Running from main process:" _worker() #print "**** Running from subprocess:" qout = mp.Queue() ps = mp.Process(target=_worker, args=(qout,)) ps.daemon = True ps.start() result = qout.get() #print result def print_versions(): """Print the versions of software that numexpr relies on.""" # from pkg_resources import parse_version from numexpr.cpuinfo import cpu import platform np_version = LooseVersion(np.__version__) if np_version < minimum_numpy_version: print('*Warning*: NumPy version is lower than recommended: %s < %s' % (np_version, minimum_numpy_version)) print('-=' * 38) print('Numexpr version: %s' % numexpr.__version__) print('NumPy version: %s' % np.__version__) print('Python version: %s' % sys.version) (sysname, nodename, release, os_version, machine, processor) = platform.uname() print('Platform: %s-%s-%s' % (sys.platform, machine, os_version)) try: # cpuinfo doesn't work on OSX well it seems, so protect these outputs # with a try block cpu_info = cpu.info[0] print('CPU vendor: %s' % cpu_info.get('VendorIdentifier', '')) print('CPU model: %s' % cpu_info.get('ProcessorNameString', '')) print('CPU clock speed: %s MHz' % cpu_info.get('~MHz','')) except KeyError: pass print('VML available? %s' % use_vml) if use_vml: print('VML/MKL version: %s' % numexpr.get_vml_version()) print('Number of threads used by default: %d ' '(out of %d detected cores)' % (numexpr.nthreads, numexpr.ncores)) print('Maximum number of threads: %s' % numexpr.MAX_THREADS) print('-=' * 38) def test(verbosity=1): """ Run all the tests in the test suite. """ print_versions() # For some reason, NumPy issues all kinds of warnings when using Python3. # Ignoring them in tests should be ok, as all results are checked out. # See https://github.com/pydata/numexpr/issues/183 for details. np.seterr(divide='ignore', invalid='ignore', over='ignore', under='ignore') return unittest.TextTestRunner(verbosity=verbosity).run(suite()) test.__test__ = False def suite(): import unittest import platform as pl theSuite = unittest.TestSuite() niter = 1 class TestExpressions(TestCase): pass def add_method(func): def method(self): return func() setattr(TestExpressions, func.__name__, method.__get__(None, TestExpressions)) for func in test_expressions(): add_method(func) for n in range(niter): theSuite.addTest(unittest.makeSuite(test_numexpr)) if 'sparc' not in platform.machine(): theSuite.addTest(unittest.makeSuite(test_numexpr2)) theSuite.addTest(unittest.makeSuite(test_evaluate)) theSuite.addTest(unittest.makeSuite(TestExpressions)) theSuite.addTest(unittest.makeSuite(test_int32_int64)) theSuite.addTest(unittest.makeSuite(test_uint32_int64)) theSuite.addTest(unittest.makeSuite(test_strings)) theSuite.addTest( unittest.makeSuite(test_irregular_stride)) theSuite.addTest(unittest.makeSuite(test_zerodim)) theSuite.addTest(unittest.makeSuite(test_threading_config)) # multiprocessing module is not supported on Hurd/kFreeBSD if (pl.system().lower() not in ('gnu', 'gnu/kfreebsd')): theSuite.addTest(unittest.makeSuite(test_subprocess)) # I need to put this test after test_subprocess because # if not, the test suite locks immediately before test_subproces. 
# This only happens with Windows, so I suspect of a subtle bad # interaction with threads and subprocess :-/ theSuite.addTest(unittest.makeSuite(test_threading)) return theSuite if __name__ == '__main__': print_versions() unittest.main(defaultTest='suite') # suite = suite() # unittest.TextTestRunner(verbosity=2).run(suite) numexpr-2.7.1/numexpr/utils.py000066400000000000000000000157741360375525100164540ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### import logging log = logging.getLogger(__name__) import os import subprocess import platform from numexpr.interpreter import _set_num_threads, MAX_THREADS from numexpr import use_vml if use_vml: from numexpr.interpreter import ( _get_vml_version, _set_vml_accuracy_mode, _set_vml_num_threads) def get_vml_version(): """Get the VML/MKL library version.""" if use_vml: return _get_vml_version() else: return None def set_vml_accuracy_mode(mode): """ Set the accuracy mode for VML operations. The `mode` parameter can take the values: - 'high': high accuracy mode (HA), <1 least significant bit - 'low': low accuracy mode (LA), typically 1-2 least significant bits - 'fast': enhanced performance mode (EP) - None: mode settings are ignored This call is equivalent to the `vmlSetMode()` in the VML library. See: http://www.intel.com/software/products/mkl/docs/webhelp/vml/vml_DataTypesAccuracyModes.html for more info on the accuracy modes. Returns old accuracy settings. """ if use_vml: acc_dict = {None: 0, 'low': 1, 'high': 2, 'fast': 3} acc_reverse_dict = {1: 'low', 2: 'high', 3: 'fast'} if mode not in acc_dict.keys(): raise ValueError( "mode argument must be one of: None, 'high', 'low', 'fast'") retval = _set_vml_accuracy_mode(acc_dict.get(mode, 0)) return acc_reverse_dict.get(retval) else: return None def set_vml_num_threads(nthreads): """ Suggests a maximum number of threads to be used in VML operations. This function is equivalent to the call `mkl_domain_set_num_threads(nthreads, MKL_DOMAIN_VML)` in the MKL library. See: http://www.intel.com/software/products/mkl/docs/webhelp/support/functn_mkl_domain_set_num_threads.html for more info about it. """ if use_vml: _set_vml_num_threads(nthreads) def set_num_threads(nthreads): """ Sets a number of threads to be used in operations. Returns the previous setting for the number of threads. During initialization time Numexpr sets this number to the number of detected cores in the system (see `detect_number_of_cores()`). If you are using Intel's VML, you may want to use `set_vml_num_threads(nthreads)` to perform the parallel job with VML instead. However, you should get very similar performance with VML-optimized functions, and VML's parallelizer cannot deal with common expresions like `(x+1)*(x-2)`, while Numexpr's one can. """ old_nthreads = _set_num_threads(nthreads) return old_nthreads def _init_num_threads(): """ Detects the environment variable 'NUMEXPR_MAX_THREADS' to set the threadpool size, and if necessary the slightly redundant 'NUMEXPR_NUM_THREADS' or 'OMP_NUM_THREADS' env vars to set the initial number of threads used by the virtual machine. 
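As a quick illustration of the setter functions documented above, assuming (as in released numexpr) that they are re-exported at the package level; the thread counts are arbitrary, and the VML calls do nothing (set_vml_accuracy_mode returns None) when numexpr was built without MKL/VML:

import numexpr as ne

old_nthreads = ne.set_num_threads(4)              # returns the previous setting
ne.set_vml_num_threads(4)                         # suggestion forwarded to MKL, if present
previous_mode = ne.set_vml_accuracy_mode('fast')  # one of None, 'low', 'high', 'fast'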
""" # Any platform-specific short-circuits if 'sparc' in platform.machine(): log.warning('The number of threads have been set to 1 because problems related ' 'to threading have been reported on some sparc machine. ' 'The number of threads can be changed using the "set_num_threads" ' 'function.') set_num_threads(1) return 1 env_configured = False n_cores = detect_number_of_cores() if 'NUMEXPR_MAX_THREADS' in os.environ: # The user has configured NumExpr in the expected way, so suppress logs. env_configured = True n_cores = MAX_THREADS else: # The use has not set 'NUMEXPR_MAX_THREADS', so likely they have not # configured NumExpr as desired, so we emit info logs. if n_cores > MAX_THREADS: log.info('Note: detected %d virtual cores but NumExpr set to maximum of %d, check "NUMEXPR_MAX_THREADS" environment variable.'%(n_cores, MAX_THREADS)) if n_cores > 8: # The historical 'safety' limit. log.info('Note: NumExpr detected %d cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.'%n_cores) n_cores = 8 # Now we check for 'NUMEXPR_NUM_THREADS' or 'OMP_NUM_THREADS' to set the # actual number of threads used. if 'NUMEXPR_NUM_THREADS' in os.environ: requested_threads = int(os.environ['NUMEXPR_NUM_THREADS']) elif 'OMP_NUM_THREADS' in os.environ: requested_threads = int(os.environ['OMP_NUM_THREADS']) else: requested_threads = n_cores if not env_configured: log.info('NumExpr defaulting to %d threads.'%n_cores) # The C-extension function performs its own checks against `MAX_THREADS` set_num_threads(requested_threads) return requested_threads def detect_number_of_cores(): """ Detects the number of cores on a system. Cribbed from pp. """ # Linux, Unix and MacOS: if hasattr(os, "sysconf"): if "SC_NPROCESSORS_ONLN" in os.sysconf_names: # Linux & Unix: ncpus = os.sysconf("SC_NPROCESSORS_ONLN") if isinstance(ncpus, int) and ncpus > 0: return ncpus else: # OSX: return int(subprocess.check_output(["sysctl", "-n", "hw.ncpu"])) # Windows: try: ncpus = int(os.environ.get("NUMBER_OF_PROCESSORS", "")) if ncpus > 0: return ncpus except ValueError: pass return 1 # Default def detect_number_of_threads(): """ DEPRECATED: use `_init_num_threads` instead. If this is modified, please update the note in: https://github.com/pydata/numexpr/wiki/Numexpr-Users-Guide """ log.warning('Deprecated, use `init_num_threads` instead.') try: nthreads = int(os.environ.get('NUMEXPR_NUM_THREADS', '')) except ValueError: try: nthreads = int(os.environ.get('OMP_NUM_THREADS', '')) except ValueError: nthreads = detect_number_of_cores() # Check that we don't surpass the MAX_THREADS in interpreter.cpp if nthreads > MAX_THREADS: nthreads = MAX_THREADS return nthreads class CacheDict(dict): """ A dictionary that prevents itself from growing too much. """ def __init__(self, maxentries): self.maxentries = maxentries super(CacheDict, self).__init__(self) def __setitem__(self, key, value): # Protection against growing the cache too much if len(self) > self.maxentries: # Remove a 10% of (arbitrary) elements from the cache entries_to_remove = self.maxentries // 10 for k in self.keys()[:entries_to_remove]: super(CacheDict, self).__delitem__(k) super(CacheDict, self).__setitem__(key, value) numexpr-2.7.1/numexpr/version.py000066400000000000000000000005561360375525100167710ustar00rootroot00000000000000################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. 
# # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. #################################################################### version = '2.7.1' numexpr-2.7.1/numexpr/win32/000077500000000000000000000000001360375525100156665ustar00rootroot00000000000000numexpr-2.7.1/numexpr/win32/pthread.c000066400000000000000000000156551360375525100174750ustar00rootroot00000000000000/* * Code for simulating pthreads API on Windows. This is Git-specific, * but it is enough for Numexpr needs too. * * Copyright (C) 2009 Andrzej K. Haczewski * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * DISCLAIMER: The implementation is Git-specific, it is subset of original * Pthreads API, without lots of other features that Git doesn't use. * Git also makes sure that the passed arguments are valid, so there's * no need for double-checking. */ #include "pthread.h" #include #include #include #include #include void die(const char *err, ...) 
{ printf("%s", err); exit(-1); } static unsigned __stdcall win32_start_routine(void *arg) { pthread_t *thread = arg; thread->arg = thread->start_routine(thread->arg); return 0; } int pthread_create(pthread_t *thread, const void *unused, void *(*start_routine)(void*), void *arg) { thread->arg = arg; thread->start_routine = start_routine; thread->handle = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine, thread, 0, NULL); if (!thread->handle) return errno; else return 0; } int win32_pthread_join(pthread_t *thread, void **value_ptr) { DWORD result = WaitForSingleObject(thread->handle, INFINITE); switch (result) { case WAIT_OBJECT_0: if (value_ptr) *value_ptr = thread->arg; return 0; case WAIT_ABANDONED: return EINVAL; default: return GetLastError(); } } int pthread_cond_init(pthread_cond_t *cond, const void *unused) { cond->waiters = 0; cond->was_broadcast = 0; InitializeCriticalSection(&cond->waiters_lock); cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); if (!cond->sema) die("CreateSemaphore() failed"); cond->continue_broadcast = CreateEvent(NULL, /* security */ FALSE, /* auto-reset */ FALSE, /* not signaled */ NULL); /* name */ if (!cond->continue_broadcast) die("CreateEvent() failed"); return 0; } int pthread_cond_destroy(pthread_cond_t *cond) { CloseHandle(cond->sema); CloseHandle(cond->continue_broadcast); DeleteCriticalSection(&cond->waiters_lock); return 0; } int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex) { int last_waiter; EnterCriticalSection(&cond->waiters_lock); cond->waiters++; LeaveCriticalSection(&cond->waiters_lock); /* * Unlock external mutex and wait for signal. * NOTE: we've held mutex locked long enough to increment * waiters count above, so there's no problem with * leaving mutex unlocked before we wait on semaphore. */ LeaveCriticalSection(mutex); /* let's wait - ignore return value */ WaitForSingleObject(cond->sema, INFINITE); /* * Decrease waiters count. If we are the last waiter, then we must * notify the broadcasting thread that it can continue. * But if we continued due to cond_signal, we do not have to do that * because the signaling thread knows that only one waiter continued. */ EnterCriticalSection(&cond->waiters_lock); cond->waiters--; last_waiter = cond->was_broadcast && cond->waiters == 0; LeaveCriticalSection(&cond->waiters_lock); if (last_waiter) { /* * cond_broadcast was issued while mutex was held. This means * that all other waiters have continued, but are contending * for the mutex at the end of this function because the * broadcasting thread did not leave cond_broadcast, yet. * (This is so that it can be sure that each waiter has * consumed exactly one slice of the semaphor.) * The last waiter must tell the broadcasting thread that it * can go on. */ SetEvent(cond->continue_broadcast); /* * Now we go on to contend with all other waiters for * the mutex. Auf in den Kampf! */ } /* lock external mutex again */ EnterCriticalSection(mutex); return 0; } /* * IMPORTANT: This implementation requires that pthread_cond_signal * is called while the mutex is held that is used in the corresponding * pthread_cond_wait calls! */ int pthread_cond_signal(pthread_cond_t *cond) { int have_waiters; EnterCriticalSection(&cond->waiters_lock); have_waiters = cond->waiters > 0; LeaveCriticalSection(&cond->waiters_lock); /* * Signal only when there are waiters */ if (have_waiters) return ReleaseSemaphore(cond->sema, 1, NULL) ? 
0 : GetLastError(); else return 0; } /* * DOUBLY IMPORTANT: This implementation requires that pthread_cond_broadcast * is called while the mutex is held that is used in the corresponding * pthread_cond_wait calls! */ int pthread_cond_broadcast(pthread_cond_t *cond) { EnterCriticalSection(&cond->waiters_lock); if ((cond->was_broadcast = cond->waiters > 0)) { /* wake up all waiters */ ReleaseSemaphore(cond->sema, cond->waiters, NULL); LeaveCriticalSection(&cond->waiters_lock); /* * At this point all waiters continue. Each one takes its * slice of the semaphor. Now it's our turn to wait: Since * the external mutex is held, no thread can leave cond_wait, * yet. For this reason, we can be sure that no thread gets * a chance to eat *more* than one slice. OTOH, it means * that the last waiter must send us a wake-up. */ WaitForSingleObject(cond->continue_broadcast, INFINITE); /* * Since the external mutex is held, no thread can enter * cond_wait, and, hence, it is safe to reset this flag * without cond->waiters_lock held. */ cond->was_broadcast = 0; } else { LeaveCriticalSection(&cond->waiters_lock); } return 0; } numexpr-2.7.1/numexpr/win32/pthread.h000066400000000000000000000064131360375525100174720ustar00rootroot00000000000000/* * Code for simulating pthreads API on Windows. This is Git-specific, * but it is enough for Numexpr needs too. * * Copyright (C) 2009 Andrzej K. Haczewski * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * DISCLAIMER: The implementation is Git-specific, it is subset of original * Pthreads API, without lots of other features that Git doesn't use. * Git also makes sure that the passed arguments are valid, so there's * no need for double-checking. */ #ifndef PTHREAD_H #define PTHREAD_H #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include #ifdef __cplusplus extern "C" { #endif /* * Defines that adapt Windows API threads to pthreads API */ #define pthread_mutex_t CRITICAL_SECTION #define pthread_mutex_init(a,b) InitializeCriticalSection((a)) #define pthread_mutex_destroy(a) DeleteCriticalSection((a)) #define pthread_mutex_lock EnterCriticalSection #define pthread_mutex_unlock LeaveCriticalSection /* * Implement simple condition variable for Windows threads, based on ACE * implementation. 
* * See original implementation: http://bit.ly/1vkDjo * ACE homepage: http://www.cse.wustl.edu/~schmidt/ACE.html * See also: http://www.cse.wustl.edu/~schmidt/win32-cv-1.html */ typedef struct { LONG waiters; int was_broadcast; CRITICAL_SECTION waiters_lock; HANDLE sema; HANDLE continue_broadcast; } pthread_cond_t; extern int pthread_cond_init(pthread_cond_t *cond, const void *unused); extern int pthread_cond_destroy(pthread_cond_t *cond); extern int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex); extern int pthread_cond_signal(pthread_cond_t *cond); extern int pthread_cond_broadcast(pthread_cond_t *cond); /* * Simple thread creation implementation using pthread API */ typedef struct { HANDLE handle; void *(*start_routine)(void*); void *arg; } pthread_t; extern int pthread_create(pthread_t *thread, const void *unused, void *(*start_routine)(void*), void *arg); /* * To avoid the need of copying a struct, we use small macro wrapper to pass * pointer to win32_pthread_join instead. */ #define pthread_join(a, b) win32_pthread_join(&(a), (b)) extern int win32_pthread_join(pthread_t *thread, void **value_ptr); #ifdef __cplusplus } // extern "C" #endif #endif /* PTHREAD_H */ numexpr-2.7.1/numexpr/win32/stdint.h000066400000000000000000000162421360375525100173510ustar00rootroot00000000000000/* ISO C9x 7.18 Integer types * Based on ISO/IEC SC22/WG14 9899 Committee draft (SC22 N2794) * * THIS SOFTWARE IS NOT COPYRIGHTED * * Contributor: Danny Smith * * This source code is offered for use in the public domain. You may * use, modify or distribute it freely. * * This code is distributed in the hope that it will be useful but * WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY * DISCLAIMED. This includes but is not limited to warranties of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * Date: 2000-12-02 * * mwb: This was modified in the following ways: * * - make it compatible with Visual C++ 6 (which uses * non-standard keywords and suffixes for 64-bit types) * - some environments need stddef.h included (for wchar stuff?) * - handle the fact that Microsoft's limits.h header defines * SIZE_MAX * - make corrections for SIZE_MAX, INTPTR_MIN, INTPTR_MAX, UINTPTR_MAX, * PTRDIFF_MIN, PTRDIFF_MAX, SIG_ATOMIC_MIN, and SIG_ATOMIC_MAX * to be 64-bit aware. 
*/ #ifndef _STDINT_H #define _STDINT_H #define __need_wint_t #define __need_wchar_t #include #include #if _MSC_VER && (_MSC_VER < 1300) /* using MSVC 6 or earlier - no "long long" type, but might have _int64 type */ #define __STDINT_LONGLONG __int64 #define __STDINT_LONGLONG_SUFFIX i64 #else #define __STDINT_LONGLONG long long #define __STDINT_LONGLONG_SUFFIX LL #endif #if !defined( PASTE) #define PASTE2( x, y) x##y #define PASTE( x, y) PASTE2( x, y) #endif /* PASTE */ /* 7.18.1.1 Exact-width integer types */ typedef signed char int8_t; typedef unsigned char uint8_t; typedef short int16_t; typedef unsigned short uint16_t; typedef int int32_t; typedef unsigned uint32_t; typedef __STDINT_LONGLONG int64_t; typedef unsigned __STDINT_LONGLONG uint64_t; /* 7.18.1.2 Minimum-width integer types */ typedef signed char int_least8_t; typedef unsigned char uint_least8_t; typedef short int_least16_t; typedef unsigned short uint_least16_t; typedef int int_least32_t; typedef unsigned uint_least32_t; typedef __STDINT_LONGLONG int_least64_t; typedef unsigned __STDINT_LONGLONG uint_least64_t; /* 7.18.1.3 Fastest minimum-width integer types * Not actually guaranteed to be fastest for all purposes * Here we use the exact-width types for 8 and 16-bit ints. */ typedef char int_fast8_t; typedef unsigned char uint_fast8_t; typedef short int_fast16_t; typedef unsigned short uint_fast16_t; typedef int int_fast32_t; typedef unsigned int uint_fast32_t; typedef __STDINT_LONGLONG int_fast64_t; typedef unsigned __STDINT_LONGLONG uint_fast64_t; /* 7.18.1.4 Integer types capable of holding object pointers */ #ifndef _INTPTR_T_DEFINED #define _INTPTR_T_DEFINED #ifdef _WIN64 typedef __STDINT_LONGLONG intptr_t #else typedef int intptr_t; #endif /* _WIN64 */ #endif /* _INTPTR_T_DEFINED */ #ifndef _UINTPTR_T_DEFINED #define _UINTPTR_T_DEFINED #ifdef _WIN64 typedef unsigned __STDINT_LONGLONG uintptr_t #else typedef unsigned int uintptr_t; #endif /* _WIN64 */ #endif /* _UINTPTR_T_DEFINED */ /* 7.18.1.5 Greatest-width integer types */ typedef __STDINT_LONGLONG intmax_t; typedef unsigned __STDINT_LONGLONG uintmax_t; /* 7.18.2 Limits of specified-width integer types */ #if !defined ( __cplusplus) || defined (__STDC_LIMIT_MACROS) /* 7.18.2.1 Limits of exact-width integer types */ #define INT8_MIN (-128) #define INT16_MIN (-32768) #define INT32_MIN (-2147483647 - 1) #define INT64_MIN (PASTE( -9223372036854775807, __STDINT_LONGLONG_SUFFIX) - 1) #define INT8_MAX 127 #define INT16_MAX 32767 #define INT32_MAX 2147483647 #define INT64_MAX (PASTE( 9223372036854775807, __STDINT_LONGLONG_SUFFIX)) #define UINT8_MAX 0xff /* 255U */ #define UINT16_MAX 0xffff /* 65535U */ #define UINT32_MAX 0xffffffff /* 4294967295U */ #define UINT64_MAX (PASTE( 0xffffffffffffffffU, __STDINT_LONGLONG_SUFFIX)) /* 18446744073709551615ULL */ /* 7.18.2.2 Limits of minimum-width integer types */ #define INT_LEAST8_MIN INT8_MIN #define INT_LEAST16_MIN INT16_MIN #define INT_LEAST32_MIN INT32_MIN #define INT_LEAST64_MIN INT64_MIN #define INT_LEAST8_MAX INT8_MAX #define INT_LEAST16_MAX INT16_MAX #define INT_LEAST32_MAX INT32_MAX #define INT_LEAST64_MAX INT64_MAX #define UINT_LEAST8_MAX UINT8_MAX #define UINT_LEAST16_MAX UINT16_MAX #define UINT_LEAST32_MAX UINT32_MAX #define UINT_LEAST64_MAX UINT64_MAX /* 7.18.2.3 Limits of fastest minimum-width integer types */ #define INT_FAST8_MIN INT8_MIN #define INT_FAST16_MIN INT16_MIN #define INT_FAST32_MIN INT32_MIN #define INT_FAST64_MIN INT64_MIN #define INT_FAST8_MAX INT8_MAX #define INT_FAST16_MAX INT16_MAX #define 
INT_FAST32_MAX INT32_MAX #define INT_FAST64_MAX INT64_MAX #define UINT_FAST8_MAX UINT8_MAX #define UINT_FAST16_MAX UINT16_MAX #define UINT_FAST32_MAX UINT32_MAX #define UINT_FAST64_MAX UINT64_MAX /* 7.18.2.4 Limits of integer types capable of holding object pointers */ #ifdef _WIN64 #define INTPTR_MIN INT64_MIN #define INTPTR_MAX INT64_MAX #define UINTPTR_MAX UINT64_MAX #else #define INTPTR_MIN INT32_MIN #define INTPTR_MAX INT32_MAX #define UINTPTR_MAX UINT32_MAX #endif /* _WIN64 */ /* 7.18.2.5 Limits of greatest-width integer types */ #define INTMAX_MIN INT64_MIN #define INTMAX_MAX INT64_MAX #define UINTMAX_MAX UINT64_MAX /* 7.18.3 Limits of other integer types */ #define PTRDIFF_MIN INTPTR_MIN #define PTRDIFF_MAX INTPTR_MAX #define SIG_ATOMIC_MIN INTPTR_MIN #define SIG_ATOMIC_MAX INTPTR_MAX /* we need to check for SIZE_MAX already defined because MS defines it in limits.h */ #ifndef SIZE_MAX #define SIZE_MAX UINTPTR_MAX #endif #ifndef WCHAR_MIN /* also in wchar.h */ #define WCHAR_MIN 0 #define WCHAR_MAX ((wchar_t)-1) /* UINT16_MAX */ #endif /* * wint_t is unsigned short for compatibility with MS runtime */ #define WINT_MIN 0 #define WINT_MAX ((wint_t)-1) /* UINT16_MAX */ #endif /* !defined ( __cplusplus) || defined __STDC_LIMIT_MACROS */ /* 7.18.4 Macros for integer constants */ #if !defined ( __cplusplus) || defined (__STDC_CONSTANT_MACROS) /* 7.18.4.1 Macros for minimum-width integer constants Accoding to Douglas Gwyn : "This spec was changed in ISO/IEC 9899:1999 TC1; in ISO/IEC 9899:1999 as initially published, the expansion was required to be an integer constant of precisely matching type, which is impossible to accomplish for the shorter types on most platforms, because C99 provides no standard way to designate an integer constant with width less than that of type int. TC1 changed this to require just an integer constant *expression* with *promoted* type." */ #define INT8_C(val) ((int8_t) + (val)) #define UINT8_C(val) ((uint8_t) + (val##U)) #define INT16_C(val) ((int16_t) + (val)) #define UINT16_C(val) ((uint16_t) + (val##U)) #define INT32_C(val) val##L #define UINT32_C(val) val##UL #define INT64_C(val) (PASTE( val, __STDINT_LONGLONG_SUFFIX)) #define UINT64_C(val)(PASTE( PASTE( val, U), __STDINT_LONGLONG_SUFFIX)) /* 7.18.4.2 Macros for greatest-width integer constants */ #define INTMAX_C(val) INT64_C(val) #define UINTMAX_C(val) UINT64_C(val) #endif /* !defined ( __cplusplus) || defined __STDC_CONSTANT_MACROS */ #endif numexpr-2.7.1/requirements.txt000066400000000000000000000000121360375525100165030ustar00rootroot00000000000000numpy>=1.7numexpr-2.7.1/setup.py000066400000000000000000000210611360375525100147400ustar00rootroot00000000000000#!/usr/bin/env python ################################################################### # Numexpr - Fast numerical array expression evaluator for NumPy. # # License: MIT # Author: See AUTHORS.txt # # See LICENSE.txt and LICENSES/*.txt for details about copyright and # rights to use. 
#################################################################### import shutil import sys, os, os.path as op, io from distutils.command.clean import clean if sys.version_info < (2, 6): raise RuntimeError("must use python 2.6 or greater") try: import setuptools except ImportError: setuptools = None with open('requirements.txt') as f: requirements = f.read().splitlines() with io.open('README.rst', encoding='utf-8') as f: LONG_DESCRIPTION = f.read() # Fetch the version for numexpr (will be put in variable `version`) with open(os.path.join('numexpr', 'version.py')) as f: exec(f.read()) def setup_package(): metadata = dict( description='Fast numerical expression evaluator for NumPy', author='David M. Cooke, Francesc Alted and others', author_email='david.m.cooke@gmail.com, faltet@gmail.com', url='https://github.com/pydata/numexpr', long_description=LONG_DESCRIPTION, license='MIT', packages=['numexpr'], install_requires=requirements, setup_requires=requirements ) if (len(sys.argv) >= 2 and ('--help' in sys.argv[1:] or (sys.argv[1] in ( '--help-commands', 'egg_info', '--version', 'clean', '--name')))): # For these actions, NumPy is not required. # # They are required to succeed without Numpy for example when # pip is used to install Numexpr when Numpy is not yet present in # the system. # (via https://github.com/abhirk/scikit-learn/blob/master/setup.py) try: from setuptools import setup except ImportError: from distutils.core import setup metadata['name'] = 'numexpr' metadata['version'] = version else: from numpy.distutils.core import setup from numpy.distutils.command.build_ext import build_ext as numpy_build_ext try: # Python 3 # Code taken form numpy/distutils/command/build_py.py # XXX: update LICENSES from distutils.command.build_py import build_py_2to3 as old_build_py from numpy.distutils.misc_util import is_string class build_py(old_build_py): def run(self): build_src = self.get_finalized_command('build_src') if build_src.py_modules_dict and self.packages is None: self.packages = list(build_src.py_modules_dict.keys()) old_build_py.run(self) def find_package_modules(self, package, package_dir): modules = old_build_py.find_package_modules( self, package, package_dir) # Find build_src generated *.py files. build_src = self.get_finalized_command('build_src') modules += build_src.py_modules_dict.get(package, []) return modules def find_modules(self): old_py_modules = self.py_modules[:] new_py_modules = list(filter(is_string, self.py_modules)) self.py_modules[:] = new_py_modules modules = old_build_py.find_modules(self) self.py_modules[:] = old_py_modules return modules except ImportError: # Python 2 from numpy.distutils.command.build_py import build_py DEBUG = False def localpath(*args): return op.abspath(op.join(*((op.dirname(__file__),) + args))) def debug(instring): if DEBUG: print(" DEBUG: " + instring) def configuration(): from numpy.distutils.misc_util import Configuration, dict_append from numpy.distutils.system_info import system_info config = Configuration('numexpr') #try to find configuration for MKL, either from environment or site.cfg if op.exists('site.cfg'): mkl_config_data = config.get_info('mkl') # Some version of MKL needs to be linked with libgfortran. # For this, use entries of DEFAULT section in site.cfg. 
default_config = system_info() dict_append(mkl_config_data, libraries=default_config.get_libraries(), library_dirs=default_config.get_lib_dirs()) else: mkl_config_data = {} # setup information for C extension if os.name == 'nt': pthread_win = ['numexpr/win32/pthread.c'] else: pthread_win = [] extension_config_data = { 'sources': ['numexpr/interpreter.cpp', 'numexpr/module.cpp', 'numexpr/numexpr_object.cpp'] + pthread_win, 'depends': ['numexpr/interp_body.cpp', 'numexpr/complex_functions.hpp', 'numexpr/interpreter.hpp', 'numexpr/module.hpp', 'numexpr/msvc_function_stubs.hpp', 'numexpr/numexpr_config.hpp', 'numexpr/numexpr_object.hpp'], 'libraries': ['m'], 'extra_compile_args': ['-funroll-all-loops', ], } dict_append(extension_config_data, **mkl_config_data) if 'library_dirs' in mkl_config_data: library_dirs = ':'.join(mkl_config_data['library_dirs']) config.add_extension('interpreter', **extension_config_data) config.set_options(quiet=True) config.make_config_py() config.add_subpackage('tests', 'numexpr/tests') #version handling config.get_version('numexpr/version.py') return config class cleaner(clean): def run(self): # Recursive deletion of build/ directory path = localpath("build") try: shutil.rmtree(path) except Exception: debug("Failed to remove directory %s" % path) else: debug("Cleaned up %s" % path) # Now, the extension and other files try: import imp except ImportError: if os.name == 'posix': paths = [localpath("numexpr/interpreter.so")] else: paths = [localpath("numexpr/interpreter.pyd")] else: paths = [] for suffix, _, _ in imp.get_suffixes(): if suffix == '.py': continue paths.append(localpath("numexpr", "interpreter" + suffix)) paths.append(localpath("numexpr/__config__.py")) paths.append(localpath("numexpr/__config__.pyc")) for path in paths: try: os.remove(path) except Exception: debug("Failed to clean up file %s" % path) else: debug("Cleaning up %s" % path) clean.run(self) class build_ext(numpy_build_ext): def build_extension(self, ext): # at this point we know what the C compiler is. if self.compiler.compiler_type == 'msvc' or self.compiler.compiler_type == 'intelemw': ext.extra_compile_args = [] # also remove extra linker arguments msvc doesn't understand ext.extra_link_args = [] # also remove gcc math library ext.libraries.remove('m') numpy_build_ext.build_extension(self, ext) if setuptools: metadata['zip_safe'] = False metadata['cmdclass'] = { 'build_ext': build_ext, 'clean': cleaner, 'build_py': build_py, } metadata['configuration'] = configuration setup(**metadata) if __name__ == '__main__': setup_package() numexpr-2.7.1/site.cfg.example000066400000000000000000000054621360375525100163140ustar00rootroot00000000000000# if setup does not find the MKL or does not use the configuration, copy this # file to "site.cfg" and edit the paths according to your installation of the # Intel MKL. # Note: some versions of MKL need to be linked to gfortran if compiled with # GNU C compiler. Uncomment next line if you get an error like "undefined # symbol: _gfortran_malloc" # # Note2: Some Fedora users reported that they had to install a # compatible version of the gfortran lib. See: # http://code.google.com/p/numexpr/issues/detail?id=15 # for more info. 
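setup.py consumes a site.cfg like this one indirectly, through numpy.distutils; a rough sketch of that lookup follows, where the returned dictionary reflects the [mkl] section of site.cfg when one is present next to setup.py:

from numpy.distutils.misc_util import Configuration

config = Configuration('numexpr')
mkl_info = config.get_info('mkl')       # same call setup.py makes when site.cfg exists
print(mkl_info.get('libraries', []))
print(mkl_info.get('library_dirs', []))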
[DEFAULT] #libraries = gfortran #[mkl] # Example for the MKL included in Intel C 11.0 compiler # (you may need a recent NumPy version for being able to search libraries # in different directories at a time) #library_dirs = /opt/intel/Compiler/11.0/074/mkl/lib/em64t/:/opt/intel/Compiler/11.0/074/lib/intel64 #include_dirs = /opt/intel/Compiler/11.0/074/mkl/include/ #libraries = mkl_solver_ilp64, mkl_intel_ilp64, mkl_intel_thread, mkl_core, iomp5 # This seems to work for MKL 11 with processors with AVX (Sandy Bridge and above) for Linux #library_dirs = /opt/intel/composerxe/mkl/lib/intel64:/opt/intel/composer_xe_2013.3.163/compiler/lib/intel64 #include_dirs = /opt/intel/composerxe/mkl/include/ #libraries = mkl_intel_lp64, mkl_gf_lp64, mkl_intel_thread, mkl_core, mkl_blas95_lp64, mkl_lapack95_lp64, mkl_avx, mkl_vml_avx, mkl_rt, iomp5 ## Example for using MKL 10.0 #library_dirs = /opt/intel/mkl/10.0.2.018/lib/em64t #include_dirs = /opt/intel/mkl/10.0.2.018/include # Example for using MKL 10.2 for Windows 64-bit #include_dirs = \Program Files\Intel\MKL\10.2.5.035\include #library_dirs = \Program Files\Intel\MKL\10.2.5.035\em64t\lib #libraries = mkl_solver_ilp64, mkl_core, mkl_intel_thread, mkl_intel_ilp64, libiomp5md # The next works too, but for LP64 arithmetic #libraries = mkl_core, mkl_intel_thread, mkl_intel_lp64, libiomp5md # Example with Intel compiler version 14.0.2 and MKL v11.1.2 on intel64 architecture #libraries = mkl_intel_lp64, mkl_gf_lp64, mkl_intel_thread, mkl_core, mkl_def, mkl_vml_avx, mkl_rt, iomp5 # For details, see https://github.com/pydata/numexpr/issues/148 # Example for MKL2018 on Windows x64 # https://software.intel.com/en-us/articles/building-numpyscipy-with-intel-mkl-and-intel-fortran-on-windows # Please note that the valid os.pathsep is ';' on Windows and ':' on Linux # and that numpy.distutils.system_info only accepts the first instance of library_dirs # Last make sure you do `python setup.py install` instead of using pip. [mkl] library_dirs=/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/mkl/lib/intel64;/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/compiler/lib/intel64 include_dirs=/Program Files (x86)/IntelSWTools/compilers_and_libraries/windows/mkl/include libraries = mkl_intel_lp64, mkl_intel_thread, mkl_core, libiomp5md
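Once numexpr has been built against a site.cfg such as the one above, whether MKL/VML support was actually compiled in can be checked from Python; this small verification sketch assumes, as in released numexpr, that use_vml, get_vml_version(), print_versions() and test() are exposed at the package level:

import numexpr

numexpr.print_versions()          # NumPy/Python/VML details and thread counts
print(numexpr.use_vml)            # True only if the extension was linked with MKL/VML
if numexpr.use_vml:
    print(numexpr.get_vml_version())

# The bundled test suite can also be run programmatically:
assert numexpr.test().wasSuccessful()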